<a href="https://colab.research.google.com/github/DavoodSZ1993/RL/blob/main/09_DNQ.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Import Dependencies

In [1]:
import random
import gym
import numpy as np

from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.optimizers import Adam

import os

### Set Parameters

In [2]:
# Run configuration.
n_episodes = 1001
batch_size = 32
output_dir = './cartpole'

# Environment, plus the problem dimensions read from its spaces.
env = gym.make('CartPole-v0')
action_size = env.action_space.n            # 2 actions: left, right
state_size = env.observation_space.shape[0] # 4 components: cart/pole locations and velocities

# Notebook display of the two sizes.
state_size, action_size

(4, 2)

In [3]:
# Create the output directory (and any missing parents). exist_ok=True makes
# this a no-op when the directory is already there, without the
# check-then-create race of os.path.exists() followed by os.makedirs(); it
# also raises FileExistsError instead of silently continuing when the path
# exists but is not a directory.
os.makedirs(output_dir, exist_ok=True)

### Defining the Agent

In [5]:
class DQNAgent:
  def __init__(self, state_size, action_size):
    self.state_size = state_size
    self.action_size = action_size

    self.memory = deque(maxlen=200)

    self.gamma = 0.95

    self.epsilon = 1.0
    self.epsilon_decay = 0.995
    self.epsilon_min = 0.01

    self.learning_rate = 0.01

    self.model = self._build_model()

  def _build_model(self):
    """Build and compile the network that maps a state to per-action values.

    Architecture: two hidden Dense layers of 24 ReLU units each, followed by
    a linear output layer with one unit per action. Compiled with
    mean-squared-error loss and the Adam optimizer at self.learning_rate.

    Returns:
      The compiled Sequential model.
    """
    model = Sequential()

    # `input_dim` (previously misspelled `imput_dim`) tells the first layer
    # how many features each state vector has.
    model.add(Dense(24, input_dim=self.state_size, activation='relu'))
    model.add(Dense(24, activation='relu'))
    # Linear activation: the outputs are unbounded value estimates, not
    # probabilities.
    model.add(Dense(self.action_size, activation='linear'))

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))

    return model

  def remember(self, state, action, reward, next_state, done):
    self.memory.append((state, action, reward, next_state, done))

  def act(self, state):
    if np.random.rand() <= self.epsilon:
      return random.randrange(self.action_size)
    act_values = self.model.predict(state)
    return np.argmax(act_values[0])

  def replay(self, batch_size):

    minibatch = random.sample(self.memory, batch_size)

    for state, action, reward, next_state, done in minibatch:
      target = reward
      if not done:
        target = (reward + self.gamma * np.amax(self.model.predict(next_state)[0]))
        target_f = self.model.predict(state)
        target_f[0][action] = target

        self.model.fit(state, target_f, epochs=1, verbose=0)

      if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    def load(self, name):
      self.model.load_weights(name)
