<h1>Deep Q Learning - Cart Pole</h1>

<br>

<h2>Import dependencies</h2>

In [1]:
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from matplotlib import pyplot as plt

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


<h2>Agent</h2>

In [2]:
# Number of training episodes the training loop runs (also shown as the total in progress output).
EPISODES = 500

class DQNAgent:
    """Deep Q-learning agent with an epsilon-greedy policy and replay memory.

    The agent keeps a bounded buffer of past transitions, a small fully
    connected Keras network that maps a state to one Q-value per action,
    and an exploration rate that decays every time ``replay`` trains.
    """

    def __init__(self, state_size, action_size):
        """Create the agent and build its Q-network.

        Args:
            state_size: Number of features in one observation (network input width).
            action_size: Number of discrete actions (network output width).
        """
        self.state_size = state_size
        self.action_size = action_size
        # Bounded replay buffer: once 2000 transitions are stored, appending
        # a new one discards the oldest.
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01  # decay stops once epsilon is at or below this
        self.epsilon_decay = 0.995  # multiplicative decay applied per replay() call
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled Keras ``Sequential`` model: two hidden ReLU layers of
            24 units and a linear output layer with ``action_size`` units,
            trained with MSE loss and the Adam optimizer.
        """
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        # Linear output: Q-values are unbounded regression targets.
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action with an epsilon-greedy policy.

        Args:
            state: Observation in the form accepted by ``self.model.predict``
                (the training loop passes an array reshaped to
                ``[1, state_size]``).

        Returns:
            The chosen action index as a plain Python ``int`` on both the
            exploration and the greedy branch.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        # np.argmax yields a NumPy integer; convert so both branches return
        # the same type.
        return int(np.argmax(act_values[0]))  # returns action

    def replay(self, batch_size):
        """Train the Q-network on a random minibatch of stored transitions.

        For every sampled transition the target for the taken action is the
        reward (terminal step) or ``reward + gamma * max Q(next_state)``
        (non-terminal step); the other outputs keep the network's current
        predictions so only the taken action contributes to the loss.
        After training, epsilon is decayed while it is above ``epsilon_min``.

        Args:
            batch_size: Number of transitions to sample without replacement.
                If fewer transitions are stored, the call does nothing
                (no training, no epsilon decay).
        """
        # random.sample raises ValueError when asked for more items than
        # exist; treat "not enough experience yet" as a no-op instead.
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap from the best predicted value of the next state.
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state)[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load network weights from the file path ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to the file path ``name``."""
        self.model.save_weights(name)

<h2>Training</h2>

In [3]:
if __name__ == "__main__":
    # Train a DQNAgent on CartPole for EPISODES episodes, printing the
    # episode index, the step index at which it ended, and the current
    # exploration rate after each finished episode.
    env = gym.make('CartPole-v1')
    try:
        state_size = env.observation_space.shape[0]
        action_size = env.action_space.n
        agent = DQNAgent(state_size, action_size)
        # Optional: resume from saved weights.
        # agent.load("/home/jack/Desktop/cartpole-dqn.h5")
        batch_size = 32

        for e in range(EPISODES):
            state = env.reset()
            # The network expects a batch dimension: shape [1, state_size].
            state = np.reshape(state, [1, state_size])
            # "step" rather than "time" so the stdlib module name is not shadowed.
            for step in range(500):
                # env.render()
                action = agent.act(state)
                next_state, reward, done, _ = env.step(action)
                # Replace the reward of the episode-ending step with a penalty.
                reward = reward if not done else -10
                next_state = np.reshape(next_state, [1, state_size])
                agent.remember(state, action, reward, next_state, done)
                state = next_state
                if done:
                    print("episode: {}/{}, score: {}, e: {:.2}"
                          .format(e, EPISODES, step, agent.epsilon))
                    break
                # Train only once the memory holds more than one batch.
                if len(agent.memory) > batch_size:
                    agent.replay(batch_size)
            # Optional: checkpoint the weights every 10 episodes.
            #if e % 10 == 0:
            #    agent.save("/home/jack/Desktop/cartpole-dqn.h5")
    finally:
        # Release the environment's resources even if training is interrupted.
        env.close()





episode: 0/500, score: 25, e: 1.0


episode: 1/500, score: 23, e: 0.92
episode: 2/500, score: 20, e: 0.83
episode: 3/500, score: 19, e: 0.76
episode: 4/500, score: 10, e: 0.72
episode: 5/500, score: 8, e: 0.69
episode: 6/500, score: 11, e: 0.65
episode: 7/500, score: 21, e: 0.59
episode: 8/500, score: 23, e: 0.52
episode: 9/500, score: 16, e: 0.48
episode: 10/500, score: 11, e: 0.46
episode: 11/500, score: 20, e: 0.41
episode: 12/500, score: 13, e: 0.39
episode: 13/500, score: 11, e: 0.37
episode: 14/500, score: 8, e: 0.35
episode: 15/500, score: 8, e: 0.34
episode: 16/500, score: 11, e: 0.32
episode: 17/500, score: 10, e: 0.3
episode: 18/500, score: 12, e: 0.29
episode: 19/500, score: 9, e: 0.27
episode: 20/500, score: 10, e: 0.26
episode: 21/500, score: 9, e: 0.25
episode: 22/500, score: 9, e: 0.24
episode: 23/500, score: 13, e: 0.22
episode: 24/500, score: 27, e: 0.2
episode: 25/500, score: 11, e: 0.18
episode: 26/500, score: 9, e: 0.18
episode: 27/500, score: 9, e: 0.17
episode

episode: 191/500, score: 38, e: 0.01
episode: 192/500, score: 62, e: 0.01
episode: 193/500, score: 96, e: 0.01
episode: 194/500, score: 215, e: 0.01
episode: 195/500, score: 319, e: 0.01
episode: 196/500, score: 225, e: 0.01
episode: 197/500, score: 309, e: 0.01
episode: 198/500, score: 403, e: 0.01
episode: 199/500, score: 243, e: 0.01
episode: 200/500, score: 166, e: 0.01
episode: 201/500, score: 215, e: 0.01
episode: 202/500, score: 206, e: 0.01
episode: 203/500, score: 133, e: 0.01
episode: 204/500, score: 189, e: 0.01
episode: 205/500, score: 23, e: 0.01
episode: 206/500, score: 212, e: 0.01
episode: 207/500, score: 302, e: 0.01
episode: 208/500, score: 302, e: 0.01
episode: 209/500, score: 205, e: 0.01
episode: 210/500, score: 356, e: 0.01
episode: 211/500, score: 188, e: 0.01
episode: 212/500, score: 236, e: 0.01
episode: 213/500, score: 143, e: 0.01
episode: 214/500, score: 115, e: 0.01
episode: 215/500, score: 10, e: 0.01
episode: 216/500, score: 8, e: 0.01
episode: 217/500, s

episode: 413/500, score: 15, e: 0.01
episode: 414/500, score: 18, e: 0.01
episode: 415/500, score: 12, e: 0.01
episode: 416/500, score: 17, e: 0.01
episode: 417/500, score: 14, e: 0.01
episode: 418/500, score: 16, e: 0.01
episode: 419/500, score: 25, e: 0.01
episode: 420/500, score: 16, e: 0.01
episode: 421/500, score: 32, e: 0.01
episode: 422/500, score: 124, e: 0.01
episode: 423/500, score: 499, e: 0.01
episode: 424/500, score: 491, e: 0.01
episode: 425/500, score: 44, e: 0.01
episode: 426/500, score: 103, e: 0.01
episode: 427/500, score: 122, e: 0.01
episode: 428/500, score: 68, e: 0.01
episode: 429/500, score: 299, e: 0.01
episode: 430/500, score: 45, e: 0.01
episode: 431/500, score: 153, e: 0.01
episode: 432/500, score: 341, e: 0.01
episode: 433/500, score: 49, e: 0.01
episode: 434/500, score: 9, e: 0.01
episode: 435/500, score: 420, e: 0.01
episode: 436/500, score: 8, e: 0.01
episode: 437/500, score: 9, e: 0.01
episode: 438/500, score: 9, e: 0.01
episode: 439/500, score: 9, e: 0.

<h2>Out of 500 games, the agent got a perfect score 11 times.
<br>
    The first perfect score was on game 174.</h2>