In [1]:
from environment import City

import pygame
import random
import numpy as np
from keras import Sequential
from collections import deque
from keras.layers import Dense
import matplotlib.pyplot as plt
from keras.optimizers import adam

env = City()
# Seed every RNG the agent draws from so repeated runs explore identically:
# NumPy decides explore-vs-exploit in DQN.act, while the stdlib `random`
# module picks the exploratory action and samples the replay minibatch.
# NOTE(review): the Keras backend's weight initialisation is not seeded here,
# so runs may still differ in their initial network weights.
np.random.seed(0)
random.seed(0)


class DQN:

    """ Deep Q-learning agent with an epsilon-greedy policy and experience replay.

    A small fully connected network maps a state vector to one Q-value per
    action. Transitions are stored in a bounded replay buffer and the network
    is fitted on random minibatches drawn from it.
    """

    def __init__(self, action_space, state_space):
        """Create the agent and build its Q-network.

        Args:
            action_space: number of discrete actions (size of the output layer).
            state_space: length of the state vector (size of the input layer).
        """
        self.action_space = action_space
        self.state_space = state_space
        self.epsilon = 1                     # current exploration probability
        self.gamma = .95                     # discount factor for future reward
        self.batch_size = 64                 # transitions sampled per replay step
        self.epsilon_min = .01               # floor below which epsilon stops decaying
        self.epsilon_decay = .995            # multiplicative decay applied per replay step
        self.learning_rate = 0.001
        self.memory = deque(maxlen=100000)   # replay buffer; oldest entries are dropped
        self.model = self.build_model()

    def build_model(self):
        """Return a compiled network: two 64-unit ReLU layers and a linear Q-value head."""
        model = Sequential()
        model.add(Dense(64, input_shape=(self.state_space,), activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.action_space, activation='linear'))
        model.compile(loss='mse', optimizer=adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` using the epsilon-greedy rule.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest predicted Q-value. ``state`` is
        passed to ``model.predict`` unchanged, so it must already carry a
        leading batch axis.

        Returns:
            int: index of the chosen action.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_space)
        act_values = self.model.predict(state)
        # Cast so both branches hand back a plain Python int.
        return int(np.argmax(act_values[0]))

    def replay(self):
        """Fit the network on one random minibatch, then decay epsilon.

        Does nothing until the buffer holds at least ``batch_size`` transitions.
        For each sampled transition the target of the action actually taken is
        ``reward + gamma * max_a Q(next_state, a)``, with the bootstrap term
        dropped when ``done`` is set; the targets of all other actions are left
        at the network's current predictions.
        """
        if len(self.memory) < self.batch_size:
            return

        minibatch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        # Reshape explicitly rather than np.squeeze: squeeze removes *every*
        # length-1 axis, which also destroys the batch axis when batch_size == 1
        # and the feature axis when state_space == 1.
        batch_shape = (self.batch_size, self.state_space)
        states = np.reshape(np.array(states), batch_shape)
        next_states = np.reshape(np.array(next_states), batch_shape)
        actions = np.array(actions)
        rewards = np.array(rewards)
        dones = np.array(dones, dtype=np.float64)  # True -> 1.0, False -> 0.0

        # Copy predictions into fresh ndarrays so the in-place update below is
        # safe regardless of what the backend hands back.
        next_q = np.array(self.model.predict_on_batch(next_states))
        targets = rewards + self.gamma * np.amax(next_q, axis=1) * (1.0 - dones)
        targets_full = np.array(self.model.predict_on_batch(states))

        # Overwrite only the Q-value of the action taken in each transition.
        targets_full[np.arange(self.batch_size), actions] = targets

        self.model.fit(states, targets_full, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


def train_dqn(episode, action_space=5, state_space=4, max_steps=1000):
    """Train a fresh DQN agent on the module-level ``env`` and return its scores.

    Args:
        episode: number of episodes to run.
        action_space: number of discrete actions handed to the agent.
        state_space: length of the state vector; observations are reshaped
            to ``(1, state_space)`` before being given to the agent.
        max_steps: upper bound on steps per episode.

    Returns:
        list: the summed reward of each episode, in order.
    """
    scores = []
    agent = DQN(action_space, state_space)
    for e in range(episode):
        state = np.reshape(env.reset(), (1, state_space))
        score = 0
        for _ in range(max_steps):
            action = agent.act(state)
            # This environment returns (reward, next_state, done) in that order.
            reward, next_state, done = env.step(action)
            score += reward

            next_state = np.reshape(next_state, (1, state_space))
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            agent.replay()  # one learning step per environment step
            if done:
                break
        scores.append(score)
        print("episode: {}/{}, score: {}".format(e, episode, score))
    return scores


pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


Using TensorFlow backend.


In [None]:
# Run the full training session, then chart the score of every episode.
ep = 1000
loss = train_dqn(ep)  # despite the name, this is the per-episode score list
pygame.quit()

plt.plot(list(range(ep)), loss)
plt.ylabel('reward')
plt.xlabel('episodes')
plt.show()

episode: 0/1000, score: -43.406917946140396
episode: 1/1000, score: -29.765924443442138
episode: 2/1000, score: -2.7403263581369117
episode: 3/1000, score: -12.21967914575941
episode: 4/1000, score: -17.114835192865492
episode: 5/1000, score: -10.584398848996209
episode: 6/1000, score: 16.58276253029822
episode: 7/1000, score: -1.5732620709236933
episode: 8/1000, score: 10.7256820872158
episode: 9/1000, score: -4.145395466519075
episode: 10/1000, score: -40.80445947224669
episode: 11/1000, score: -4.093425668540062
episode: 12/1000, score: 3.9759139909209438
episode: 13/1000, score: -17.1450135385046
episode: 14/1000, score: -5.772443183618847
episode: 15/1000, score: 8.807215001806515
episode: 16/1000, score: 9.694659848091959
episode: 17/1000, score: 5.680766581838359
episode: 18/1000, score: 13.26724800713972
episode: 19/1000, score: 5.653097563788805
episode: 20/1000, score: -18.1944728827292
episode: 21/1000, score: 0.319660112501051
episode: 22/1000, score: -29.389187656484737
ep

episode: 187/1000, score: 12.384776310850235
episode: 188/1000, score: 25.42534080710379
episode: 189/1000, score: 23.246211251235323
episode: 190/1000, score: 3.313616345243978
episode: 191/1000, score: 29.649655434629018
episode: 192/1000, score: 29.570660511172846
episode: 193/1000, score: 6.754107822777396
episode: 194/1000, score: 17.772378655320757
episode: 195/1000, score: 13.680329730474352
episode: 196/1000, score: 7.31652720338111
episode: 197/1000, score: 40.619296778199335
episode: 198/1000, score: 6.342797551819753
episode: 199/1000, score: -6.412260398118587
episode: 200/1000, score: 33.5440947723653
episode: 201/1000, score: 28.38552708502482
episode: 202/1000, score: 24.979501308256342
episode: 203/1000, score: -33.440437836512785
episode: 204/1000, score: -28.326730679364285
episode: 205/1000, score: 14.404760348684759
episode: 206/1000, score: 22.804731363457133
episode: 207/1000, score: 26.813207915827967
episode: 208/1000, score: 35.45742752749558
episode: 209/1000,

episode: 373/1000, score: 20.595023109728988
episode: 374/1000, score: 30.68332812825267
episode: 375/1000, score: 23.828803309176934
episode: 376/1000, score: 13.038404810405298
episode: 377/1000, score: 37.19412236331675
episode: 378/1000, score: 3.3876218732651218
episode: 379/1000, score: 12.337722339831622
episode: 380/1000, score: 3.7360708140110503
episode: 381/1000, score: 21.817821063276355
episode: 382/1000, score: 9.350681614207582
episode: 383/1000, score: 31.0
episode: 384/1000, score: 24.748809496813376
episode: 385/1000, score: 38.42150822589607
episode: 386/1000, score: 25.849433241279208
episode: 387/1000, score: 40.110809933523555
episode: 388/1000, score: 34.496913462633174
episode: 389/1000, score: 32.98332962798263
episode: 390/1000, score: 8.013155617496427
episode: 391/1000, score: 41.5
episode: 392/1000, score: 13.780109889280517
episode: 393/1000, score: 34.99019513592785
episode: 394/1000, score: 25.115528128088304
episode: 395/1000, score: 24.029964086141668
