Dervla Gargan - 22346279
Mark Langtry - 22340475
Amy McMahon - 22346619

Code executed without errors :)

# Imports

In [126]:
import gym
import random
from keras import Sequential
from collections import deque
from keras.layers import Dense
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from keras.activations import relu, linear
import numpy as np



# Create environment

In [127]:
# Shared module-level MountainCar-v0 environment, used by train_dqn and
# random_policy below. render_mode="human" asks gym to display the
# simulation while it runs.
env = gym.make('MountainCar-v0', render_mode="human")

# DQN Agent

In [128]:
class DQN:

    """Implementation of the deep Q-learning algorithm.

    The agent keeps a replay memory of (state, action, reward, next_state,
    done) transitions, selects actions with an epsilon-greedy policy and fits
    a small fully connected network to one-step Q-learning targets.
    """

    def __init__(self, actionSpace, stateSpace):
        """Create the agent.

        Args:
            actionSpace: number of discrete actions (size of the network output).
            stateSpace: number of features in one observation (network input).
        """
        self.actionSpace = actionSpace
        self.stateSpace = stateSpace
        self.epsilon = 1.0  # current exploration rate; decayed in replay()
        self.gamma = .95  # discount applied to the best next-state Q-value
        self.batchSize = 64  # transitions sampled per replay() call
        self.epsilonMin = .01  # epsilon stops decaying once it is not above this
        self.lr = 0.001  # learning rate handed to the Adam optimizer
        self.epsilonDecay = .995  # multiplicative decay applied per replay()
        self.memory = deque(maxlen=100000)
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled Sequential model: two hidden Dense layers of 24 ReLU
            units and a linear output with one unit per action, trained with
            MSE loss.
        """
        model = Sequential()
        model.add(Dense(24, input_dim=self.stateSpace, activation=relu))
        model.add(Dense(24, activation=relu))
        model.add(Dense(self.actionSpace, activation=linear))
        # `lr` is a deprecated alias that newer Keras releases reject;
        # `learning_rate` is the supported keyword.
        model.compile(loss='mse', optimizer=Adam(learning_rate=self.lr))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action with the epsilon-greedy policy.

        Args:
            state: observation passed to the model when acting greedily; the
                caller in this file supplies shape (1, stateSpace).

        Returns:
            A random action index with probability epsilon, otherwise the
            index of the largest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.actionSpace)
        # verbose=0 keeps Keras from printing a progress bar on every step.
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def replay(self):
        """Train the network on one random mini-batch from memory.

        Does nothing until at least `batchSize` transitions are stored.
        After fitting, epsilon is decayed once while it is above epsilonMin.
        """
        if len(self.memory) < self.batchSize:
            return

        batch = random.sample(self.memory, self.batchSize)
        states, actions, rewards, next_states, dones = zip(*batch)

        # Explicit reshape instead of a bare squeeze: squeeze would also drop
        # the batch or feature axis whenever batchSize or stateSpace is 1.
        batch_shape = (self.batchSize, self.stateSpace)
        states = np.reshape(np.array(states), batch_shape)
        next_states = np.reshape(np.array(next_states), batch_shape)
        actions = np.array(actions)
        rewards = np.array(rewards)
        dones = np.array(dones, dtype=float)

        # One-step Q-learning target; (1 - done) removes the bootstrapped
        # value for transitions that ended the episode.
        next_q = self.model.predict_on_batch(next_states)
        targets = rewards + self.gamma * np.amax(next_q, axis=1) * (1 - dones)

        # Start from the current predictions so only the taken action's
        # Q-value contributes to the loss.
        targets_full = np.array(self.model.predict_on_batch(states))
        targets_full[np.arange(self.batchSize), actions] = targets

        self.model.fit(states, targets_full, epochs=1, verbose=0)
        if self.epsilon > self.epsilonMin:
            self.epsilon *= self.epsilonDecay



# Get Reward for agent

In [129]:
def get_reward(state):
    """Return a shaped reward based on the first component of `state`.

    Args:
        state: indexable observation; only state[0] is read (the caller in
            this file passes the MountainCar observation, whose first entry
            is presumably the car position).

    Returns:
        10 when state[0] >= 0.5 (a success message is also printed),
        (1 + state[0]) ** 2 when state[0] > -0.4, and 0 otherwise.
    """
    position = state[0]
    if position >= 0.5:
        print("The car has successfully reached the goal")
        return 10
    if position > -0.4:
        # The reward grows quadratically as the value approaches 0.5.
        return (1 + position) ** 2
    return 0


# Code to train the DQN agent

In [130]:

def train_dqn(episode, max_steps=100):
    """Train a fresh DQN agent on the module-level `env`.

    Args:
        episode: number of episodes to run.
        max_steps: maximum number of environment steps per episode
            (defaults to 100).

    Returns:
        A list with one entry per episode: the sum of the shaped rewards
        (from get_reward) collected in that episode.
    """
    lossList = []
    dqn_agent = DQN(env.action_space.n, env.observation_space.shape[0])
    for e in range(episode):
        # reset() returns (observation, info); only the observation is used.
        state, _ = env.reset()
        state = np.reshape(state, (1, dqn_agent.stateSpace))
        score = 0
        for _ in range(max_steps):
            action = dqn_agent.act(state)
            env.render()

            # The environment's own reward is replaced by the shaped reward.
            next_state, _, terminated, truncated, _ = env.step(action)
            reward = get_reward(next_state)
            score += reward
            next_state = np.reshape(next_state, (1, dqn_agent.stateSpace))
            # Only true termination is stored as `done`, so a time-limit
            # truncation does not zero the bootstrapped value in replay().
            dqn_agent.remember(state, action, reward, next_state, terminated)
            state = next_state
            dqn_agent.replay()
            # Stop on truncation as well: stepping an environment after its
            # time limit has fired is not valid.
            if terminated or truncated:
                print("episode: {}/{}, score: {}".format(e, episode, score))
                break
        lossList.append(score)
    return lossList



In [131]:

def random_policy(episode, step):
    """Run the module-level `env` with uniformly sampled random actions.

    Args:
        episode: number of episodes to run.
        step: maximum number of steps per episode.
    """
    for i_episode in range(episode):
        env.reset()
        for t in range(step):
            env.render()
            # The attribute is `action_space`; `actionSpace` does not exist
            # on the environment.
            action = env.action_space.sample()
            # step() returns five values, matching the unpacking in train_dqn.
            _, _, terminated, truncated, _ = env.step(action)
            if terminated or truncated:
                print("The Episode finished running after {} timesteps".format(t+1))
                break
        # Announce the next episode once per episode, not once per step.
        if i_episode < episode - 1:
            print("Beginning the next episode")



In [132]:

if __name__ == '__main__':
    # Train for a fixed number of episodes and plot the per-episode score.
    numOfEpisodes = 60
    loss = train_dqn(numOfEpisodes)
    # The x-axis is derived from the returned list (the original referenced
    # an undefined name `episodes`), so x and y always have equal length.
    plt.plot(list(range(1, len(loss) + 1)), loss)
    plt.show()



