# <span style="color:teal"> Deep Reinforcement Learning for Atari Enduro-v0 </span>

### Import Required Packages

In [None]:
from time import sleep

import gym
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

from PIL import Image

from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Dropout, concatenate, Permute
from keras.layers import Input, Conv2D
from keras.optimizers import Adam
from keras.activations import relu, linear
from keras.layers.advanced_activations import LeakyReLU

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor

***
### Enduro Environment

In [None]:
# Create the Gym environment registered under the id 'Enduro-v0'.
env = gym.make('Enduro-v0')

# Quick visual check: render once, keep the window up for a second, then close it.
# NOTE(review): render() is called without a prior env.reset(), and `env` is
# used again for training and testing after this close() — confirm the
# installed gym version allows both.
env.render()
sleep(1)
env.close()

#### 1. *Number of possible actions*

In [None]:
# Number of actions reported by the environment's action space; it is used
# as the width of the network's final Dense layer.
nb_actions = env.action_space.n
print('Total number of Possible actions is :', nb_actions)

#### 2. *Taking stack of 4 consecutive frames*

In [None]:
# The network input is a stack of `window_length` frames, each of size
# `frame_shape`, laid out as (window_length, rows, cols).
window_length = 4
frame_shape = (84, 84)
input_shape = (window_length, *frame_shape)
print('Input Shape is :', input_shape)

***
### Defining class for pre-processing the game_frames

In [None]:
class GameProcess(Processor):
    """Pre-processing hooks for observations, state batches and rewards.

    Observations are resized to the module-level ``frame_shape`` and converted
    to 8-bit grayscale, state batches are cast to ``float32`` and divided by
    255, and rewards are clipped to the range [-1, 1].
    """

    def process_observation(self, observation):
        """Return ``observation`` as a grayscale ``uint8`` array.

        Args:
            observation: Array accepted by ``PIL.Image.fromarray``
                (presumably the raw RGB frame from the environment).

        Returns:
            A ``uint8`` array whose shape equals ``frame_shape``.
        """
        img = Image.fromarray(observation)
        # PIL's resize() takes (width, height), whereas `frame_shape` is an
        # array shape (rows, cols) = (height, width). Swap the two so the
        # resulting array matches `frame_shape` even for non-square frames.
        height, width = frame_shape
        img = np.array(img.resize((width, height)).convert('L'))
        # Kept as uint8 here; scaling to float happens per batch in
        # process_state_batch.
        return img.astype('uint8')

    def process_state_batch(self, batch):
        """Return ``batch`` cast to ``float32`` and divided by 255."""
        processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        """Return ``reward`` clipped to the interval [-1, 1]."""
        return np.clip(reward, -1., 1.)

***
## DeepMind Architecture

In [None]:
# Convolutional Q-network: three Conv2D + ReLU stages, a 512-unit dense
# layer, and a linear output with one unit per action.
model = Sequential()
# Reorder the axes of each sample from (window_length, rows, cols) to
# (rows, cols, window_length), so the stacked frames sit on the last axis
# (presumably the channels-last layout Conv2D expects here — confirm).
model.add(Permute((2, 3, 1), input_shape=input_shape))
model.add(Conv2D(32, (8, 8), strides=(4, 4)))
model.add(Activation('relu'))
model.add(Conv2D(64, (4, 4), strides=(2, 2)))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3), strides=(1, 1)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
# One linear output per action.
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# summary() prints the table itself and returns None, so it is called
# directly instead of being wrapped in print(), which would emit a stray
# "None" after the table.
model.summary()

***
### Configuring the Agent

#### 1. *Allocating memory for experience replay*

In [None]:
# Replay memory capped at 1,000,000 entries; `window_length` is the same
# value used for the frame-stack depth of the model input.
memory = SequentialMemory(limit=1000000, window_length=window_length)

#### 2. *Policy: Epsilon Greedy Exploration*
<span style="color:teal">*Exploration is gradually decreased during training*</span>

In [None]:
# Epsilon-greedy policy wrapped so that its `eps` attribute is annealed
# between value_max=1.0 and value_min=0.1 over nb_steps=1,000,000 steps.
# value_test=.05 is presumably the epsilon used when testing — confirm
# against the keras-rl LinearAnnealedPolicy documentation.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05, nb_steps=1000000)

#### 3. *Compiling DQN Agent*

In [None]:
# Assemble the DQN agent from the network, exploration policy, replay memory
# and the GameProcess pre-processor defined above.
# nb_steps_warmup=50000, target_model_update=10000, train_interval=4 and
# delta_clip=1. are passed straight to keras-rl; see the DQNAgent
# documentation for their exact semantics.
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory, processor=GameProcess(),
               nb_steps_warmup=50000, gamma=.99, target_model_update=10000, train_interval=4, delta_clip=1.)

In [None]:
# Compile the agent with an Adam optimizer (learning rate 0.00025) and track
# mean absolute error as an additional metric.
dqn.compile(Adam(lr=.00025), metrics=['mae'])

***
## <span style="color:teal"> Training the model </span>

#### 1. *Check if Agent is learning for first 0.5M Steps*

In [None]:
# Train for 500,000 steps and keep the returned history for plotting.
# NOTE(review): visualize=True presumably renders every step and slows
# training — confirm whether it is wanted for a run of this length.
history = dqn.fit(env, nb_steps=500000, visualize=True)

#### 2. *Summarizing the training history*

In [None]:
# Plot the 'episode_reward' series recorded during the 500,000-step run.
# The title states 0.5 million steps so that it matches nb_steps=500000
# used for this training run (it previously said 0.3 million).
plt.plot(history.history['episode_reward'])
plt.title('Training for 0.5 million steps')
plt.legend(['Episode reward'], loc='upper right')
plt.show()

# Plot the 'nb_episode_steps' series from the same run.
plt.plot(history.history['nb_episode_steps'])
plt.title('Training for 0.5 million steps')
plt.legend(['No. of episode steps'], loc='upper right')
plt.show()

#### 3. *Saving the weights*

In [None]:
# Write the agent's weights to 'dqn_atari_Enduro.h5f', replacing any existing file.
dqn.save_weights('dqn_atari_Enduro.h5f', overwrite=True)

### <span style="color:red">**-  -  Caution   -  -**</span>

### <span style="color:teal">Re-Training the model (for 2M steps)  </span>
*Loading the saved weights (of 0.5M steps)*

In [None]:
# Load the previously saved weights into the existing agent.
# NOTE(review): this runs before the agent is re-created in the next cell;
# it presumably relies on both agents sharing the same `model` object —
# confirm.
dqn.load_weights('dqn_atari_Enduro.h5f')

In [None]:
# Re-create the agent around the same model, policy and memory objects for
# the longer training run.
# NOTE(review): unlike the first agent, nb_steps_warmup is not passed here,
# so the library default applies. The same `policy` instance is reused, so
# epsilon annealing presumably restarts from value_max — confirm both are
# intended.
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory, processor=GameProcess(),
               gamma=.99, target_model_update=10000, train_interval=4, delta_clip=1.)

# Same optimizer settings as the first run: Adam, learning rate 0.00025, MAE metric.
dqn.compile(Adam(lr=0.00025), metrics=['mae'])

In [None]:
# Train for 2,000,000 steps (no visualize argument is passed this time) and
# keep the returned history for plotting.
history2M = dqn.fit(env, nb_steps=2000000)

*Summarizing the training history*

In [None]:
# Draw one figure per recorded series from the 2,000,000-step run:
# first 'episode_reward', then 'nb_episode_steps'.
for metric_key, legend_label in (
    ('episode_reward', 'Episode reward'),
    ('nb_episode_steps', 'No. of episode steps'),
):
    plt.plot(history2M.history[metric_key])
    plt.title('Training for 2 million steps')
    plt.legend([legend_label], loc='upper right')
    plt.show()

#### 4. *Saving final weights*

In [None]:
# Save the final weights to the same file name as before; overwrite=True
# replaces the earlier checkpoint.
dqn.save_weights('dqn_atari_Enduro.h5f', overwrite=True)

***
## <span style="color:teal"> Testing the model </span>

#### 1. Loading the weights for testing

In [None]:
# Load the saved weights from 'dqn_atari_Enduro.h5f' before evaluation.
dqn.load_weights('dqn_atari_Enduro.h5f')

#### 2. Resetting the environment for testing

In [None]:
# Reset the environment, then run the agent for 2 rendered test episodes.
# NOTE(review): the explicit reset is presumably redundant if dqn.test()
# resets the environment at the start of each episode — confirm.
env.reset()
dqn.test(env, nb_episodes=2, visualize=True)

In [None]:
# Close the environment now that testing is finished.
env.close()

In [None]:
# Trained on: Intel® Xeon® Processor E5, 2.40 GHz, Nvidia Quadro K4200
# Bhartendu Thakur, Machine Learning & Computing
# https://in.mathworks.com/matlabcentral/profile/authors/10083740-bhartendu?&detail=fileexchange
# https://in.linkedin.com/in/bhartendu-thakur-56bb6285