In [None]:
# Install the RL dependencies, in this order: Gym with the Atari extras,
# Stable-Baselines3 with its extras, and keras-rl2.
# NOTE(review): no versions are pinned, so a fresh runtime may resolve different
# releases than the ones this notebook was developed against - consider pinning.
# NOTE(review): stable-baselines3 is not referenced by any cell visible in this
# notebook - confirm it is still needed.
!pip install gym[atari]
!pip install stable-baselines3[extra]
!pip install keras-rl2

In [None]:
# gupload is the command-line tool the last cell of this notebook invokes
# (`gupload --to <folder id> <files>`) to push weights and logs to Drive.
!pip install gupload

# IMPORTS

In [3]:
import random

import gym
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from google.colab import auth
from PIL import Image
from pydrive.auth import GoogleAuth
from rl.agents.dqn import DQNAgent
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
from rl.core import Processor
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy

# Authenticate the current Colab user (presumably so the upload step at the end
# of the notebook can reach Google Drive - TODO confirm).
# NOTE(review): despite the GoogleAuth import, no PyDrive client is created
# anywhere in the visible cells; only this Colab auth call is made.
auth.authenticate_user()

# GAME ENVIRONMENT PREPROCESSING

In [4]:
# Size every raw frame is resized to; AtariProcessor asserts that each processed
# observation has exactly this shape.
inputShape = (84, 84)
# Number of consecutive frames that form one state: used both for the network's
# input shape and for the replay memory's window_length.
windowLength = 4

class AtariProcessor(Processor):
    """Observation, batch and reward preprocessing hooks for the DQN agent.

    Frames are shrunk to ``inputShape`` grayscale ``uint8`` arrays, batches are
    scaled to ``float32`` in ``[0, 1]`` just before they reach the network, and
    rewards are clipped to ``[-1, 1]``.
    """

    def process_observation(self, obs):
        """Resize one colour frame to ``inputShape`` and convert it to grayscale.

        Args:
            obs: 3-D array holding a single frame (rows, columns, channels).

        Returns:
            2-D ``uint8`` array whose shape equals ``inputShape``.
        """
        assert obs.ndim == 3
        rows, cols = inputShape
        # PIL expects its size argument as (width, height), whereas numpy shapes
        # are (rows, cols). Swap explicitly so that a non-square ``inputShape``
        # still satisfies the shape assertion below (a square one is unaffected).
        frame = Image.fromarray(obs).resize((cols, rows)).convert('L')
        obs = np.array(frame)
        assert obs.shape == inputShape
        return obs.astype('uint8')

    def process_state_batch(self, batch):
        """Return ``batch`` as ``float32`` scaled from ``[0, 255]`` to ``[0, 1]``.

        Observations are produced as ``uint8`` by ``process_observation``; the
        conversion to float happens here, once per batch.
        """
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        """Clip ``reward`` to the closed interval ``[-1, 1]``."""
        return np.clip(reward, -1., 1.)


# instantiate game env

In [5]:
# Create the Atari environment and seed every random number generator the run
# relies on, so that Restart & Run All is as repeatable as possible.
SEED = 123

env = gym.make('SpaceInvadersDeterministic-v4')
random.seed(SEED)          # Python RNG (presumably used by keras-rl's replay sampling - TODO confirm)
np.random.seed(SEED)       # NumPy RNG
tf.random.set_seed(SEED)   # TensorFlow RNG; the model built in a later cell initialises its weights from it
env.seed(SEED)             # environment RNG

# Size of the discrete action space; reused when configuring the agent.
nb_actions = env.action_space.n
print('number of actions',nb_actions)
# Raw observation layout as reported by the environment.
height ,width, channels = env.observation_space.shape
print('types of actions:',env.unwrapped.get_action_meanings())
print('height:{} width:{} channels:{}'.format(height,width, channels))

number of actions 6
types of actions: ['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']
height:210 width:160 channels:3


# Build the CNN model described by Mnih et al. (2015)

In [6]:

# Network input: `windowLength` stacked frames of size `inputShape`, with the
# frame axis first, i.e. (4, 84, 84).
input_shape = (windowLength,) + inputShape
print(input_shape)

# Three convolutional layers followed by two dense layers; the final layer
# emits one linear Q-value per action.
# Only the first layer declares `input_shape`. The convolution that follows
# receives the permuted (84, 84, 4) tensor, so it must not repeat the
# pre-permute shape.
model1 = tf.keras.models.Sequential([
    # Move the frame axis last: (4, 84, 84) -> (84, 84, 4).
    tf.keras.layers.Permute((2, 3, 1), input_shape=input_shape),
    tf.keras.layers.Convolution2D(32, (8, 8), strides=(4, 4), activation='relu'),
    tf.keras.layers.Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'),
    tf.keras.layers.Convolution2D(64, (3, 3), strides=(1, 1), activation='relu'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(env.action_space.n, activation="linear"),
])

model1.summary()



(4, 84, 84)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 permute (Permute)           (None, 84, 84, 4)         0         
                                                                 
 conv2d (Conv2D)             (None, 20, 20, 32)        8224      
                                                                 
 conv2d_1 (Conv2D)           (None, 9, 9, 64)          32832     
                                                                 
 conv2d_2 (Conv2D)           (None, 7, 7, 64)          36928     
                                                                 
 flatten (Flatten)           (None, 3136)              0         
                                                                 
 dense (Dense)               (None, 512)               1606144   
                                                                 
 dense_1 (Dense)             (None, 6)      

# setting parameters

In [7]:
# --- Replay memory and observation processing ---------------------------------
memorylimit = 10_000            # maximum number of steps kept in the replay memory
processor = AtariProcessor()

# --- Exploration schedule -----------------------------------------------------
# Epsilon-greedy inner policy; BoltzmannQPolicy() is the alternative imported above.
innerpolicy = EpsGreedyQPolicy()
maxEps = 1.0                    # epsilon at the start of annealing
minEps = 0.1                    # epsilon once annealing has finished
testEps = 0.05                  # epsilon used when testing
annealSteps = 200_000           # number of steps over which epsilon is annealed

# --- DQN agent ----------------------------------------------------------------
# NOTE(review): warmup (50_000) is larger than memorylimit (10_000), so most
# warm-up transitions would presumably be evicted before learning starts -
# confirm this is intended.
warmup = 50_000                 # passed to the agent as nb_steps_warmup
discount = 0.99                 # passed to the agent as gamma
target_model_update = 10_000
train_interval = 4
delta_clip = 1.0

# --- Optimiser and training run -----------------------------------------------
lr = 2.5e-4                     # Adam learning rate
trainingSteps = 1_000_000       # nb_steps for dqn.fit
trainingLogInterval = 10_000    # log_interval for dqn.fit


# configure and compile the dqn agent




In [8]:
# Replay memory bounded by `memorylimit`, using windows of `windowLength` steps.
memory = SequentialMemory(window_length=windowLength, limit=memorylimit)

# Linearly anneal the inner policy's `eps` attribute from `maxEps` down to
# `minEps` over `annealSteps` steps; `testEps` is the value used for testing.
policy = LinearAnnealedPolicy(
    innerpolicy,
    attr='eps',
    value_max=maxEps,
    value_min=minEps,
    value_test=testEps,
    nb_steps=annealSteps,
)

# Dueling DQN (average-type), double DQN disabled, built on top of `model1`.
dqn = DQNAgent(
    model=model1,
    nb_actions=nb_actions,
    policy=policy,
    memory=memory,
    processor=processor,
    enable_double_dqn=False,
    enable_dueling_network=True,
    dueling_type='avg',
    nb_steps_warmup=warmup,
    gamma=discount,
    target_model_update=target_model_update,
    train_interval=train_interval,
    delta_clip=delta_clip,
)

# Compile with Adam at learning rate `lr`, tracking RMSE as the extra metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
dqn.compile(optimizer, metrics=[tf.keras.metrics.RootMeanSquaredError()])


# train the agent

In [None]:
# Output file names are derived from the environment name.
env_name = 'SpaceInvadersDeterministic-v4'
weights_filename = 'dqn_{}_weights.h5f'.format(env_name)
# `{step}` is deliberately left unformatted here; presumably the checkpoint
# callback substitutes the current step number when it writes a file.
checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
log_filename = 'dqn_{}_log.json'.format(env_name)

callbacks = [
    # Periodic weight snapshots every 200000 steps.
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=200000),
    # Training metrics written to a JSON log at interval 100.
    FileLogger(log_filename, interval=100),
]
trainLog = dqn.fit(env, callbacks=callbacks, nb_steps=trainingSteps, log_interval=trainingLogInterval)

# Always persist the final weights. Without this, `weights_filename` is never
# written, and a run that stops between checkpoints (an interrupted run, or a
# step count that is not a multiple of the checkpoint interval) would lose the
# most recent weights.
dqn.save_weights(weights_filename, overwrite=True)

# Upload logs and model weights to Google Drive

In [None]:
!gupload --to '1qvVYLfN8dghO2hSVGs9ruVDZzPV4rEux' *.h5f.*
!gupload --to '1qvVYLfN8dghO2hSVGs9ruVDZzPV4rEux' dqn_SpaceInvadersDeterministic-v4_log.json