# CS4049 Assignment 2 - Atari Deep Learning

In [60]:
# !pip install tensorflow==2.3.1 gym keras-rl2 gym[atari]
# !apt-get install -y xvfb python-opengl ffmeg > /def/null 2>&1

!pip install atari_py==0.2.6 gym==0.17.2 keras-rl2 pyglet
!pip install -U colabgymrender

/bin/bash: /def/null: No such file or directory


In [61]:
# Import Libraries
import gym
import atari_py
import random
from colabgymrender.recorder import Recorder

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

import pyvirtualdisplay


# Start an invisible virtual display (visible=False is the setting to use with Xvfb).
_display = pyvirtualdisplay.Display(size=(1400, 900), visible=False)
_ = _display.start()  # bind the return value so the cell produces no output

In [62]:
# Open a TF1-compat session configured to log device placement.
sess = tf.compat.v1.Session(
    config=tf.compat.v1.ConfigProto(log_device_placement=True),
)

Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7



# Test Random Environment

In [63]:
# Create the Assault game environment and wrap it in colabgymrender's Recorder,
# which is pointed at the ./video directory.
env = Recorder(gym.make('Assault-v0'), './video')

# Observation dimensions and the number of discrete actions; both are used
# later to size the network's input and output layers.
height, width, channels = env.observation_space.shape
actions = env.action_space.n

In [64]:
# Show the human-readable name of each discrete action index.
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'UP', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [65]:
# Sanity-check the environment by playing a few episodes with uniformly
# random actions and printing the total reward of each episode.
episodes = 5
for episode in range(1, episodes + 1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        # Sample from the environment's full action space. The previous
        # hard-coded list [0, 1, 2, 3, 4, 5] could never select the last
        # action (index 6, 'LEFTFIRE').
        action = random.randrange(env.action_space.n)
        n_state, reward, done, info = env.step(action)
        score += reward

    print('Episode:{} Score:{}'.format(episode, score))

env.close()

Episode:1 Score:210.0
Episode:2 Score:357.0
Episode:3 Score:273.0
Episode:4 Score:273.0
Episode:5 Score:294.0


# Reinforcement Learning with Frames

## Build Model with Keras (Building the neural network)

In [66]:
def build_model(height, width, channels, actions, window_length=3):
    """Build the convolutional Q-network used by the DQN agent.

    The network takes a stack of ``window_length`` frames, each of shape
    ``(height, width, channels)``, passes it through three convolutional
    layers and two dense layers, and emits one linear output per action.

    Args:
        height: Frame height in pixels.
        width: Frame width in pixels.
        channels: Number of colour channels per frame.
        actions: Number of discrete actions (size of the output layer).
        window_length: Number of stacked frames per observation. It must
            equal the ``window_length`` of the agent's ``SequentialMemory``
            (3 in ``build_agent``); defaults to 3 to keep existing calls
            unchanged.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # Leading axis is the frame stack; previously this was a hard-coded 3.
    stacked_input_shape = (window_length, height, width, channels)

    model = Sequential()
    model.add(Convolution2D(32, (8, 8), strides=(4, 4), activation='relu',
                            input_shape=stacked_input_shape))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    # Linear activation: the outputs are unbounded Q-value estimates.
    model.add(Dense(actions, activation='linear'))
    return model

In [67]:
# Drop any model left over from a previous run of this notebook. On a fresh
# kernel the name does not exist yet, and that must not abort "Run All".
try:
    del model
except NameError:
    pass

In [68]:
# Instantiate the Q-network from the environment's frame dimensions and action count.
model = build_model(height, width, channels, actions)

In [69]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 3, 61, 39, 32)     6176      
                                                                 
 conv2d_10 (Conv2D)          (None, 3, 29, 18, 64)     32832     
                                                                 
 conv2d_11 (Conv2D)          (None, 3, 27, 16, 64)     36928     
                                                                 
 flatten_3 (Flatten)         (None, 82944)             0         
                                                                 
 dense_12 (Dense)            (None, 512)               42467840  
                                                                 
 dense_13 (Dense)            (None, 256)               131328    
                                                                 
 dense_14 (Dense)            (None, 7)                

## Build Agent with Keras-RL

In [70]:
def build_agent(model, actions):
    """Assemble a dueling DQN agent around ``model``.

    Args:
        model: Keras model that estimates Q-values.
        actions: Number of discrete actions the agent can choose from.

    Returns:
        An uncompiled ``DQNAgent``.
    """
    # Epsilon-greedy exploration with eps annealed linearly from 1.0 down to
    # 0.1 over 10,000 steps; eps is fixed at 0.2 at test time.
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000,
    )

    # Replay memory holding up to 1,000 entries, with 3-frame observation windows.
    replay = SequentialMemory(limit=1000, window_length=3)

    return DQNAgent(
        model=model,
        memory=replay,
        policy=exploration,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000,
    )

In [71]:
# Build the agent and compile it with the Adam optimizer.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-4))

In [72]:
# Train the agent on the environment for 10,000 steps, without rendering.
dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)

Training for 1000 steps ...
Interval 1 (0 steps performed)
 1000/10000 [==>...........................] - ETA: 3:00 - reward: 0.4410done, took 20.461 seconds


<keras.callbacks.History at 0x7f3e6af1bc50>

In [None]:
# Evaluate the agent over 10 episodes and report the mean episode reward.
scores = dqn.test(env, nb_episodes=10, visualize=False)
print(np.asarray(scores.history['episode_reward']).mean())

In [None]:
# Presumably plays back the video captured by the Recorder wrapper — confirm against colabgymrender.
env.play()

# Reinforcement Learning with RAM

# Reinforcement Learning by Mixing Screen and RAM