# CS4049 Assignment 2 - Atari Deep Learning

In [3]:
# !pip install tensorflow==2.3.1 gym keras-rl2 gym[atari]
!pip install atari_py==0.2.6 gym==0.17.2 keras-rl2

Collecting atari_py==0.2.6
  Using cached atari_py-0.2.6-cp37-cp37m-manylinux1_x86_64.whl (2.8 MB)
Collecting gym==0.17.2
  Using cached gym-0.17.2.tar.gz (1.6 MB)
Collecting keras-rl2
  Downloading keras_rl2-1.0.5-py3-none-any.whl (52 kB)
[K     |████████████████████████████████| 52 kB 679 kB/s 
Reason for being yanked: re-release with new wheels[0m
Building wheels for collected packages: gym
  Building wheel for gym (setup.py) ... [?25l[?25hdone
  Created wheel for gym: filename=gym-0.17.2-py3-none-any.whl size=1650889 sha256=405ddea6e0662f51c7ec6dfbd23adea64aeafe1dd77511641d62fbde0e397f2c
  Stored in directory: /root/.cache/pip/wheels/18/e1/58/89a2aa24e6c2cc800204fc02010612afdf200926c4d6bfe315
Successfully built gym
Installing collected packages: keras-rl2, gym, atari-py
  Attempting uninstall: gym
    Found existing installation: gym 0.17.3
    Uninstalling gym-0.17.3:
      Successfully uninstalled gym-0.17.3
  Attempting uninstall: atari-py
    Found existing installation: at

In [4]:
# Import Libraries
import gym
import atari_py
import random

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [5]:
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7



# Test Random Environment

In [6]:
# Create game environment
env = gym.make('Assault-v0')
height, width, channels = env.observation_space.shape
actions = env.action_space.n

In [None]:
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'UP', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [None]:
episodes = 5
for episode in range(1, episodes + 1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        action = random.choice([0, 1, 2, 3, 4, 5])
        n_state, reward, done, info = env.step(action)
        score += reward

    print('Episode:{} Score:{}'.format(episode, score))

env.close()

Episode:1 Score:168.0
Episode:2 Score:252.0
Episode:3 Score:189.0
Episode:4 Score:252.0
Episode:5 Score:231.0


# Reinforcement Learning with Frames

## Build Model with Keras (Building the neural network)

In [None]:
def build_model(height, width, channels, actions):
    model = Sequential()
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(3,height, width, channels)))
    model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Convolution2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [None]:
del model

NameError: name 'model' is not defined

In [None]:
model = build_model(height, width, channels, actions)

In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 3, 61, 39, 32)     6176      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 3, 29, 18, 64)     32832     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 27, 16, 64)     36928     
_________________________________________________________________
flatten (Flatten)            (None, 82944)             0         
_________________________________________________________________
dense (Dense)                (None, 512)               42467840  
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 7)                 1

## Build Agent with Keras-RL

In [None]:
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=1000
                   )
    return dqn

In [None]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-4))

In [None]:
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)

In [None]:
scores = dqn.test(env, nb_episodes=10, visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Episode 1: reward: 189.000, steps: 868


KeyboardInterrupt: 

# Reinforcement Learning with RAM

# Reinforcement Learning by Mixing Screen and RAM