# CS4049 Assignment 2 - Atari Deep Learning

In [12]:
# !pip install tensorflow==2.3.1 gym keras-rl2 gym[atari]

In [5]:
# !pip install keras-rl2

Collecting keras-rl2
  Downloading keras_rl2-1.0.5-py3-none-any.whl (52 kB)
Collecting tensorflow-estimator<2.4.0,>=2.3.0
  Downloading tensorflow_estimator-2.3.0-py2.py3-none-any.whl (459 kB)
Collecting numpy<1.19.0,>=1.16.0
  Downloading numpy-1.18.5-cp38-cp38-win_amd64.whl (12.8 MB)
Collecting scipy==1.4.1
  Downloading scipy-1.4.1-cp38-cp38-win_amd64.whl (31.0 MB)


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\Cael\\anaconda3\\Lib\\site-packages\\~umpy\\core\\_multiarray_tests.cp38-win_amd64.pyd'
Consider using the `--user` option or check the permissions.



Collecting gast==0.3.3
  Downloading gast-0.3.3-py2.py3-none-any.whl (9.7 kB)
Installing collected packages: numpy, tensorflow-estimator, scipy, gast, keras-rl2
  Attempting uninstall: numpy
    Found existing installation: numpy 1.20.1
    Uninstalling numpy-1.20.1:
      Successfully uninstalled numpy-1.20.1


In [1]:
# Import Libraries
import gym
import atari_py
import random

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

# Test Random Environment

In [4]:
# Create game environment
env = gym.make('Assault-v0')
height, width, channels = env.observation_space.shape
actions = env.action_space.n

In [5]:
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'UP', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [6]:
episodes = 5
for episode in range(1, episodes + 1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        action = random.choice([0, 1, 2, 3, 4, 5])
        n_state, reward, done, info = env.step(action)
        score += reward

    print('Episode:{} Score:{}'.format(episode, score))

env.close()

Episode:1 Score:147.0
Episode:2 Score:378.0
Episode:3 Score:399.0
Episode:4 Score:189.0
Episode:5 Score:441.0


# Reinforcement Learning with Frames

## Build Model with Keras (Building the neural network)

In [7]:
def build_model(height, width, channels, actions):
    model = Sequential()
    model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(3,height, width, channels)))
    model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    model.add(Convolution2D(64, (3,3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [8]:
del model

NameError: name 'model' is not defined

In [9]:
model = build_model(height, width, channels, actions)

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 3, 61, 39, 32)     6176      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 3, 29, 18, 64)     32832     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 27, 16, 64)     36928     
_________________________________________________________________
flatten (Flatten)            (None, 82944)             0         
_________________________________________________________________
dense (Dense)                (None, 512)               42467840  
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_2 (Dense)              (None, 7)                 1

## Build Agent with Keras-RL

In [11]:
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=1000
                   )
    return dqn

In [12]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-4))

In [13]:
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)

Training for 10000 steps ...
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
  592/10000: episode: 1, duration: 38.498s, episode steps: 592, steps per second:  15, episode reward: 210.000, mean reward:  0.355 [ 0.000, 21.000], mean action: 3.034 [0.000, 6.000],  loss: --, mean_q: --, mean_eps: --
 1102/10000: episode: 2, duration: 221.526s, episode steps: 510, steps per second:   2, episode reward: 189.000, mean reward:  0.371 [ 0.000, 21.000], mean action: 2.978 [0.000, 6.000],  loss: 76.969069, mean_q: 14.124125, mean_eps: 0.905410
 1830/10000: episode: 3, duration: 817.713s, episode steps: 728, steps per second:   1, episode reward: 231.000, mean reward:  0.317 [ 0.000, 21.000], mean action: 2.953 [0.000, 6.000],  loss: 2.629722, mean_q: 12.041523, mean_eps: 0.868105
done, took 1290.590 seconds


<tensorflow.python.keras.callbacks.History at 0x20a0a24eac0>

In [16]:
scores = dqn.test(env, nb_episodes=10, visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...


KeyboardInterrupt: 

# Reinforcement Learning with RAM

# Reinforcement Learning by Mixing Screen and RAM

## Save model to file

In [15]:
dqn.save_weights("ScreenLearningFirstModel.h5f")

In [None]:
dqn.load_weights("ScreenLearningFirstModel.h5f")