# 0. Install Dependencies

In [4]:
!pip install tensorflow==2.3.1 gym keras-rl2 gym[atari]

Collecting tensorflow==2.3.1
[?25l  Downloading https://files.pythonhosted.org/packages/eb/18/374af421dfbe74379a458e58ab40cf46b35c3206ce8e183e28c1c627494d/tensorflow-2.3.1-cp37-cp37m-manylinux2010_x86_64.whl (320.4MB)
[K     |████████████████████████████████| 320.4MB 45kB/s 
Collecting keras-rl2
[?25l  Downloading https://files.pythonhosted.org/packages/b6/fc/143ee05aed804b3b9052d7b17b13832bc7f3c28e7b1bc50edd09c29d8525/keras_rl2-1.0.5-py3-none-any.whl (52kB)
[K     |████████████████████████████████| 61kB 9.8MB/s 
Collecting numpy<1.19.0,>=1.16.0
[?25l  Downloading https://files.pythonhosted.org/packages/d6/c6/58e517e8b1fb192725cfa23c01c2e60e4e6699314ee9684a1c5f5c9b27e1/numpy-1.18.5-cp37-cp37m-manylinux1_x86_64.whl (20.1MB)
[K     |████████████████████████████████| 20.1MB 1.4MB/s 
Collecting tensorflow-estimator<2.4.0,>=2.3.0
[?25l  Downloading https://files.pythonhosted.org/packages/e9/ed/5853ec0ae380cba4588eab1524e18ece1583b65f7ae0e97321f5ff9dfd60/tensorflow_estimator-2.3.0-py2

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
!python -m atari_py.import_roms drive/MyDrive/ROMS/

copying mr_do.bin from drive/MyDrive/ROMS/Mr. Do! (1983) (CBS Electronics, Ed English) (4L4478) (PAL).bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/mr_do.bin
copying lost_luggage.bin from drive/MyDrive/ROMS/Lost Luggage (Airport Mayhem) (1982) (Apollo - Games by Apollo, Larry Minor, Ernie Runyon, Ed Salvo) (AP-2004) [no opening scene] ~.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/lost_luggage.bin
copying elevator_action.bin from drive/MyDrive/ROMS/Elevator Action (1983) (Atari, Dan Hitchens) (CX26126) (Prototype) ~.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/elevator_action.bin
copying asterix.bin from drive/MyDrive/ROMS/Asterix (AKA Taz) (1984) (Atari, Jerome Domurat, Steve Woita) (CX2696).bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/asterix.bin
copying riverraid.bin from drive/MyDrive/ROMS/River Raid (1982) (Activision, Carol Shaw) (AX-020, AX-020-04) ~.bin to /usr/local/lib/python3.7/dist-packages/a

# 1. Test Random Environment with OpenAI Gym

In [7]:
import gym 
import random

In [8]:
env = gym.make('SpaceInvaders-ram-v0')
# height, width, channels = env.observation_space.shape
ram_obs = env.observation_space.shape[0]
actions = env.action_space.n

In [9]:
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [10]:
ram_obs

128

In [11]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        #env.render()
        action = random.choice([0,1,2,3,4,5])
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

Episode:1 Score:120.0
Episode:2 Score:185.0
Episode:3 Score:105.0
Episode:4 Score:155.0
Episode:5 Score:155.0


# 2. Create a Deep Learning Model with Keras

In [12]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam

In [20]:
# def build_model(height, width, channels, actions):
    
def build_model(ram_obs, actions):
    model = Sequential()
    
    #model.add(Convolution2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(3,height, width, channels)))
    #model.add(Convolution2D(64, (4,4), strides=(2,2), activation='relu'))
    #model.add(Convolution2D(64, (3,3), activation='relu'))
    #model.add(Flatten())
    #model.add(Dense(512, activation='relu'))
    #model.add(Dense(256, activation='relu'))
    model.add(Flatten(input_shape=(1,128))),
    model.add(Dense(128, input_dim=ram_obs, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [19]:
del model

In [21]:
model = build_model(ram_obs, actions)


In [22]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_4 (Dense)              (None, 6)                 774       
Total params: 17,286
Trainable params: 17,286
Non-trainable params: 0
_________________________________________________________________


# 3. Build Agent with Keras-RL

In [16]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [23]:
def build_agent(model, actions):
    
    # balance exploration and exploitation and decay agent 
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05, nb_steps=2000000)
    
    # provides a fast and efficient data structure that we can store the agent’s experiences in
    memory = SequentialMemory(limit=1000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                  enable_dueling_network=True, dueling_type='avg', 
                   nb_actions=actions, nb_steps_warmup=1000
                  )
    return dqn

In [24]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-4))

In [25]:
dqn.fit(env, nb_steps=2000000, visualize=False, verbose=3)
dqn.save_weights('SavedWeights/10k-Fast/dqn2dense005_weights.h5f')

Training for 1000000 steps ...
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
    394/1000000: episode: 1, duration: 0.550s, episode steps: 394, steps per second: 716, episode reward: 80.000, mean reward:  0.203 [ 0.000, 20.000], mean action: 2.487 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
    925/1000000: episode: 2, duration: 0.692s, episode steps: 531, steps per second: 767, episode reward: 25.000, mean reward:  0.047 [ 0.000, 10.000], mean action: 2.350 [0.000, 5.000],  loss: --, mean_q: --, mean_eps: --
   1649/1000000: episode: 3, duration: 4.288s, episode steps: 724, steps per second: 169, episode reward: 510.000, mean reward:  0.704 [ 0.000, 200.000], mean action: 2.439 [0.000, 5.000],  loss: 985.240651, mean_q: 289.089040, mean_eps: 0.998808
   2447/1000000: episode: 4, duration: 4.801s, episode steps: 798, steps per second: 166, episode reward: 155.000, mean reward:  0.194 [ 0.000, 30.000], mean a

In [29]:
scores = dqn.test(env, nb_episodes=50, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 50 episodes ...
Episode 1: reward: 100.000, steps: 360
Episode 2: reward: 105.000, steps: 662
Episode 3: reward: 435.000, steps: 1721
Episode 4: reward: 75.000, steps: 403
Episode 5: reward: 210.000, steps: 836
Episode 6: reward: 545.000, steps: 1325
Episode 7: reward: 270.000, steps: 983
Episode 8: reward: 15.000, steps: 387
Episode 9: reward: 180.000, steps: 826
Episode 10: reward: 110.000, steps: 687
Episode 11: reward: 460.000, steps: 961
Episode 12: reward: 315.000, steps: 1102
Episode 13: reward: 210.000, steps: 853
Episode 14: reward: 120.000, steps: 809
Episode 15: reward: 105.000, steps: 668
Episode 16: reward: 180.000, steps: 858
Episode 17: reward: 75.000, steps: 401
Episode 18: reward: 285.000, steps: 908
Episode 19: reward: 120.000, steps: 733
Episode 20: reward: 135.000, steps: 633
Episode 21: reward: 155.000, steps: 821
Episode 22: reward: 510.000, steps: 1070
Episode 23: reward: 215.000, steps: 942
Episode 24: reward: 260.000, steps: 995
Episode 25: reward: 

# 4. Reloading Agent from Memory

In [26]:
 dqn.save_weights('SavedWeights/10k-Fast/dqn2dense005_weights.h5f')

[TIP] Next time specify overwrite=True!


In [None]:
 #del model, dqn

In [None]:
dqn.load_weights('SavedWeights/10k-Fast/dqn2dense_weights.h5f') #2 dense layers

In [None]:
dqn.load_weights('SavedWeights/10k-Fast/dqn2_weights.h5f') #default values

In [None]:
dqn.load_weights('SavedWeights/10k-Fast/dqn2dense005_weights.h5f') #with value_test 0.05