## ATARI Asteroids DQN_gym with keras-rl

In [2]:
import numpy as no
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [21]:
ENV_NAME_2 = 'Asteroids-v0'

In [22]:
# Get the environment and extract the number of actions
env = gym.make(ENV_NAME_2)
nb_actions = env.action_space.n
nb_actions

14

In [23]:
# Next, we build a neural network model
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(3, activation= 'tanh')) # One layer of 3 units with tanh activation function 
model.add(Dense(nb_actions))
model.add(Activation('sigmoid')) # one layer of 1 unit with sigmoid activation function
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_4 (Flatten)          (None, 100800)            0         
_________________________________________________________________
dense_8 (Dense)              (None, 3)                 302403    
_________________________________________________________________
dense_9 (Dense)              (None, 14)                56        
_________________________________________________________________
activation_6 (Activation)    (None, 14)                0         
Total params: 302,459
Trainable params: 302,459
Non-trainable params: 0
_________________________________________________________________
None


In [24]:
# Configure and compile the agent. Use every built-in Keras optimizer and metrics!
memory = SequentialMemory(limit=20000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
              target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

In [26]:
## Visualize the training during 20000 steps 
dqn.fit(env, nb_steps=20000, visualize=True, verbose=2)

Training for 20000 steps ...
  1454/20000: episode: 1, duration: 41.667s, episode steps: 1454, steps per second: 35, episode reward: 1030.000, mean reward: 0.708 [0.000, 100.000], mean action: 6.395 [0.000, 13.000], mean observation: 1.887 [0.000, 240.000], loss: 24.489462, mean_absolute_error: 0.974011, mean_q: 0.998769
  3841/20000: episode: 2, duration: 68.027s, episode steps: 2387, steps per second: 35, episode reward: 1530.000, mean reward: 0.641 [0.000, 100.000], mean action: 6.554 [0.000, 13.000], mean observation: 1.817 [0.000, 240.000], loss: 24.838045, mean_absolute_error: 0.975894, mean_q: 0.999373
  4230/20000: episode: 3, duration: 11.115s, episode steps: 389, steps per second: 35, episode reward: 160.000, mean reward: 0.411 [0.000, 50.000], mean action: 6.812 [0.000, 13.000], mean observation: 2.693 [0.000, 240.000], loss: 24.809513, mean_absolute_error: 0.975669, mean_q: 0.999609
  4804/20000: episode: 4, duration: 16.322s, episode steps: 574, steps per second: 35, episo

<keras.callbacks.History at 0x122d76c88>

In [27]:
## Save the model 
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

In [29]:
# Evaluate the algorithm for 10 episodes 
dqn.test(env, nb_episodes=10, visualize=True)

Testing for 10 episodes ...
Episode 1: reward: 140.000, steps: 2557
Episode 2: reward: 140.000, steps: 2557
Episode 3: reward: 140.000, steps: 2569
Episode 4: reward: 140.000, steps: 2592
Episode 5: reward: 140.000, steps: 2564
Episode 6: reward: 140.000, steps: 2578
Episode 7: reward: 140.000, steps: 2564
Episode 8: reward: 140.000, steps: 2578
Episode 9: reward: 140.000, steps: 2543
Episode 10: reward: 140.000, steps: 2570


<keras.callbacks.History at 0x12d79df98>