In [23]:
import os

import gym
import keras
import numpy as np
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy

In [37]:
# Seed the sources of randomness that are reachable from this cell so that a
# "Restart & Run All" gives comparable results.
# NOTE(review): keras-rl's exploration policy and replay memory presumably draw
# from NumPy's global RNG -- confirm. Keras weight initialisation is NOT seeded
# here, so runs can still differ; seed the backend too if that matters.
SEED = 42
np.random.seed(SEED)

# Create the environment and report the sizes the network has to match.
env = gym.make('MountainCar-v0')
# NOTE(review): `env.seed` is the classic gym seeding call; assumes the older
# gym release that keras-rl targets -- confirm against the installed version.
env.seed(SEED)
print(f"Action Space : {env.action_space} \n Observation Space : {env.observation_space}")

Action Space : Discrete(3) 
 Observation Space : Box(2,)


In [38]:
# Q-network, assembled layer by layer: a flattened observation goes through two
# 24-unit ReLU layers and ends in one linear output per discrete action.
model = keras.models.Sequential()

# The leading 1 in the input shape matches the `window_length=1` used by the
# replay memories elsewhere in this notebook.
model.add(keras.layers.Flatten(input_shape=(1, env.observation_space.shape[0])))

# Hidden layers.
model.add(keras.layers.Dense(24, activation=keras.activations.relu))
model.add(keras.layers.Dense(24, activation=keras.activations.relu))

# Output layer: one unbounded value per action.
model.add(keras.layers.Dense(env.action_space.n, activation=keras.activations.linear))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_6 (Flatten)          (None, 2)                 0         
_________________________________________________________________
dense_9 (Dense)              (None, 24)                72        
_________________________________________________________________
dense_10 (Dense)             (None, 24)                600       
_________________________________________________________________
dense_11 (Dense)             (None, 3)                 75        
Total params: 747
Trainable params: 747
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Wrap the network in a DQN agent: epsilon-greedy action selection, a replay
# memory holding up to 50000 entries, and 10 warm-up steps.
dqn = DQNAgent(
    model=model,
    nb_actions=env.action_space.n,
    memory=SequentialMemory(limit=50000, window_length=1),
    policy=EpsGreedyQPolicy(),
    nb_steps_warmup=10,
)

# Default Adam optimizer; mean absolute error is tracked as an extra metric.
dqn.compile(
    keras.optimizers.Adam(),
    metrics=[keras.metrics.mean_absolute_error],
)

# Short run: 100 steps in total, episodes capped at 200 steps, with rendering on.
dqn.fit(
    env,
    nb_steps=100,
    nb_max_episode_steps=200,
    visualize=True,
    verbose=1,
    log_interval=200,
)

Training for 100 steps ...
Interval 1 (0 steps performed)
  9/200 [>.............................] - ETA: 2:56 - reward: 1.0000 





<keras.callbacks.History at 0x139a06c4550>

In [27]:
# Agent ingredients, kept in named variables so they can be inspected later.
nb_actions = env.action_space.n
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()

# Same agent setup as before, now with an explicit target-model update rate.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=10,
    target_model_update=1e-2,
)
dqn.compile(optimizer=keras.optimizers.Adam(lr=1e-3), metrics=['mae'])

# Train for 5000 steps. Rendering is left on for show, at the cost of
# noticeably slower training.
dqn.fit(env=env, nb_steps=5000, visualize=True, verbose=1, log_interval=200)

Training for 5000 steps ...
Interval 1 (0 steps performed)
  7/200 [>.............................] - ETA: 29s - reward: 1.0000 



1 episodes - episode_reward: 161.000 [161.000, 161.000] - loss: 0.079 - mean_absolute_error: 29.482 - mean_q: 59.608

Interval 2 (200 steps performed)
1 episodes - episode_reward: 191.000 [191.000, 191.000] - loss: 8.379 - mean_absolute_error: 32.610 - mean_q: 65.570

Interval 3 (400 steps performed)
2 episodes - episode_reward: 103.500 [61.000, 146.000] - loss: 64.346 - mean_absolute_error: 36.499 - mean_q: 73.228

Interval 4 (600 steps performed)
2 episodes - episode_reward: 99.000 [77.000, 121.000] - loss: 20.047 - mean_absolute_error: 34.703 - mean_q: 70.327

Interval 5 (800 steps performed)
1 episodes - episode_reward: 123.000 [123.000, 123.000] - loss: 37.608 - mean_absolute_error: 33.799 - mean_q: 68.272

Interval 6 (1000 steps performed)
2 episodes - episode_reward: 152.500 [134.000, 171.000] - loss: 19.214 - mean_absolute_error: 32.692 - mean_q: 66.048

Interval 7 (1200 steps performed)
1 episodes - episode_reward: 156.000 [156.000, 156.000] - loss: 19.464 - mean_absolute_erro

<keras.callbacks.History at 0x139b758b828>

In [28]:
# Evaluate the trained agent over five rendered episodes.
dqn.test(env, visualize=True, nb_episodes=5)


Testing for 5 episodes ...
Episode 1: reward: 500.000, steps: 500
Episode 2: reward: 500.000, steps: 500
Episode 3: reward: 500.000, steps: 500
Episode 4: reward: 500.000, steps: 500
Episode 5: reward: 500.000, steps: 500


<keras.callbacks.History at 0x139a2692b38>

In [33]:
# Create the output directory for saved weights. Unlike a shell `mkdir`, this
# does not fail when the directory already exists, so the cell can be re-run,
# and it does not depend on which shell the notebook host provides.
os.makedirs('weights', exist_ok=True)

In [35]:
# Persist the trained weights. `overwrite=True` lets the cell be re-run without
# stopping at an interactive "overwrite?" prompt once the file already exists.
# NOTE(review): assumes keras-rl's `save_weights(filepath, overwrite=False)`
# signature -- confirm against the installed version.
dqn.save_weights(filepath='weights/6_1.h5', overwrite=True)

In [42]:
# Build a second agent used to check that the saved weights can be restored.
# The network is cloned (same architecture, freshly initialised weights) so
# that `test` does not wrap the very same `model` object as the trained `dqn`.
# Sharing the object would presumably leave `test` holding the trained weights
# already, making the weight loading that follows prove nothing.
test = DQNAgent(model=keras.models.clone_model(model),
               policy=EpsGreedyQPolicy(),
               memory=SequentialMemory(limit=50000, window_length=1),
               nb_actions=env.action_space.n,
               nb_steps_warmup=10)
# Same optimizer and metric configuration as the first training agent.
test.compile(optimizer=keras.optimizers.Adam(), 
            metrics=[keras.metrics.mean_absolute_error])

In [43]:
# Restore the weights saved earlier into the verification agent.
test.load_weights('weights/6_1.h5')

In [None]:
# Evaluate the agent with the restored weights. The original call passed the
# undefined name `en`; the environment is `env`. The episode count and rendering
# flag mirror the earlier evaluation of `dqn` so the two runs can be compared.
test.test(env, nb_episodes=5, visualize=True)