In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
import gym 
import random
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
!pip install keras-rl2
!pip install keras


Collecting keras-rl2
  Downloading https://files.pythonhosted.org/packages/dd/34/94ffeab44eef43e22a01d82aa0ca062a97392c2c2415ba8b210e72053285/keras_rl2-1.0.4-py3-none-any.whl (53kB)
     |████████████████████████████████| 61kB 3.4MB/s 
Installing collected packages: keras-rl2
Successfully installed keras-rl2-1.0.4


In [None]:
# Create the CartPole environment and read the problem dimensions that the
# model-building and agent-building cells below rely on.
env = gym.make('CartPole-v0')
states = env.observation_space.shape[0]   # length of one observation vector
actions = env.action_space.n              # number of discrete actions

# Baseline: play a few episodes with random actions and print the total reward
# of each one, to have a reference score before any training.
episodes = 10
for episode in range(1, episodes+1):
    env.reset()
    done = False
    score = 0

    while not done:
        # env.render()  # uncomment to watch the episode
        # Sample from the environment's own action space instead of a
        # hard-coded [0, 1] list, so the loop stays valid for other envs.
        action = env.action_space.sample()
        # Only the reward and the termination flag are needed here; the
        # observation and info entries of the 4-tuple are ignored.
        _obs, reward, done, _info = env.step(action)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:24.0
Episode:2 Score:11.0
Episode:3 Score:33.0
Episode:4 Score:39.0
Episode:5 Score:64.0
Episode:6 Score:10.0
Episode:7 Score:40.0
Episode:8 Score:15.0
Episode:9 Score:26.0
Episode:10 Score:73.0


In [None]:
def build_model(states, actions):
    """Assemble the feed-forward network used by the agent.

    Args:
        states: Size of the observation vector; the input shape is (1, states).
        actions: Number of output units, one per action.

    Returns:
        An uncompiled Sequential model: Flatten -> Dense(24, relu) ->
        Dense(24, relu) -> Dense(actions, linear).
    """
    hidden_units = 24
    layers = [
        Flatten(input_shape=(1, states)),
        Dense(hidden_units, activation='relu'),
        Dense(hidden_units, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layers)

In [None]:
# Instantiate the network for this environment's observation and action sizes.
model = build_model(states=states, actions=actions)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 4)                 0         
_________________________________________________________________
dense (Dense)                (None, 24)                120       
_________________________________________________________________
dense_1 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 50        
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


In [None]:
def build_agent(model, actions, memory_limit=50000, nb_steps_warmup=10,
                target_model_update=1e-2):
    """Create a double-DQN agent with a Boltzmann exploration policy.

    Args:
        model: Network passed to DQNAgent as ``model``.
        actions: Number of actions, passed to DQNAgent as ``nb_actions``.
        memory_limit: ``limit`` of the SequentialMemory replay buffer.
        nb_steps_warmup: Forwarded unchanged to DQNAgent.
        target_model_update: Forwarded unchanged to DQNAgent.

    Returns:
        The constructed, not yet compiled, DQNAgent.
    """
    policy = BoltzmannQPolicy()
    # window_length stays fixed at 1: build_model declares its input shape as
    # (1, states), so the two values have to agree.
    memory = SequentialMemory(limit=memory_limit, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   nb_actions=actions, nb_steps_warmup=nb_steps_warmup,
                   target_model_update=target_model_update,
                   enable_double_dqn=True)
    return dqn

In [None]:
# Build the DQN agent around the network and train it on the environment.
dqn = build_agent(model, actions)
# `learning_rate` is the supported name of Adam's step-size argument; the `lr`
# alias used before is deprecated in tf.keras.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# Keep the returned History object so the training logs can be inspected later,
# instead of echoing its repr as the cell output.
history = dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 16:38 - reward: 1.0000



103 episodes - episode_reward: 96.165 [9.000, 200.000] - loss: 2.493 - mae: 19.332 - mean_q: 39.238

Interval 2 (10000 steps performed)
50 episodes - episode_reward: 199.120 [184.000, 200.000] - loss: 6.918 - mae: 40.354 - mean_q: 81.466

Interval 3 (20000 steps performed)
50 episodes - episode_reward: 199.820 [192.000, 200.000] - loss: 7.176 - mae: 42.593 - mean_q: 85.639

Interval 4 (30000 steps performed)
50 episodes - episode_reward: 199.600 [189.000, 200.000] - loss: 5.960 - mae: 40.513 - mean_q: 81.406

Interval 5 (40000 steps performed)
done, took 484.369 seconds


<tensorflow.python.keras.callbacks.History at 0x7f08433afe10>

In [None]:
# Evaluate the trained agent for 15 episodes without rendering.
# The result is bound to a descriptive name because assigning to `_` in
# IPython overrides the built-in "last output" variable.
test_history = dqn.test(env, nb_episodes=15, visualize=False)

Testing for 15 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
Episode 11: reward: 200.000, steps: 200
Episode 12: reward: 200.000, steps: 200
Episode 13: reward: 200.000, steps: 200
Episode 14: reward: 200.000, steps: 200
Episode 15: reward: 200.000, steps: 200


In [None]:
# Save the trained agent's weights to disk; overwrite=True asks save_weights
# to replace an existing file of the same name.
dqn.save_weights('dqn_weights.h5f', overwrite=True)