In [3]:
import gym
import random
import numpy as np
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.python.keras.layers.core import Activation
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from tensorflow.python.util import memory

# --- Environment setup -------------------------------------------------
env = gym.make("CartPole-v0")
actions = env.action_space.n               # number of discrete actions
states = env.observation_space.shape[0]    # first dimension of the observation shape
episodes = 10                              # random-policy episodes to play below

# --- Random-policy baseline --------------------------------------------
# Play every episode with actions drawn from the action space, rendering
# each step, and print the reward accumulated over the episode.
for episode in range(1, episodes + 1):
    state = env.reset()
    score = 0

    # A freshly reset episode always takes at least one step, so the loop
    # exits as soon as the environment reports `done`.
    while True:
        env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
        if done:
            break

    print(f"Episode: {episode} Score:{score}")

def build_model(states, actions):
    """Build the feed-forward network used as the agent's Q-function.

    Args:
        states: Size of one observation; the network input has shape
            ``(1, states)`` and is flattened before the dense layers.
            (The leading 1 presumably mirrors the agent memory's
            ``window_length=1`` -- confirm if that value changes.)
        actions: Number of output units, one per discrete action.

    Returns:
        An uncompiled ``Sequential`` model: Flatten -> Dense(24, relu) ->
        Dense(24, relu) -> Dense(actions, linear).
    """
    return Sequential([
        Flatten(input_shape=(1, states)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ])

# Instantiate the Q-network sized for this environment and print its layer table.
model = build_model(states=states, actions=actions)
model.summary()

def build_agent(model, actions):
    """Assemble a DQN agent around an existing Q-network.

    Args:
        model: The network the agent uses to estimate action values.
        actions: Number of discrete actions, passed as ``nb_actions``.

    Returns:
        An uncompiled ``DQNAgent`` configured with a Boltzmann exploration
        policy and a 50,000-entry sequential replay memory
        (``window_length=1``); the caller must ``compile`` it before use.
    """
    exploration_policy = BoltzmannQPolicy()
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    return DQNAgent(
        model=model,
        memory=replay_memory,
        policy=exploration_policy,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

Episode: 1 Score:32.0
Episode: 2 Score:11.0
Episode: 3 Score:10.0
Episode: 4 Score:27.0
Episode: 5 Score:16.0
Episode: 6 Score:15.0
Episode: 7 Score:11.0
Episode: 8 Score:12.0
Episode: 9 Score:20.0
Episode: 10 Score:10.0
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 4)                 0         
_________________________________________________________________
dense (Dense)                (None, 24)                120       
_________________________________________________________________
dense_1 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 50        
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Build the agent, attach the optimizer, and train on the environment.
dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated in
# tf.keras and no longer accepted by newer Keras releases.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# Train for 50,000 environment steps without rendering; verbose=1 prints
# progress while training runs.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

2022-01-07 00:06:53.210117: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Training for 50000 steps ...
Interval 1 (0 steps performed)
   38/10000 [..............................] - ETA: 49s - reward: 1.0000 



101 episodes - episode_reward: 97.208 [8.000, 200.000] - loss: 1.990 - mae: 18.720 - mean_q: 37.994

Interval 2 (10000 steps performed)
55 episodes - episode_reward: 183.964 [159.000, 200.000] - loss: 3.156 - mae: 36.166 - mean_q: 73.145

Interval 3 (20000 steps performed)
51 episodes - episode_reward: 193.804 [165.000, 200.000] - loss: 3.040 - mae: 37.446 - mean_q: 75.425

Interval 4 (30000 steps performed)
51 episodes - episode_reward: 196.490 [165.000, 200.000] - loss: 4.489 - mae: 36.546 - mean_q: 73.391

Interval 5 (40000 steps performed)
done, took 98.356 seconds


<keras.callbacks.History at 0x109c1b850>

In [5]:
# Evaluate the trained agent over 100 rendered episodes, then report the
# mean of the per-episode rewards recorded in the returned history.
scores = dqn.test(env, nb_episodes=100, visualize=True)
episode_rewards = scores.history['episode_reward']
print(np.mean(episode_rewards))

Testing for 100 episodes ...
Episode 1: reward: 193.000, steps: 193
Episode 2: reward: 185.000, steps: 185
Episode 3: reward: 179.000, steps: 179
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 178.000, steps: 178
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 181.000, steps: 181
Episode 8: reward: 175.000, steps: 175
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 180.000, steps: 180
Episode 11: reward: 175.000, steps: 175
Episode 12: reward: 177.000, steps: 177
Episode 13: reward: 191.000, steps: 191
Episode 14: reward: 193.000, steps: 193
Episode 15: reward: 179.000, steps: 179
Episode 16: reward: 182.000, steps: 182
Episode 17: reward: 186.000, steps: 186
Episode 18: reward: 178.000, steps: 178
Episode 19: reward: 179.000, steps: 179
Episode 20: reward: 181.000, steps: 181
Episode 21: reward: 200.000, steps: 200
Episode 22: reward: 173.000, steps: 173
Episode 23: reward: 200.000, steps: 200
Episode 24: reward: 178.000, steps: 178
Episode 25: reward: 

In [None]:
# Write the trained agent's weights to 'dqn_weights.h5f'; overwrite=True
# replaces any existing file of that name.
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [6]:
# Tear down the training session so the following cells can demonstrate
# restoring the agent purely from the saved weights file.
# Close the environment explicitly first: the earlier visualize=True test run
# rendered it, and `del` only drops the name without releasing that window.
env.close()
del model
del dqn
del env

In [None]:
# Recreate the environment, network and agent from scratch so that saved
# weights can be loaded into an identically shaped, freshly built agent.
env = gym.make('CartPole-v0')
actions = env.action_space.n
states = env.observation_space.shape[0]
model = build_model(states, actions)
dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated in
# tf.keras and no longer accepted by newer Keras releases.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [None]:
# Load the weights previously saved to 'dqn_weights.h5f' into the rebuilt agent.
dqn.load_weights('dqn_weights.h5f')

In [None]:
# Run the restored agent for 5 rendered episodes as a sanity check.
# NOTE(review): the recorded output below this cell shows negative rewards,
# whereas every CartPole run earlier in this notebook reports +1 per step
# (reward == steps). The saved output looks stale or from a different
# environment -- re-run this cell on a fresh kernel to confirm.
dqn.test(env, nb_episodes=5, visualize=True)

Testing for 5 episodes ...
Episode 1: reward: -75.000, steps: 76
Episode 2: reward: -118.000, steps: 119
Episode 3: reward: -81.000, steps: 82
Episode 4: reward: -107.000, steps: 108
Episode 5: reward: -85.000, steps: 86


<keras.callbacks.History at 0x2836e6af0>