In [2]:
import gym, random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [3]:
env = gym.make('CartPole-v0')
states = env.observation_space.shape[0]
action = env.action_space.n

In [4]:
def build_model(states, actions):
    model = Sequential()
    model.add(Flatten(input_shape=(1,states)))
    model.add(Dense(24,activation='relu'))
    model.add(Dense(24,activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model
    

In [5]:
model = build_model(states, action)

In [6]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 4)                 0         
_________________________________________________________________
dense (Dense)                (None, 24)                120       
_________________________________________________________________
dense_1 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 50        
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


In [7]:
def build_agent(model, action):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,nb_actions=action,
                   nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [8]:
dqn = build_agent(model, action)
dqn.compile(Adam(learning_rate=0.003),metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

2021-09-13 21:18:27.547195: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 8:02 - reward: 1.0000



102 episodes - episode_reward: 97.000 [10.000, 200.000] - loss: 1.262 - mae: 18.265 - mean_q: 37.112

Interval 2 (10000 steps performed)
53 episodes - episode_reward: 189.321 [137.000, 200.000] - loss: 2.486 - mae: 34.073 - mean_q: 68.883

Interval 3 (20000 steps performed)
51 episodes - episode_reward: 195.176 [101.000, 200.000] - loss: 6.882 - mae: 40.253 - mean_q: 81.146

Interval 4 (30000 steps performed)
52 episodes - episode_reward: 193.577 [129.000, 200.000] - loss: 12.141 - mae: 44.302 - mean_q: 89.043

Interval 5 (40000 steps performed)
done, took 203.697 seconds


<keras.callbacks.History at 0x7fc6777a69d0>

In [9]:
scores = dqn.test(env, nb_episodes=10, visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...




Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
200.0


In [10]:
dqn.save_weights("weights.h5f", overwrite=True)