In [1]:
import gym
import random
from IPython import display

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [2]:
def build_model(states, actions):
    """Assemble the Q-network used by the DQN agent.

    The network flattens an input of shape ``(1, states)``, passes it through
    two 24-unit ReLU hidden layers, and ends in a linear layer with one output
    per action.

    Args:
        states: Size of a single observation vector.
        actions: Number of discrete actions (width of the output layer).

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # The leading 1 in the input shape presumably matches the
    # window_length=1 replay memory used by build_agent -- confirm if changed.
    return Sequential([
        Flatten(input_shape=(1, states)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ])

def build_agent(model, actions):
    """Wrap ``model`` in a DQN agent.

    The agent uses a Boltzmann Q-policy and a 50,000-entry sequential replay
    memory with a window length of 1. It warms up for 10 steps and is given
    ``target_model_update=1e-2``.

    Args:
        model: The Q-network to drive (see ``build_model``).
        actions: Number of discrete actions available in the environment.

    Returns:
        An uncompiled ``DQNAgent``; the caller must ``compile`` it before use.
    """
    return DQNAgent(
        model=model,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [3]:
# Shortcut path: when `load` is True, rebuild the agent, restore previously
# saved weights, replay one episode in a render window, and then stop.
load = True
if load:
    env = gym.make('CartPole-v1', render_mode='human')
    states = env.observation_space.shape[0]
    actions = env.action_space.n

    model = build_model(states, actions)
    model.summary()
    dqn = build_agent(model, actions)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

    dqn.load_weights('_weights/dqn_weights.h5f')
    try:
        _ = dqn.test(env, nb_episodes=1, visualize=False)
    finally:
        # Release the render window even if the rollout is interrupted.
        env.close()

    # Raising here halts a "Run All" at this cell; presumably intended so the
    # training cells below are skipped when only replaying saved weights.
    raise StopIteration('load=True: replayed saved weights; stopping before the training cells.')


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 4)                 0         
_________________________________________________________________
dense (Dense)                (None, 24)                120       
_________________________________________________________________
dense_1 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 50        
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


2023-04-30 17:46:58.853381: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Testing for 1 episodes ...


2023-04-30 17:46:59.878 Python[11371:16099062] ApplePersistenceIgnoreState: Existing state will not be touched. New state will be written to /var/folders/j3/7jchtq_n7cz4zm5b85nj95lc0000gn/T/org.python.python.savedState


KeyboardInterrupt: 

In [None]:
# Create a CartPole environment (no render mode requested) and size the
# Q-network from its observation and action spaces.
env = gym.make('CartPole-v1')
states, actions = env.observation_space.shape[0], env.action_space.n

model = build_model(states, actions)
model.summary()

In [None]:
# Random-policy baseline: play episodes with uniformly random actions and
# report the total reward of each one.
# Note: range(episodes + 1) yields episodes + 1 rollouts (indices 0..episodes),
# which is what the recorded output shows.
episodes = 10
for i in range(episodes+1):
    env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        action = random.choice([0, 1])
        # The 5-tuple step API returns (obs, reward, terminated, truncated, info).
        # An episode is over when it either terminates or is truncated by the
        # time limit, so both flags must feed `done`.
        n_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        score += reward
    print("Episode: {} Score: {}".format(i, score))

Episode: 0 Score: 40.0
Episode: 1 Score: 11.0
Episode: 2 Score: 12.0
Episode: 3 Score: 15.0
Episode: 4 Score: 29.0
Episode: 5 Score: 34.0
Episode: 6 Score: 12.0
Episode: 7 Score: 13.0
Episode: 8 Score: 29.0
Episode: 9 Score: 50.0
Episode: 10 Score: 11.0


  gym.logger.warn(


In [None]:
# Recreate the CartPole environment and build a fresh, untrained Q-network
# for the training run that follows.
env = gym.make('CartPole-v1')
states, actions = env.observation_space.shape[0], env.action_space.n

model = build_model(states, actions)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_6 (Dense)              (None, 24)                120       
_________________________________________________________________
dense_7 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 50        
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Build a fresh DQN agent around `model` and train it on `env` for 50000 steps.
dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias
# that newer Keras releases reject.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# Keep the returned History object instead of echoing its repr as cell output.
history = dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
48 episodes - episode_reward: 205.646 [30.000, 391.000] - loss: 1.256 - mae: 37.837 - mean_q: 76.046

Interval 2 (10000 steps performed)
29 episodes - episode_reward: 332.483 [175.000, 586.000] - loss: 1.383 - mae: 39.249 - mean_q: 78.815

Interval 3 (20000 steps performed)
16 episodes - episode_reward: 607.125 [92.000, 1714.000] - loss: 1.421 - mae: 42.831 - mean_q: 85.985

Interval 4 (30000 steps performed)
Interval 5 (40000 steps performed)
done, took 1047.835 seconds


<tensorflow.python.keras.callbacks.History at 0x7fb4558c2400>

In [None]:
# Replay the trained agent for one episode in a human-rendered environment.
env = gym.make('CartPole-v1', render_mode='human')
states = env.observation_space.shape[0]
actions = env.action_space.n

try:
    _ = dqn.test(env, nb_episodes=1, visualize=False)
finally:
    # Release the render window even if the rollout is interrupted.
    env.close()
# Keep a second reference to the trained agent under another name.
dqn_store = dqn

Testing for 1 episodes ...


KeyboardInterrupt: 

In [None]:
# Persist the trained weights. `overwrite` is forwarded to save_weights so the
# flag actually takes effect; False keeps the previous (default) behaviour.
overwrite = False
dqn.save_weights('_weights/dqn_weights.h5f', overwrite=overwrite)
# Keep a second reference to the trained agent under another name.
dqn_store = dqn

In [None]:
# Rebuild the agent from scratch, restore the saved weights, and replay one
# episode in a human-rendered environment.
env = gym.make('CartPole-v1', render_mode='human')
states = env.observation_space.shape[0]
actions = env.action_space.n

model = build_model(states, actions)
# Bind the new agent to `dqn` (not `model`) so the compile/load/test calls
# below operate on the freshly built agent rather than a stale one.
dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias
# that newer Keras releases reject.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

dqn.load_weights('_weights/dqn_weights.h5f')

try:
    _ = dqn.test(env, nb_episodes=1, visualize=False)
finally:
    # Release the render window even if the rollout is interrupted.
    env.close()



Testing for 1 episodes ...




KeyboardInterrupt: 