In [33]:
# Silence every warning category for the rest of the session. The filter is
# installed before the remaining imports so that warnings raised while those
# libraries are being imported are hidden as well.
# NOTE(review): a blanket "ignore" also hides deprecation notices from the
# libraries imported below — consider narrowing the filter by category/module.
import warnings
# Suppress all warnings
warnings.filterwarnings("ignore")

# RNG utilities, the environment toolkit, and numerics
import random
import gym
import numpy as np
# Keras building blocks for the Q-network and its optimizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers.legacy import Adam
# keras-rl components: the DQN agent, its exploration policy, and replay memory
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory


In [32]:
# Trains a DQN agent on CartPole-v1, evaluates it, and prints the mean test
# episode reward. Everything a reader might tune is collected up front.

# --- Hyperparameters ---------------------------------------------------------
ENV_ID = "CartPole-v1"
SEED = 42
HIDDEN_UNITS = 24            # width of each hidden Dense layer
MEMORY_LIMIT = 50000         # replay-buffer capacity (transitions)
WINDOW_LENGTH = 1            # observations stacked per network input
WARMUP_STEPS = 10
TARGET_MODEL_UPDATE = 0.01
# The previous value of 0.1 was two orders of magnitude above the 1e-3 used in
# the keras-rl CartPole DQN example; an Adam step that large tends to make the
# Q-value estimates diverge instead of converge.
LEARNING_RATE = 1e-3
TRAIN_STEPS = 50000
TEST_EPISODES = 10

# Seed the Python and NumPy global RNGs so that sampling driven by them is
# repeatable. TensorFlow weight initialisation and the environment itself are
# not seeded here, so runs are not guaranteed to be bit-for-bit identical.
random.seed(SEED)
np.random.seed(SEED)

# Creates an environment for the CartPole task
env = gym.make(ENV_ID)

# try/finally guarantees the environment is closed even when training or
# testing raises part-way through.
try:
    # Number of state variables, taken from the observation space
    states = env.observation_space.shape[0]

    # Number of discrete actions available in the environment
    actions = env.action_space.n

    # Q-network: flatten the (window, states) input, two hidden ReLU layers,
    # and one linear output per action.
    model = Sequential()
    model.add(Flatten(input_shape=(WINDOW_LENGTH, states)))
    model.add(Dense(HIDDEN_UNITS, activation="relu"))
    model.add(Dense(HIDDEN_UNITS, activation="relu"))
    model.add(Dense(actions, activation="linear"))

    # DQN agent built on the model above, with a sequential replay memory
    # and a Boltzmann exploration policy.
    agent = DQNAgent(
        model=model,
        memory=SequentialMemory(limit=MEMORY_LIMIT, window_length=WINDOW_LENGTH),
        policy=BoltzmannQPolicy(),
        nb_actions=actions,
        nb_steps_warmup=WARMUP_STEPS,
        target_model_update=TARGET_MODEL_UPDATE,
    )

    # Compiles the agent with the Adam optimizer and tracks MAE as a metric
    agent.compile(Adam(learning_rate=LEARNING_RATE), metrics=["mae"])

    # Trains the agent on the environment
    agent.fit(env, nb_steps=TRAIN_STEPS, visualize=False, verbose=1)

    # Evaluates the trained agent.
    # NOTE(review): visualize=True presumably needs a display to render to —
    # switch to False on headless machines.
    res = agent.test(env, nb_episodes=TEST_EPISODES, visualize=True)

    # Prints the average reward obtained across the test episodes
    print(np.mean(res.history["episode_reward"]))
finally:
    # Closes the environment after training and testing (or after a failure)
    env.close()

Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 58:32 - reward: 1.0000done, took 1.651 seconds
Testing for 10 episodes ...
Episode 1: reward: 30.000, steps: 30
Episode 2: reward: 33.000, steps: 33
Episode 3: reward: 119.000, steps: 119
Episode 4: reward: 96.000, steps: 96
Episode 5: reward: 35.000, steps: 35
Episode 6: reward: 116.000, steps: 116
Episode 7: reward: 36.000, steps: 36
Episode 8: reward: 60.000, steps: 60
Episode 9: reward: 34.000, steps: 34
Episode 10: reward: 45.000, steps: 45
60.4
