In [2]:
import gym # pip install gym
import numpy as np
import matplotlib.pyplot as plt
import imageio
# import seaborn as sns
# sns.set_theme()

from tensorflow import keras
from tensorflow.keras import models
from tensorflow.keras import layers

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.callbacks import Callback

from rl.agents import DQNAgent  # pip install keras-rl2
from rl.policy import BoltzmannQPolicy  # important to have gym==0.25.2
from rl.memory import SequentialMemory
import warnings
warnings.filterwarnings("ignore")

In [3]:
env = gym.make("LunarLander-v2")  # no render mode to prevent display while training

states = env.observation_space.shape[0]
actions = env.action_space.n

print(states)
print(actions)

8
4


In [15]:
model = Sequential()
model.add(Flatten(input_shape=(1, states)))
model.add(Dense(64, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(actions, activation="linear"))
# Compile the model
model.compile(optimizer=Adam(), loss='mse')

In [16]:
agent = DQNAgent(
    model=model,
    memory=SequentialMemory(limit=50000, window_length=1),
    policy=BoltzmannQPolicy(),
    nb_actions=actions,
    nb_steps_warmup=100,
    target_model_update=0.01
)

agent.compile(Adam(lr=0.001), metrics=["mae"])

In [17]:
# Define a custom callback to record loss values during training
class LossHistory(Callback):
    def __init__(self):
        self.loss_values = []  # Initialize the loss_values list

    def on_epoch_end(self, epoch, logs=None):
        self.loss_values.append(logs.get('loss'))

In [18]:
# Initialize the callback
loss_history = LossHistory()


In [19]:
# Define the number of training steps
total_steps = 50000

# Fit the agent to the environment
history = agent.fit(
    env,
    nb_steps=total_steps,
    visualize=False,
    verbose=1,
    callbacks=[loss_history]
)

Training for 50000 steps ...
Interval 1 (0 steps performed)
31 episodes - episode_reward: -131.149 [-440.066, 12.687] - loss: 7.122 - mae: 14.608 - mean_q: 6.122

Interval 2 (10000 steps performed)
11 episodes - episode_reward: 16.944 [-52.993, 167.654] - loss: 4.998 - mae: 26.895 - mean_q: 31.798

Interval 3 (20000 steps performed)
11 episodes - episode_reward: 84.629 [-12.659, 151.024] - loss: 5.366 - mae: 31.091 - mean_q: 40.635

Interval 4 (30000 steps performed)
10 episodes - episode_reward: 71.479 [-48.434, 139.780] - loss: 5.306 - mae: 32.761 - mean_q: 44.217

Interval 5 (40000 steps performed)
done, took 1009.290 seconds


In [23]:
# Collect loss and reward values from the callback
losses = loss_history.loss_values
rewards=history.history['episode_reward']

In [24]:
# Save the trained model
agent.save_weights('trained_model_weights.h5', overwrite=True)

In [26]:
# Plot the learning progress
plt.figure(figsize=(10, 5))
plt.plot(range(1, 1001), losses, label='Loss')
plt.plot(range(1, 1001), rewards, label='Mean Reward')
plt.xlabel('Episode')
plt.ylabel('Value')
plt.title('Learning Progress')
plt.legend()
plt.grid(True)
plt.show()

: 

In [1]:
# Plot the loss curve
plt.plot(loss_history.loss_values)
plt.title('Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

NameError: name 'plt' is not defined

In [15]:
agent.load_weights("trained_model_weights.h5")

In [16]:
results = agent.test(env, nb_episodes=10, visualize=True)

Testing for 10 episodes ...
Episode 1: reward: 281.826, steps: 301
Episode 2: reward: 232.623, steps: 241
Episode 3: reward: 288.416, steps: 325
Episode 4: reward: 253.049, steps: 247
Episode 5: reward: 263.854, steps: 340
Episode 6: reward: 179.674, steps: 1000
Episode 7: reward: 241.297, steps: 330
Episode 8: reward: 282.828, steps: 195
Episode 9: reward: 253.156, steps: 294
Episode 10: reward: 284.089, steps: 258


In [17]:
frames = []
state = env.reset()
done = False
while not done:
    frames.append(env.render(mode='rgb_array'))
    action = agent.act(state, test=True)
    state, _, done, _ = env.step(action)

# Save the video of the episode
imageio.mimsave("episode_video.mp4", frames, fps=30)

AttributeError: 'DQNAgent' object has no attribute 'act'

In [22]:
env.close()