In [2]:
!pip install stable-baselines3[extra] gymnasium highway-env

Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting highway-env
  Downloading highway_env-1.10.1-py3-none-any.whl.metadata (16 kB)
Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.4.0-py3-none-any.whl.metadata (4.5 kB)
Collecting ale-py>=0.9.0 (from stable-baselines3[extra])
  Downloading ale_py-0.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.6 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m46.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading highway_env-1.10.1-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ale_py-0.10.1-cp310-cp310-manylinux_2_17_x

In [None]:
# Custom Callback to Log Training Performance
# Imported here because this cell runs before the notebook's main import cell.
from stable_baselines3.common.callbacks import BaseCallback


class RewardLoggingCallback(BaseCallback):
    """Record the return and length of every episode that finishes during training.

    Attributes are plain Python lists so they can be saved or plotted directly:
        episode_rewards: one entry per finished episode (the ``"r"`` field).
        episode_lengths: one entry per finished episode (the ``"l"`` field).

    The statistics are read from the ``"episode"`` entry of the per-environment
    ``info`` dicts. That entry is presumably written by the ``Monitor`` wrapper
    that the training log reports adding around the env; confirm if the env is
    wrapped differently.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.episode_rewards = []  # Store episode rewards
        self.episode_lengths = []  # Store episode lengths

    def _on_step(self) -> bool:
        """Collect statistics of any episode that ended on this step.

        Returns:
            Always ``True``, so this callback never asks for training to stop.
        """
        # "episode" is a key of the individual info dicts, not of self.locals,
        # so testing `"episode" in self.locals` never matched and nothing was
        # logged. Look inside every info dict instead (one per vectorised env).
        for info in self.locals.get("infos", ()):
            episode = info.get("episode")
            if episode is not None:
                self.episode_rewards.append(episode["r"])
                self.episode_lengths.append(episode["l"])
        return True

In [None]:
import gymnasium as gym
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise
import highway_env
import numpy as np

  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)


In [None]:
# Build the highway driving environment
env = gym.make("highway-v0")

# Overrides applied on top of the environment's default configuration
env_config = {
    "action": {"type": "ContinuousAction"},  # continuous action space
    "simulation_frequency": 15,
}
env.unwrapped.configure(env_config)

# Reset after configuring and keep the first observation / info pair
obs, info = env.reset()


# **Train**

In [None]:
# Exploration noise: zero mean, sigma 0.1, one component per action dimension
n_actions = env.action_space.shape[0]
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=np.ones(n_actions) * 0.1,
)

# DDPG hyperparameters gathered in one place
ddpg_kwargs = dict(
    action_noise=action_noise,
    learning_rate=0.001,
    gamma=0.99,
    buffer_size=1000000,
    tau=0.005,
    batch_size=64,
    verbose=1,
    tensorboard_log="./ddpg_highway_tensorboard/",
)
model = DDPG("MlpPolicy", env, **ddpg_kwargs)

# Callback that accumulates per-episode statistics while training runs
reward_logger = RewardLoggingCallback()

# Train for 20k environment steps
model.learn(total_timesteps=20000, callback=reward_logger)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./ddpg_highway_tensorboard/DDPG_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 40       |
|    ep_rew_mean     | 8.22     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 1        |
|    time_elapsed    | 88       |
|    total_timesteps | 160      |
| train/             |          |
|    actor_loss      | -0.368   |
|    critic_loss     | 0.00899  |
|    learning_rate   | 0.001    |
|    n_updates       | 59       |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 40       |
|    ep_rew_mean     | 16.5     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 1        |
|    time_elapsed    | 178      |
|    total_timesteps | 320      |
| train/             |          |
|    actor_

<stable_baselines3.ddpg.ddpg.DDPG at 0x786173e82650>

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import numpy as np

# Save rewards after the first 20k timesteps.
# The file is named "20k" to agree with the message below and with the
# "ddpg_rewards_20k.npy" file that the resume-training cell loads. Under the old
# "10k" name that load never found the file, so these rewards were dropped from
# the combined curve.
np.save("/content/drive/My Drive/ddpg_rewards_20k.npy", reward_logger.episode_rewards)
print("Rewards after 20k timesteps saved!")


# Save the model
model.save("/content/drive/My Drive/ddpg_highway")

# Save the replay buffer
model.save_replay_buffer("/content/drive/My Drive/ddpg_replay_buffer")
env.close()

Mounted at /content/drive


In [None]:
import numpy as np
from stable_baselines3 import DDPG
import matplotlib.pyplot as plt

# Reload environment
env = gym.make("highway-v0")
env.unwrapped.configure({
    "action": {
        "type": "ContinuousAction",  # Use continuous action space
    },
    "simulation_frequency": 15
})
obs, info = env.reset()

# Load the model and replay buffer
model = DDPG.load("/content/drive/My Drive/ddpg_highway", env=env)
model.load_replay_buffer("/content/drive/My Drive/ddpg_replay_buffer")
print("Model and replay buffer loaded successfully!")

# Load previously saved rewards (falls back to an empty history if the file is absent)
old_rewards = []
try:
    old_rewards = np.load("/content/drive/My Drive/ddpg_rewards_20k.npy").tolist()
    print("Previous rewards loaded successfully!")
except FileNotFoundError:
    print("No previous rewards found. Starting fresh.")

# Initialize new reward logger
reward_logger = RewardLoggingCallback()

# Continue training.
# reset_num_timesteps=False keeps the loaded model's timestep counter instead of
# restarting it at 0, so the extra 10k steps are logged as a continuation of the
# earlier run rather than as a new run starting from step 0 (see the SB3 docs for
# `learn`).
model.learn(
    total_timesteps=10000,
    callback=reward_logger,
    reset_num_timesteps=False,
)

# Combine old and new rewards
combined_rewards = old_rewards + reward_logger.episode_rewards

# Save updated model, buffer, and rewards
model.save("/content/drive/My Drive/ddpg_highway")
model.save_replay_buffer("/content/drive/My Drive/ddpg_replay_buffer")
np.save("/content/drive/My Drive/ddpg_rewards_30k.npy", combined_rewards)
print("Training complete! Model, replay buffer, and rewards updated.")

# Plot combined rewards
plt.figure(figsize=(10, 5))
plt.plot(combined_rewards)
plt.title("Training Performance (Episode Rewards)")
plt.xlabel("Episodes")
plt.ylabel("Rewards")
plt.grid()
plt.show()


In [None]:
# Load TensorBoard
# First enable the TensorBoard notebook extension, then open the dashboard on the
# run directory named in the training cell's output
# ("Logging to ./ddpg_highway_tensorboard/DDPG_1").
# NOTE(review): the run folder is hard-coded; a later training run may log to a
# different DDPG_<n> folder -- confirm before reading the curves.
%load_ext tensorboard
%tensorboard --logdir ./ddpg_highway_tensorboard/DDPG_1

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Read TensorBoard logs using Pandas
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

def extract_tensorboard_data(logdir, scalar_name):
    """Read one scalar series from a TensorBoard log directory.

    Args:
        logdir: Path handed to ``EventAccumulator``.
        scalar_name: Tag of the scalar series to read,
            e.g. ``"rollout/ep_rew_mean"``.

    Returns:
        A ``(steps, values)`` tuple of two equally long lists, in the order
        the accumulator returns the events.
    """
    accumulator = EventAccumulator(logdir)
    accumulator.Reload()  # events are only available after an explicit reload

    steps, values = [], []
    for event in accumulator.Scalars(scalar_name):
        steps.append(event.step)
        values.append(event.value)
    return steps, values

# TensorBoard run directory and the scalar tag to read from it
logdir = "./ddpg_highway_tensorboard/DDPG_1"
scalar_name = "rollout/ep_rew_mean"  # mean episode reward

# Pull the (step, value) series out of the event files
steps, rewards = extract_tensorboard_data(logdir, scalar_name)

# Draw the reward curve on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(steps, rewards, label="Reward Curve", color="b")
ax.set_xlabel("Timesteps")
ax.set_ylabel("Mean Reward")
ax.set_title("Training Performance: DDPG on Highway-v0")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
import gymnasium as gym
from stable_baselines3 import DDPG
import highway_env
import warnings

# NOTE(review): this silences every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Rebuild the environment with the same overrides used for training
env_config = {
    "action": {"type": "ContinuousAction"},  # continuous action space
    "simulation_frequency": 15,
}
env = gym.make("highway-v0")
env.unwrapped.configure(env_config)
obs, info = env.reset()

# Load the trained model
# NOTE(review): no visible cell writes "ddpg_highway_improve" (the training cells
# save to "/content/drive/My Drive/ddpg_highway") -- confirm this is the intended file.
model = DDPG.load("ddpg_highway_improve", env=env)

# Per-step and per-episode logs
timestep_rewards = []
cumulative_rewards = []
timesteps = []

# Global step counter shared by all evaluation episodes
current_timestep = 0

# Number of evaluation episodes
N_EVAL_EPISODES = 100

for episode in range(N_EVAL_EPISODES):
    obs, info = env.reset()
    total_reward = 0
    episode_over = False

    while not episode_over:
        # Greedy (deterministic) action from the trained policy
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)

        total_reward += reward
        current_timestep += 1
        # NOTE(review): the env is created without a render_mode, so this call
        # probably draws nothing -- confirm, and pass render_mode to gym.make if
        # frames are wanted.
        env.render()

        # Log timestep data
        timestep_rewards.append(reward)
        timesteps.append(current_timestep)

        # Stop on either termination or truncation
        episode_over = done or truncated

    cumulative_rewards.append(total_reward)
    print(f"Episode {episode + 1}: Reward = {total_reward}")

env.close()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Per-episode return of the evaluation run, with episodes numbered from 1
episodes = range(1, len(cumulative_rewards) + 1)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(episodes, cumulative_rewards, marker='o', label="Cumulative Reward per Episode")
ax.set_xlabel("Episode")
ax.set_ylabel("Cumulative Reward")
ax.set_title("Model Performance Over Episodes")
ax.legend()
ax.grid(True)
plt.show()