# Imports & Constants

In [1]:
import sys
import os

# Assuming the notebook is located inside `subfolder` and
# we want to import a module in `parent_folder`

# Directory the notebook is run from (the 'subfolder').
# Note: the quoted string "__file__" is not the `__file__` variable, so
# `os.path.abspath("__file__")` merely resolves a dummy file name against the
# current working directory.  Asking for the working directory directly yields
# the same path without the misleading detour.
notebook_dir = os.getcwd()

# The 'parent_folder' sits one level above the notebook directory.
parent_dir = os.path.dirname(notebook_dir)

# Make modules in 'parent_folder' importable.  The membership check keeps
# sys.path free of duplicates when this cell is re-run.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [2]:
import time
import numpy as np
import highway_env
import gymnasium as gym
import matplotlib.pyplot as plt

from stable_baselines3 import DQN
from IPython.display import clear_output
from gymnasium.wrappers import RecordVideo

from agents import MlpDQNAgent

%matplotlib inline

# Environment

In [11]:
# Settings handed to `env.unwrapped.configure` in the next cell.
# The render size can additionally be set through "screen_width" /
# "screen_height" (in px), e.g. 600*4 and 150*4.
config = dict(
    duration=50,
    lanes_count=4,
)

In [12]:
# Create the highway environment.  With render_mode="rgb_array" the frames
# returned by `env.render()` are displayed via `plt.imshow` in later cells.
env = gym.make("highway-v0", render_mode="rgb_array")
# Apply the custom settings to the underlying environment, then reset to get
# the initial observation.
env.unwrapped.configure(config)
obs, info = env.reset()

# Random Agent

In [5]:
def test_random(env: gym.Env, num_episodes: int, render: bool = True, time_interval: float = 0.2):
    episode_lengths = []
    undiscounted_rewards = []
    for _ in range(num_episodes):
        done = truncated = False

        episode_reward = 0
        episode_length = 0

        obs, _ = env.reset()
        while not (done or truncated):
            action = env.action_space.sample()
            obs, reward, done, truncated, _ = env.step(action)

            episode_reward += reward
            episode_length += 1

            if not render:
                continue

            clear_output(True)
            plt.imshow(env.render())
            plt.show()
            time.sleep(time_interval)

        undiscounted_rewards.append(episode_reward)
        episode_lengths.append(episode_length)

    return episode_lengths, undiscounted_rewards

In [None]:
# Random-policy baseline: 200 episodes with rendering switched off.
ep_lens, ep_rews = test_random(env, num_episodes=200, render=False)

In [36]:
# Average undiscounted return per episode of the random baseline.
np.mean(ep_rews)

10.899125836416482

# Stable Baselines DQN

In [3]:
# Hyperparameters of the Stable-Baselines3 DQN baseline, gathered in one place
# so they are easy to scan and tweak.
dqn_kwargs = dict(
    policy_kwargs={"net_arch": [256, 256]},  # two hidden layers of 256 units
    learning_rate=5e-4,
    buffer_size=15_000,
    learning_starts=200,
    batch_size=32,
    gamma=0.8,
    train_freq=1,
    gradient_steps=1,
    target_update_interval=50,
    verbose=1,
    tensorboard_log="highway_dqn_checkpoints/",
)

# MLP-policy DQN acting on the environment created above.
model = DQN("MlpPolicy", env, **dqn_kwargs)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


### Training a DQN agent

In [None]:
# Train for 2,000 environment steps.  `total_timesteps` is an integer step
# count in Stable-Baselines3, so pass an int rather than the float literal 2e3.
model.learn(total_timesteps=2_000, progress_bar=True)
# Persist the trained model so the testing cells can reload it.
model.save("highway_dqn/model")

### Testing the agent

In [9]:
# Reload the model saved by the training cell (no environment is attached here).
model = DQN.load("highway_dqn/model")

In [None]:
# Number of evaluation episodes to watch.  The loop is bounded (instead of
# `while True`) so the cell finishes on its own and "Restart & Run All" can
# reach the cells below; raise the value to watch the agent for longer.
num_test_episodes = 5

for episode_idx in range(num_test_episodes):
    done = truncated = False
    obs, info = env.reset()

    while not (done or truncated):
        # Deterministic action from the trained model; `_states` is not
        # needed here.
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)

        # Replace the previous frame with the current one, then pause briefly
        # so the animation is watchable.
        clear_output(True)
        plt.imshow(env.render())
        plt.show()
        time.sleep(0.2)

# Our Implementation of DQN

In [12]:
# Hyperparameters for our own DQN agent (MlpDQNAgent from the `agents` module).
seed = 777
num_frames = 200_000
memory_size = 10_000
gamma = 0.85
batch_size = 64
target_update = 50
# Evaluates to 1 / 20_000.  Presumably the per-frame epsilon decrement, which
# would anneal exploration over the first 10% of `num_frames` -- TODO confirm
# against MlpDQNAgent.
epsilon_decay = 1 / (num_frames // 10)

# All arguments except `gamma` are passed positionally.
# NOTE(review): confirm the order matches MlpDQNAgent's constructor signature.
agent = MlpDQNAgent(
    env,
    memory_size,
    batch_size,
    target_update,
    epsilon_decay,
    seed,
    gamma = gamma
)

cuda


### Recording a video

In [13]:
def _record_every_episode(episode_id: int) -> bool:
    """Episode trigger for RecordVideo that selects every episode."""
    return True


# Wrap the environment so episodes are written as videos to ../videos/random/.
vid_env = RecordVideo(
    env,
    video_folder="../videos/random/",
    episode_trigger=_record_every_episode,
)

# Hand the video recorder to the wrapped environment
# so it can send it intermediate simulation frames.
vid_env.unwrapped.set_record_video_wrapper(vid_env)

  logger.warn(


In [14]:
# Record 10 episodes of a random policy.  The recorder is closed in a
# `finally` block so it is still closed when a step raises or the kernel is
# interrupted part-way through.
try:
    for i in range(10):
        # Record a video as usual
        obs, info = vid_env.reset()
        done = truncated = False
        while not (done or truncated):
            action = vid_env.action_space.sample()
            obs, reward, done, truncated, info = vid_env.step(action)
            vid_env.render()
finally:
    vid_env.close()

Moviepy - Building video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-0.mp4.
Moviepy - Writing video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-0.mp4



                                                   

Moviepy - Done !
Moviepy - video ready /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-0.mp4




Moviepy - Building video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-1.mp4.
Moviepy - Writing video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-1.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-1.mp4




Moviepy - Building video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-2.mp4.
Moviepy - Writing video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-2.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-2.mp4
Moviepy - Building video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-3.mp4.
Moviepy - Writing video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-3.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-3.mp4
Moviepy - Building video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-4.mp4.
Moviepy - Writing video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-4.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-4.mp4
Moviepy - Building video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-5.mp4.
Moviepy - Writing video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-5.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-5.mp4




Moviepy - Building video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-6.mp4.
Moviepy - Writing video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-6.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-6.mp4
Moviepy - Building video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-7.mp4.
Moviepy - Writing video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-7.mp4



                                                   

Moviepy - Done !
Moviepy - video ready /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-7.mp4




Moviepy - Building video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-8.mp4.
Moviepy - Writing video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-8.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-8.mp4




Moviepy - Building video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-9.mp4.
Moviepy - Writing video /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-9.mp4



                                                                

Moviepy - Done !
Moviepy - video ready /home/sowell/projects/rl_group_assignment/videos/random/rl-video-episode-9.mp4




# Manual Control

In [3]:
# Same settings as the earlier environment config, plus manual control.
# The render size can additionally be set through "screen_width" /
# "screen_height" (in px), e.g. 600*4 and 150*4.
config = dict(
    duration=50,
    lanes_count=4,
    manual_control=True,
)

In [4]:
# Rebind `env` to the "highway-fast-v0" variant for the manual-control session;
# this replaces the environment object used by the earlier sections.
env = gym.make("highway-fast-v0", render_mode="rgb_array")
# Apply the manual-control settings, then reset to get the initial observation.
env.unwrapped.configure(config)
obs, info = env.reset()

In [None]:
# Play 20 manually driven episodes and keep each episode's length and
# undiscounted return.
episode_lens, episode_rewards = [], []

for i in range(20):
    env.reset()
    score, length, done = 0, 0, False

    while not done:
        # With manual control, these actions are ignored
        obs, reward, done, truncated, info = env.step(env.action_space.sample())
        # Treat truncation like termination for the purpose of ending the episode.
        done = done or truncated

        score += reward
        length += 1

        # Show the current frame in place of the previous one.
        clear_output(True)
        plt.imshow(env.render())
        plt.show()
        time.sleep(0.1)

    episode_lens.append(length)
    episode_rewards.append(score)