## Running Their DQN

## Installations

In [2]:
!pip install highway-env
!pip install --upgrade sympy torch
!git+https://github.com/DLR-RM/stable-baselines3

Collecting sympy
  Downloading sympy-1.13.3-py3-none-any.whl (6.2 MB)
[K     |████████████████████████████████| 6.2 MB 4.8 MB/s eta 0:00:01
zsh:1: no such file or directory: git+https://github.com/DLR-RM/stable-baselines3


In [4]:
import gymnasium as gym
import highway_env
from stable_baselines3 import DQN
import torch


# Visualization utils
%load_ext tensorboard
import sys
from tqdm.notebook import trange
# !pip install tensorboardx gym pyvirtualdisplay
# doesn't work cause not linux
# !apt-get install -y xvfb ffmpeg
# !git clone https://github.com/Farama-Foundation/HighwayEnv.git 2> /dev/null
# sys.path.insert(0, '/content/HighwayEnv/scripts/')
# from utils import record_videos, show_videos

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [26]:
# for MLP
config = {
    "observation": {
        "type": "Kinematics",
        "vehicles_count": 15,
        "features": ["presence", "x", "y", "vx", "vy", "cos_h", "sin_h"],
        "features_range": {
            "x": [-100, 100],
            "y": [-100, 100],
            "vx": [-20, 20],
            "vy": [-20, 20]
        },
        "absolute": False,
        "order": "sorted"
    }
}

# # For CNN
# config = {
#     "observation": {
#         "type": "GrayscaleObservation",
#         "observation_shape": (128, 64),
#         "stack_size": 4,
#         "weights": [0.2989, 0.5870, 0.1140],  # weights for RGB conversion
#         "scaling": 1.75,
#     },
#     "policy_frequency": 2
# }

env = gym.make("highway-fast-v0", render_mode='rgb_array', config=config)
print(env.unwrapped.config)
mps_device = torch.device("mps")

model = DQN('MlpPolicy', env,
              policy_kwargs=dict(net_arch=[256, 256]),
              learning_rate=5e-4,
              buffer_size=15000,
              learning_starts=200,
              batch_size=32,
              gamma=0.8,
              train_freq=1,
              gradient_steps=1,
              target_update_interval=50,
              verbose=1,
              tensorboard_log="highway_dqn/")
model.learn(int(2e4))

{'observation': {'type': 'Kinematics', 'vehicles_count': 15, 'features': ['presence', 'x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'], 'features_range': {'x': [-100, 100], 'y': [-100, 100], 'vx': [-20, 20], 'vy': [-20, 20]}, 'absolute': False, 'order': 'sorted'}, 'action': {'type': 'DiscreteMetaAction'}, 'simulation_frequency': 5, 'policy_frequency': 1, 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle', 'screen_width': 600, 'screen_height': 150, 'centering_position': [0.3, 0.5], 'scaling': 5.5, 'show_trajectories': False, 'render_agent': True, 'offscreen_rendering': False, 'manual_control': False, 'real_time_rendering': False, 'lanes_count': 3, 'vehicles_count': 20, 'controlled_vehicles': 1, 'initial_lane_id': None, 'duration': 30, 'ego_spacing': 1.5, 'vehicles_density': 1, 'collision_reward': -1, 'right_lane_reward': 0.1, 'high_speed_reward': 0.4, 'lane_change_reward': 0, 'reward_speed_range': [20, 30], 'normalize_reward': True, 'offroad_terminal': False}
Using cpu device
Wrapp

<stable_baselines3.dqn.dqn.DQN at 0x301e97cd0>

In [27]:
model.save("highway_dqn/model")

In [None]:
# Load and test saved model
model = DQN.load("highway_dqn/model")

env = gym.make("highway-v0", render_mode='rgb_array', config=config)
mps_device = torch.device("mps")

model = DQN('MlpPolicy', env,
              policy_kwargs=dict(net_arch=[256, 256]),
              learning_rate=2e-2,
              buffer_size=15000,
              learning_starts=200,
              batch_size=64,
              gamma=0.8,
              device=mps_device,
              train_freq=1,
              gradient_steps=1,
              target_update_interval=50,
              verbose=1,
              tensorboard_log="highway_dqn/")

# while True:
for i in range(100):
  done = truncated = False
  obs, info = env.reset()
  while not (done or truncated):
    action, _states = model.predict(obs, deterministic=False)
    print(action)
    obs, reward, done, truncated, info = env.step(action)
    env.render()

{'observation': {'type': 'Kinematics', 'vehicles_count': 15, 'features': ['presence', 'x', 'y', 'vx', 'vy', 'cos_h', 'sin_h'], 'features_range': {'x': [-100, 100], 'y': [-100, 100], 'vx': [-20, 20], 'vy': [-20, 20]}, 'absolute': False, 'order': 'sorted'}, 'action': {'type': 'DiscreteMetaAction'}, 'simulation_frequency': 15, 'policy_frequency': 1, 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle', 'screen_width': 600, 'screen_height': 150, 'centering_position': [0.3, 0.5], 'scaling': 5.5, 'show_trajectories': False, 'render_agent': True, 'offscreen_rendering': False, 'manual_control': False, 'real_time_rendering': False, 'lanes_count': 4, 'vehicles_count': 50, 'controlled_vehicles': 1, 'initial_lane_id': None, 'duration': 40, 'ego_spacing': 2, 'vehicles_density': 1, 'collision_reward': -1, 'right_lane_reward': 0.1, 'high_speed_reward': 0.4, 'lane_change_reward': 0, 'reward_speed_range': [20, 30], 'normalize_reward': True, 'offroad_terminal': False}
Using mps device
Wrappi