In [2]:
pip install highway_env

Collecting highway_env
  Downloading highway_env-1.10.1-py3-none-any.whl.metadata (16 kB)
Collecting gymnasium>=1.0.0a2 (from highway_env)
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pygame>=2.0.2 (from highway_env)
  Downloading pygame-2.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading highway_env-1.10.1-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading pygame-2.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.0/14.0 MB[0m [31m63.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pygame, 

In [74]:
import gymnasium
import highway_env
from stable_baselines3 import DQN,PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
import wandb
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.utils import get_device

import numpy as np
import time
import warnings
import json

# Suppress specific warnings
warnings.filterwarnings("ignore") 

In [75]:
device = get_device()
WANDB_LOG = True

In [76]:
class CustomCallback(BaseCallback):
    def __init__(self, verbose=0):
        super(CustomCallback, self).__init__(verbose)
        self.max_length_ep = 100
        self.print_every = 10
        self.ep_rewards = np.zeros(self.max_length_ep)
        self.ep_speeds = np.zeros(self.max_length_ep)

        self.total_step_counter = 0
        self.step = 0
        self.episode = 0
        self.time_ep_start = time.time()
        

    def _on_step(self) -> bool:
        # print("num_collected_steps: {} | total_timesteps: {} | num_collected_episodes: {} | rewards type : {}".format(self.locals["num_collected_steps"],
        #                                                                                                          self.locals["total_timesteps"],
        #                                                                                                          self.locals["num_collected_episodes"],
        #                                                                                                          (self.locals["rewards"]).shape))

        done = self.locals["dones"][0] # It is plural "dones" coz of multiple environment parallelisation
        if done:

            reward = self.locals["rewards"][0]
            speed = self.locals["infos"][0]["speed"]
            self.ep_rewards[self.step] = reward
            self.ep_speeds[self.step] = speed
    
            self.time_ep_end = time.time()
            duration = self.time_ep_end - self.time_ep_start
            total_reward = self.ep_rewards.sum()
            avg_speed = self.ep_speeds.mean()
            crashed = self.locals["infos"][0]["crashed"]


            log_dict = {"Episode":self.episode,
                        "Episode Reward":total_reward,
                        "Duration for Ep to Run":duration,
                        "Episode Length":self.step,
                        "Crashed":int(crashed),
                        "Average Speed":avg_speed
            }
            
            if self.episode%self.print_every==0:
            
                print("Episode {} | Duration: {:.3f} | Total Step Counter: {}/{} | Episode Reward: {:.3f} | Avg Speed: {:.3f} | Crashed: {}".format(self.episode,duration,
                                                                                                                                                self.total_step_counter,
                                                                                                                                                self.locals["total_timesteps"],
                                                                                                                                                total_reward,
                                                                                                                                                avg_speed,crashed))
            
            # Do Wandb logging
            if WANDB_LOG:
                wandb.log(log_dict,step=self.episode)
            
            self.ep_rewards = np.zeros(self.max_length_ep)
            self.ep_speeds = np.zeros(self.max_length_ep)
            self.episode += 1
            self.step = 0
            self.time_ep_start = time.time()
            
        else:     
            # print("Episode not ended")
            
            reward = self.locals["rewards"][0]
            speed = self.locals["infos"][0]["speed"]
            self.ep_rewards[self.step] = reward
            self.ep_speeds[self.step] = speed

            self.step += 1

        self.total_step_counter += 1
        
    
        return True  # Continue training


In [95]:
config_dict = {"env_name":"highway-v0",
          "project_name":"RoadSense",
          "total_timesteps":100,
               "absolute":False,
          "model_save_path":"models/dqn_highway_100/model",
          "env_config_save_path":"models/dqn_highway_vehicle/env_config.json",
          "model_name":"model_name_ts_100",
          "wandb_log":WANDB_LOG,
          "verbose":0}

env_config_dict = {'observation': {'type': 'Kinematics',
  'vehicles_count': 6,
  'features': ['presence', 'x', 'y', 'vx', 'vy'],
  'features_range': {'x': [-100, 100],
   'y': [-100, 100],
   'vx': [-20, 20],
   'vy': [-20, 20]},
  'absolute': config_dict["absolute"],
  'flatten': False,
  'observe_intentions': False},
 'action': {'type': 'DiscreteMetaAction',
  'longitudinal': True,
  'lateral': False,
  'target_speeds': [0, 4.5, 9]},
 'simulation_frequency': 15,
 'policy_frequency': 1,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'screen_width': 600,
 'screen_height': 600,
 'centering_position': [0.5, 0.6],
 'scaling': 7.15,
 'show_trajectories': False,
 'render_agent': True,
 'offscreen_rendering': False,
 'manual_control': False,
 'real_time_rendering': False,
 'duration': 13,
 'destination': 'o1',
 'controlled_vehicles': 1,
 'initial_vehicle_count': 10,
 'spawn_probability': 0.6,
 'collision_reward': -10,
 'high_speed_reward': 1,
 'arrived_reward': 1.5,
 'reward_speed_range': [7.0, 9.0],
 'normalize_reward': False,
 'offroad_terminal': False}

env = gymnasium.make(config_dict["env_name"])#,config=env_config_dict)
env = Monitor(env)

wandb_log = config_dict["wandb_log"]

if wandb_log:
    run = wandb.init(
        project="RoadSense",
        config = config_dict
    )
model = DQN('MlpPolicy', env,
              policy_kwargs=dict(net_arch=[256, 256]),
              learning_rate=5e-4,
              buffer_size=15000,
              learning_starts=200,
              batch_size=32,
              gamma=0.8,
              train_freq=1,
              gradient_steps=1,
              target_update_interval=50,
              verbose=1,
              tensorboard_log="highway_dqn/")
            
# model = DQN('MlpPolicy', env,
#               policy_kwargs=dict(net_arch=[256, 256]),
#               learning_rate=5e-4,
#               buffer_size=15000,
#               learning_starts=20,
#               batch_size=32,
#               gamma=0.8,
#               train_freq=1,
#               gradient_steps=1,
#               target_update_interval=50,
#               verbose=0,
#                device = device)

# tensorboard_log="logs/{}".format(config_dict["model_name"])

model.learn(total_timesteps=config_dict["total_timesteps"],callback=CustomCallback(config_dict["verbose"]))
model.save(config_dict["model_save_path"])

# with open(config_dict["env_config_save_path"], 'w') as f:
#     json.dump(env_config_dict, f)

if wandb_log:
    run.finish()

0,1
Average Speed,▂▁▆▄▄▅▃▄▅▅▅▃▃▂▂▁▃▁▆▄▅▆▄▃▅▁▆▃▂█▄▃▃▆▄▃▄▅▄▂
Crashed,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Duration for Ep to Run,█▄▁▁▁▂▁▆▃▅▅▄▃▂▄▂▄▃▃▃▂▃▂▃▄▄▄▂▃▄▄▁▅▅▄▃▃▅▃▃
Episode,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇███
Episode Length,█▅▅▂▂▁▃▇▁▁▃▂▂▃▃▂▄▄▆▄▂▂▄▅▃▁▂▅▃▁▃▄█▄▄▂▂▄▅▃
Episode Reward,▁▄▁▂▁▂▂▃█▄▄▂▃█▃▂▄▁▁▂▄▄▃▅▃▁▁▇▄▅▃▃▂▆▅▄▅▆▅▄

0,1
Average Speed,2.64848
Crashed,1.0
Duration for Ep to Run,3.77861
Episode,166.0
Episode Length,9.0
Episode Reward,8.45693




Using cuda device
Wrapping the env in a DummyVecEnv.
Logging to highway_dqn/DQN_1
Episode 0 | Duration: 3.124 | Total Step Counter: 7/100 | Episode Reward: 5.597 | Avg Speed: 1.753 | Crashed: True
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 8.5      |
|    ep_rew_mean      | 5.98     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 2        |
|    time_elapsed     | 13       |
|    total_timesteps  | 34       |
----------------------------------


0,1
Average Speed,▂▁▄▁▁█
Crashed,▁▁▁▁▁▁
Duration for Ep to Run,▂▁▄▁▁█
Episode,▁▂▄▅▇█
Episode Length,▂▁▄▁▁█
Episode Reward,▂▁▄▁▁█
global_step,▁▁▁▁
rollout/ep_len_mean,▁
rollout/ep_rew_mean,▁
rollout/exploration_rate,▁

0,1
Average Speed,6.31821
Crashed,1.0
Duration for Ep to Run,10.73727
Episode,5.0
Episode Length,26.0
Episode Reward,20.5061
global_step,34.0
rollout/ep_len_mean,8.5
rollout/ep_rew_mean,5.97517
rollout/exploration_rate,0.05


In [None]:
# Load and test saved model
env = gymnasium.make(config_dict["env_name"])
model = DQN.load(config_dict["model_save_path"])
while True:
    done = truncated = False
    obs, info = env.reset()
    while not (done or truncated):
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)
        print("Reward : {:.3f} | Action: {}".format(reward,action))

    break    
    # env.render()