# Final Project: Reinforcement Learning for Autonomous Driving in Highway-Env
# Course: CSC 580 - Artificial Intelligence II
## Group-10
### Members:
 ###    - Sanjay Uppala (2164383)
###  - Yogitha Pollisetty (2130220)
### DePaul University

# Installing required poackages 

In [1]:
!pip install highway-env 
!pip install stable-baselines3 
!pip install gymnasium 
!pip install opencv-python
!pip install tensorboard



In [5]:
import os
import time
import glob
import io
import base64
import numpy as np
import gymnasium as gym
import highway_env
import cv2
from stable_baselines3 import DQN,A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from IPython import get_ipython
from IPython.display import HTML, display
from IPython import get_ipython
try:
    IN_JUPYTER = True
except ImportError:
    IN_JUPYTER = False
    


# 2) Directory Setup

In [3]:
PROJECT_DIR = os.path.join(os.getcwd(), "Project_A2C")
LOG_DIR = os.path.join(PROJECT_DIR, "logs")
MODEL_DIR = os.path.join(PROJECT_DIR, "models")
VIDEO_DIR = os.path.join(PROJECT_DIR, "videos")

for folder in [PROJECT_DIR, LOG_DIR, MODEL_DIR, VIDEO_DIR]:
    os.makedirs(folder, exist_ok=True)

# 3) Utility: show_video in Jupyter

In [4]:
def show_video(video_path_pattern):
    ''' 
        Display the latest video in the Notebook
    '''
    mp4_list = glob.glob(video_path_pattern)
    if not mp4_list:
        print(f"[WARN] No video found for: {video_path_pattern}")
        return
    video_path = mp4_list[0]
    with open(video_path, "rb") as f:
        video_data = f.read()
    encoded_video = base64.b64encode(video_data).decode("ascii")
    if IN_JUPYTER:
        display(HTML(data=f'''
            <video width="640" height="480" controls>
                <source src="data:video/mp4;base64,{encoded_video}" type="video/mp4">
            </video>
        '''))
    else:
        print(f"[INFO] Video saved: {video_path}")

# 4) Environment Creation

In [5]:
def make_env(env_name="highway-fast-v0"):
    '''
    Created a Configure for the Enviornment using Gym for traning
    '''
    env = gym.make(env_name, render_mode="rgb_array")
    if hasattr(env.unwrapped, "configure"):
        env.unwrapped.configure({
            "lanes_count": 4,
            "vehicles_count": 20,
            "duration": 40,
            "collision_reward": -2,
            "high_speed_reward": 1,
            "reward_speed_range": [20, 30],
            "action": {"type": "DiscreteMetaAction"}
        })
    return Monitor(env)

# 5) Video Recording with OpenCV

In [6]:
def record_with_opencv(frames, out_file="demo.mp4", fps=15):
    '''
    OpennCv package has been used to record the video in 3 format-MP4,AVC1,XVID
    '''
    if not frames:
        print("[WARN] No frames to record.")
        return
    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'avc1') 
    out = cv2.VideoWriter(out_file, fourcc, fps, (width, height))
    for frame in frames:
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        out.write(bgr_frame)
    out.release()
    print(f"[INFO] Created video: {out_file}")

# 6)Training A2C Model 

In [7]:
if __name__ == "__main__":
    ENVIRONMENTS = ["highway-fast-v0", "intersection-v0", "roundabout-v0"]
    LEARNING_RATES = [0.0005, 0.001]
    GAMMAS = [0.95, 0.8]

    TOTAL_TIMESTEPS = 100000  

    # dictionary to store final results 
    results = {}

    for env_name in ENVIRONMENTS:
        for lr in LEARNING_RATES:
            for gm in GAMMAS:
                # Unique run_id
                run_id = f"{env_name}_A2C_lr{lr}_gm{gm}"
                print(f"TRAINING RUN: {run_id}")

                # Create environment
                vec_env = make_env(env_name)

                tb_log_dir = os.path.join(LOG_DIR, run_id)

                # Initialize the A2C model
                model = A2C(
                    "MlpPolicy",  # Multi-layer Perceptron policy
                    vec_env,          # Gym environment
                    learning_rate=lr,
                    gamma=gm,   # Discount factor
                    n_steps=5,    # Number of steps before updating gradients
                    vf_coef=0.5,  # Value function coefficient
                    ent_coef=0.01,  # Entropy regularization (improves exploration)
                    verbose=0,
                    tensorboard_log=tb_log_dir
                )

                # Train
                model.learn(total_timesteps=TOTAL_TIMESTEPS)

                # Evaluate
                mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=5)

                if mean_reward > 0:
                    success_rate = (mean_reward - std_reward) / mean_reward
                else:
                    success_rate = 0

                print(f" Completed run for env={env_name}, gamma={gm}, lr={lr}")
                print(f"   Final average reward: {mean_reward:.2f}")
                print(f"   Success rate: {success_rate:.2f} \n")

                # Save model
                model_path = os.path.join(MODEL_DIR, f"{run_id}_model")
                model.save(model_path)
                print(f"[INFO] Model saved at: {model_path}\n")

                # Record short video
                demo_env = make_env(env_name)
                if hasattr(demo_env.unwrapped, "config"):
                    demo_env.unwrapped.config["simulation_frequency"] = 15

                frames = []
                obs, info = demo_env.reset()
                done = truncated = False
                for step in range(300):
                    action, _ = model.predict(obs, deterministic=True)
                    obs, reward, done, truncated, info = demo_env.step(action)
                    frames.append(demo_env.render())
                    if done or truncated:
                        obs, info = demo_env.reset()
                demo_env.close()

                video_file = os.path.join(VIDEO_DIR, f"{run_id}_{int(time.time())}.mp4")
                record_with_opencv(frames, out_file=video_file, fps=15)
                show_video(video_file)

                results[run_id] = {
                    "env": env_name,
                    "lr": lr,
                    "gamma": gm,
                    "mean_reward": mean_reward,
                    "std_reward": std_reward,
                    "success_rate": success_rate
                }

                # Display TENSORBOARD plot for this run using logs file
                if IN_JUPYTER:
                    get_ipython().run_line_magic('load_ext', 'tensorboard')
                    get_ipython().run_line_magic('tensorboard', '--logdir ./Project_A2C/logs')

    print("\nFinal Result of All model trained Above\n")
    for i in results:
        print(i)


TRAINING RUN: highway-fast-v0_A2C_lr0.0005_gm0.95
[INFO] Completed run for env=highway-fast-v0, gamma=0.95, lr=0.0005
   Final average reward: 8.49
   Success rate: 0.30 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/highway-fast-v0_A2C_lr0.0005_gm0.95_model



2025-03-17 02:36:29.087 python[45071:2415797] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-17 02:36:29.087 python[45071:2415797] +[IMKInputSession subclass]: chose IMKInputSession_Modern


[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/highway-fast-v0_A2C_lr0.0005_gm0.95_1742197007.mp4


TRAINING RUN: highway-fast-v0_A2C_lr0.0005_gm0.8
[INFO] Completed run for env=highway-fast-v0, gamma=0.8, lr=0.0005
   Final average reward: 14.10
   Success rate: 0.82 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/highway-fast-v0_A2C_lr0.0005_gm0.8_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/highway-fast-v0_A2C_lr0.0005_gm0.8_1742198446.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 0:23:55 ago. (Use '!kill 45311' to kill it.)

TRAINING RUN: highway-fast-v0_A2C_lr0.001_gm0.95
[INFO] Completed run for env=highway-fast-v0, gamma=0.95, lr=0.001
   Final average reward: 14.25
   Success rate: 0.25 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/highway-fast-v0_A2C_lr0.001_gm0.95_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/highway-fast-v0_A2C_lr0.001_gm0.95_1742199872.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 0:47:40 ago. (Use '!kill 45311' to kill it.)

TRAINING RUN: highway-fast-v0_A2C_lr0.001_gm0.8
[INFO] Completed run for env=highway-fast-v0, gamma=0.8, lr=0.001
   Final average reward: 7.80
   Success rate: 0.48 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/highway-fast-v0_A2C_lr0.001_gm0.8_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/highway-fast-v0_A2C_lr0.001_gm0.8_1742201308.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 1:11:36 ago. (Use '!kill 45311' to kill it.)

TRAINING RUN: intersection-v0_A2C_lr0.0005_gm0.95


  logger.deprecation(


[INFO] Completed run for env=intersection-v0, gamma=0.95, lr=0.0005
   Final average reward: -0.20
   Success rate: 0.00 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/intersection-v0_A2C_lr0.0005_gm0.95_model



  logger.deprecation(


[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/intersection-v0_A2C_lr0.0005_gm0.95_1742203838.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 1:53:47 ago. (Use '!kill 45311' to kill it.)

TRAINING RUN: intersection-v0_A2C_lr0.0005_gm0.8


  logger.deprecation(


[INFO] Completed run for env=intersection-v0, gamma=0.8, lr=0.0005
   Final average reward: -0.20
   Success rate: 0.00 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/intersection-v0_A2C_lr0.0005_gm0.8_model



  logger.deprecation(


[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/intersection-v0_A2C_lr0.0005_gm0.8_1742206362.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 2:35:51 ago. (Use '!kill 45311' to kill it.)

TRAINING RUN: intersection-v0_A2C_lr0.001_gm0.95


  logger.deprecation(


[INFO] Completed run for env=intersection-v0, gamma=0.95, lr=0.001
   Final average reward: 0.40
   Success rate: -2.00 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/intersection-v0_A2C_lr0.001_gm0.95_model



  logger.deprecation(


[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/intersection-v0_A2C_lr0.001_gm0.95_1742208888.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 3:17:57 ago. (Use '!kill 45311' to kill it.)

TRAINING RUN: intersection-v0_A2C_lr0.001_gm0.8


  logger.deprecation(


[INFO] Completed run for env=intersection-v0, gamma=0.8, lr=0.001
   Final average reward: -0.20
   Success rate: 0.00 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/intersection-v0_A2C_lr0.001_gm0.8_model



  logger.deprecation(


[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/intersection-v0_A2C_lr0.001_gm0.8_1742211425.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 4:00:14 ago. (Use '!kill 45311' to kill it.)

TRAINING RUN: roundabout-v0_A2C_lr0.0005_gm0.95
[INFO] Completed run for env=roundabout-v0, gamma=0.95, lr=0.0005
   Final average reward: 6.73
   Success rate: 0.72 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/roundabout-v0_A2C_lr0.0005_gm0.95_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/roundabout-v0_A2C_lr0.0005_gm0.95_1742212954.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 4:25:43 ago. (Use '!kill 45311' to kill it.)

TRAINING RUN: roundabout-v0_A2C_lr0.0005_gm0.8
[INFO] Completed run for env=roundabout-v0, gamma=0.8, lr=0.0005
   Final average reward: 4.25
   Success rate: 0.33 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/roundabout-v0_A2C_lr0.0005_gm0.8_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/roundabout-v0_A2C_lr0.0005_gm0.8_1742214479.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 4:51:08 ago. (Use '!kill 45311' to kill it.)

TRAINING RUN: roundabout-v0_A2C_lr0.001_gm0.95
[INFO] Completed run for env=roundabout-v0, gamma=0.95, lr=0.001
   Final average reward: 6.33
   Success rate: 0.58 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/roundabout-v0_A2C_lr0.001_gm0.95_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/roundabout-v0_A2C_lr0.001_gm0.95_1742216015.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 5:16:44 ago. (Use '!kill 45311' to kill it.)

TRAINING RUN: roundabout-v0_A2C_lr0.001_gm0.8
[INFO] Completed run for env=roundabout-v0, gamma=0.8, lr=0.001
   Final average reward: 5.20
   Success rate: 0.42 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/models/roundabout-v0_A2C_lr0.001_gm0.8_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/Project_A2C/Project_A2C/videos/roundabout-v0_A2C_lr0.001_gm0.8_1742217545.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 45311), started 5:42:14 ago. (Use '!kill 45311' to kill it.)


Final Result of All model trained Above

highway-fast-v0_A2C_lr0.0005_gm0.95
highway-fast-v0_A2C_lr0.0005_gm0.8
highway-fast-v0_A2C_lr0.001_gm0.95
highway-fast-v0_A2C_lr0.001_gm0.8
intersection-v0_A2C_lr0.0005_gm0.95
intersection-v0_A2C_lr0.0005_gm0.8
intersection-v0_A2C_lr0.001_gm0.95
intersection-v0_A2C_lr0.001_gm0.8
roundabout-v0_A2C_lr0.0005_gm0.95
roundabout-v0_A2C_lr0.0005_gm0.8
roundabout-v0_A2C_lr0.001_gm0.95
roundabout-v0_A2C_lr0.001_gm0.8


In [10]:
#To kill the prvious tensboard
os.system("pkill -f tensorboard")
# Display TENSORBOARD plot for this run 
if IN_JUPYTER:
    get_ipython().run_line_magic('load_ext', 'tensorboard')
    get_ipython().run_line_magic('tensorboard', '--logdir ./Project_A2C/logs')

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


🚀 TensorBoard is running at http://localhost:6015/
