# Final Project: Reinforcement Learning for Autonomous Driving in Highway-Env
# Course: CSC 580 - Artificial Intelligence II
## Group-10
### Members:
 ###    - Sanjay Uppala (2164383)
###  - Yogitha Pollisetty (2130220)
### DePaul University

# Installing required poackages 

In [1]:
!pip install highway-env 
!pip install stable-baselines3 
!pip install gymnasium 
!pip install opencv-python
!pip install tensorboard



In [2]:
#Importing Packages 
import os
import time
import glob
import io
import base64
import numpy as np
import gymnasium as gym
import highway_env
import cv2
from stable_baselines3 import DQN,PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from IPython import get_ipython
from IPython.display import HTML, display
from IPython import get_ipython
try:
    IN_JUPYTER = True
except ImportError:
    IN_JUPYTER = False
    
if IN_JUPYTER:


# 2) Directory Setup

In [3]:
PROJECT_DIR = os.path.join(os.getcwd(), "Project_PPO")
LOG_DIR = os.path.join(PROJECT_DIR, "logs")
MODEL_DIR = os.path.join(PROJECT_DIR, "models")
VIDEO_DIR = os.path.join(PROJECT_DIR, "videos")

for folder in [PROJECT_DIR, LOG_DIR, MODEL_DIR, VIDEO_DIR]:
    os.makedirs(folder, exist_ok=True)

# 3) Utility: show_video in Jupyter

In [4]:
def show_video(video_path_pattern):
    ''' 
        Display the latest video in the Notebook
    '''
    mp4_list = glob.glob(video_path_pattern)
    if not mp4_list:
        print(f"[WARN] No video found for: {video_path_pattern}")
        return
    video_path = mp4_list[0]
    with open(video_path, "rb") as f:
        video_data = f.read()
    encoded_video = base64.b64encode(video_data).decode("ascii")
    if IN_JUPYTER:
        display(HTML(data=f'''
            <video width="640" height="480" controls>
                <source src="data:video/mp4;base64,{encoded_video}" type="video/mp4">
            </video>
        '''))
    else:
        print(f"[INFO] Video saved: {video_path}")

# 4) Environment Creation

In [5]:
def make_env(env_name="highway-fast-v0"):
    '''
    Created a Configure for the Enviornment using Gym for traning purpose
    '''
    env = gym.make(env_name, render_mode="rgb_array")
    if hasattr(env.unwrapped, "configure"):
        env.unwrapped.configure({
            "lanes_count": 4,
            "vehicles_count": 20,
            "duration": 40,
            "collision_reward": -2,
            "high_speed_reward": 1,
            "reward_speed_range": [20, 30],
            "action": {"type": "DiscreteMetaAction"}
        })
    return Monitor(env)

# 5) Video Recording with OpenCV

In [6]:
def record_with_opencv(frames, out_file="demo.mp4", fps=15):
    '''
    OpennCv package has been used to record the video in 3 format - MP4,AVC1,XVID
    '''
    if not frames:
        print("[WARN] No frames to record.")
        return
    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'avc1') 
    out = cv2.VideoWriter(out_file, fourcc, fps, (width, height))
    for frame in frames:
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        out.write(bgr_frame)
    out.release()
    print(f"[INFO] Created video: {out_file}")

# 6) Traing PPO MOdel

In [7]:
if __name__ == "__main__":
    ENVIRONMENTS = ["highway-fast-v0", "intersection-v0", "roundabout-v0"]
    LEARNING_RATES = [0.0005, 0.001]
    GAMMAS = [0.95, 0.8]

    TOTAL_TIMESTEPS = 10000  

    # dictionary to store final results
    results = {}

    for env_name in ENVIRONMENTS:
        for lr in LEARNING_RATES:
            for gm in GAMMAS:
                # Unique run_id
                run_id = f"{env_name}_PPO_lr{lr}_gm{gm}"
                print(f"TRAINING RUN: {run_id}")

                # Create environment
                vec_env = make_env(env_name)

                tb_log_dir = os.path.join(LOG_DIR, run_id)

                # Create PPO with chosen LR, gamma
                model = PPO(
                    "MlpPolicy",
                    vec_env,
                    learning_rate=lr,
                    gamma=gm,
                    n_steps=512,
                    batch_size=64,
                    n_epochs=10,
                    clip_range=0.2,
                    verbose=0,
                    tensorboard_log=tb_log_dir
                )

                # Train
                model.learn(total_timesteps=TOTAL_TIMESTEPS)

                # Evaluate
                mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=5)

                if mean_reward > 0:
                    success_rate = (mean_reward - std_reward) / mean_reward
                else:
                    success_rate = 0

                print(f" Completed run for env={env_name}, gamma={gm}, lr={lr}")
                print(f"   Final average reward: {mean_reward:.2f}")
                print(f"   Success rate: {success_rate:.2f} \n")

                # Save model
                model_path = os.path.join(MODEL_DIR, f"{run_id}_model")
                model.save(model_path)
                print(f"[INFO] Model saved at: {model_path}\n")

                # Record short video
                demo_env = make_env(env_name)
                if hasattr(demo_env.unwrapped, "config"):
                    demo_env.unwrapped.config["simulation_frequency"] = 15

                frames = []
                obs, info = demo_env.reset()
                done = truncated = False
                for step in range(300):
                    action, _ = model.predict(obs, deterministic=True)
                    obs, reward, done, truncated, info = demo_env.step(action)
                    frames.append(demo_env.render())
                    if done or truncated:
                        obs, info = demo_env.reset()
                demo_env.close()

                video_file = os.path.join(VIDEO_DIR, f"{run_id}_{int(time.time())}.mp4")
                record_with_opencv(frames, out_file=video_file, fps=15)
                show_video(video_file)

                results[run_id] = {
                    "env": env_name,
                    "lr": lr,
                    "gamma": gm,
                    "mean_reward": mean_reward,
                    "std_reward": std_reward,
                    "success_rate": success_rate
                }

                # Display TENSORBOARD plot for this run usnig logs files
                if IN_JUPYTER:
                    get_ipython().run_line_magic('load_ext', 'tensorboard')
                    get_ipython().run_line_magic('tensorboard', '--logdir ./Project_PPO/logs')

    print("\nFinal Result of All model trained Above\n")
    for i in results:
        print(i)


TRAINING RUN: highway-fast-v0_PPO_lr0.0005_gm0.95
[INFO] Completed run for env=highway-fast-v0, gamma=0.95, lr=0.0005
   Final average reward: 27.78
   Success rate: 0.98 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/highway-fast-v0_PPO_lr0.0005_gm0.95_model



2025-03-15 21:48:40.113 python[25677:1370912] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-15 21:48:40.113 python[25677:1370912] +[IMKInputSession subclass]: chose IMKInputSession_Modern


[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/highway-fast-v0_PPO_lr0.0005_gm0.95_1742093338.mp4


TRAINING RUN: highway-fast-v0_PPO_lr0.0005_gm0.8
[INFO] Completed run for env=highway-fast-v0, gamma=0.8, lr=0.0005
   Final average reward: 23.16
   Success rate: 0.56 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/highway-fast-v0_PPO_lr0.0005_gm0.8_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/highway-fast-v0_PPO_lr0.0005_gm0.8_1742094755.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 0:23:36 ago. (Use '!kill 26105' to kill it.)

TRAINING RUN: highway-fast-v0_PPO_lr0.001_gm0.95
[INFO] Completed run for env=highway-fast-v0, gamma=0.95, lr=0.001
   Final average reward: 29.28
   Success rate: 0.95 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/highway-fast-v0_PPO_lr0.001_gm0.95_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/highway-fast-v0_PPO_lr0.001_gm0.95_1742096158.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 0:46:59 ago. (Use '!kill 26105' to kill it.)

TRAINING RUN: highway-fast-v0_PPO_lr0.001_gm0.8
[INFO] Completed run for env=highway-fast-v0, gamma=0.8, lr=0.001
   Final average reward: 32.16
   Success rate: 0.95 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/highway-fast-v0_PPO_lr0.001_gm0.8_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/highway-fast-v0_PPO_lr0.001_gm0.8_1742097591.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 1:10:52 ago. (Use '!kill 26105' to kill it.)

TRAINING RUN: intersection-v0_PPO_lr0.0005_gm0.95


  logger.deprecation(


[INFO] Completed run for env=intersection-v0, gamma=0.95, lr=0.0005
   Final average reward: -0.20
   Success rate: 0.00 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/intersection-v0_PPO_lr0.0005_gm0.95_model



  logger.deprecation(


[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/intersection-v0_PPO_lr0.0005_gm0.95_1742100127.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 1:53:09 ago. (Use '!kill 26105' to kill it.)

TRAINING RUN: intersection-v0_PPO_lr0.0005_gm0.8


  logger.deprecation(


[INFO] Completed run for env=intersection-v0, gamma=0.8, lr=0.0005
   Final average reward: -1.00
   Success rate: 0.00 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/intersection-v0_PPO_lr0.0005_gm0.8_model



  logger.deprecation(


[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/intersection-v0_PPO_lr0.0005_gm0.8_1742102665.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 2:35:27 ago. (Use '!kill 26105' to kill it.)

TRAINING RUN: intersection-v0_PPO_lr0.001_gm0.95


  logger.deprecation(


[INFO] Completed run for env=intersection-v0, gamma=0.95, lr=0.001
   Final average reward: 0.20
   Success rate: -4.83 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/intersection-v0_PPO_lr0.001_gm0.95_model



  logger.deprecation(


[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/intersection-v0_PPO_lr0.001_gm0.95_1742105194.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 3:17:36 ago. (Use '!kill 26105' to kill it.)

TRAINING RUN: intersection-v0_PPO_lr0.001_gm0.8


  logger.deprecation(


[INFO] Completed run for env=intersection-v0, gamma=0.8, lr=0.001
   Final average reward: -0.20
   Success rate: 0.00 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/intersection-v0_PPO_lr0.001_gm0.8_model



  logger.deprecation(


[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/intersection-v0_PPO_lr0.001_gm0.8_1742107729.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 3:59:51 ago. (Use '!kill 26105' to kill it.)

TRAINING RUN: roundabout-v0_PPO_lr0.0005_gm0.95
[INFO] Completed run for env=roundabout-v0, gamma=0.95, lr=0.0005
   Final average reward: 6.67
   Success rate: 0.65 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/roundabout-v0_PPO_lr0.0005_gm0.95_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/roundabout-v0_PPO_lr0.0005_gm0.95_1742109255.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 4:25:17 ago. (Use '!kill 26105' to kill it.)

TRAINING RUN: roundabout-v0_PPO_lr0.0005_gm0.8
[INFO] Completed run for env=roundabout-v0, gamma=0.8, lr=0.0005
   Final average reward: 7.67
   Success rate: 1.00 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/roundabout-v0_PPO_lr0.0005_gm0.8_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/roundabout-v0_PPO_lr0.0005_gm0.8_1742110818.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 4:51:20 ago. (Use '!kill 26105' to kill it.)

TRAINING RUN: roundabout-v0_PPO_lr0.001_gm0.95
[INFO] Completed run for env=roundabout-v0, gamma=0.95, lr=0.001
   Final average reward: 5.46
   Success rate: 0.48 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/roundabout-v0_PPO_lr0.001_gm0.95_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/roundabout-v0_PPO_lr0.001_gm0.95_1742112391.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 5:17:33 ago. (Use '!kill 26105' to kill it.)

TRAINING RUN: roundabout-v0_PPO_lr0.001_gm0.8
[INFO] Completed run for env=roundabout-v0, gamma=0.8, lr=0.001
   Final average reward: 7.67
   Success rate: 1.00 

[INFO] Model saved at: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/models/roundabout-v0_PPO_lr0.001_gm0.8_model

[INFO] Created video: /Users/sanjayuppala/Desktop/CSC 580 AI_2/projrct PPO/Project_PPO/videos/roundabout-v0_PPO_lr0.001_gm0.8_1742113961.mp4


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 26105), started 5:43:43 ago. (Use '!kill 26105' to kill it.)


Final Result of All model trained Above

highway-fast-v0_PPO_lr0.0005_gm0.95
highway-fast-v0_PPO_lr0.0005_gm0.8
highway-fast-v0_PPO_lr0.001_gm0.95
highway-fast-v0_PPO_lr0.001_gm0.8
intersection-v0_PPO_lr0.0005_gm0.95
intersection-v0_PPO_lr0.0005_gm0.8
intersection-v0_PPO_lr0.001_gm0.95
intersection-v0_PPO_lr0.001_gm0.8
roundabout-v0_PPO_lr0.0005_gm0.95
roundabout-v0_PPO_lr0.0005_gm0.8
roundabout-v0_PPO_lr0.001_gm0.95
roundabout-v0_PPO_lr0.001_gm0.8


In [18]:
!pkill -f tensorboard  # For Linux/macOS


In [19]:
%load_ext tensorboard
%tensorboard --logdir ./Project_PPO/logs --port=6006


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
