# **Study on the Ideal Speed for Highway Merging**

##### This study aims to determine the optimal speed for safely and efficiently merging onto highways. The process begins by evaluating a range of speed intervals to assess which performs best in terms of safety and traffic flow, by changing it in the 'reward_speed_range' from the environment's configuration. Afterward, the range of this interval will be progressively narrowed down to pinpoint the exact speed(s) that offer the best results, either identifying a single optimal speed or multiple ones if no clear standout emerges.

### **Imports**

In [1]:
import gymnasium as gym
from matplotlib import pyplot as plt
import cv2
import pprint
from IPython.display import Video
import highway_env
from highway_env import utils
from highway_env.envs import MergeEnv
import pandas as pd
import time
import numpy as np
from stable_baselines3 import PPO
import imageio
%matplotlib inline

### **Creation of the environment**

##### With the ego-vehicle on the merging lane and the following Speed Ranges:[0,10], [10,20], [20,30] (the default), [30,40], [40,50], [50,60]


In [4]:
class CustomMergeEnv(MergeEnv):
    def _make_vehicles(self) -> None:
        """
        Populate a road with several vehicles on the highway and an ego-vehicle on the merging lane.

        The ego-vehicle will only be placed on the merging lane, while other vehicles will be on the highway lanes.

        :return: the ego-vehicle
        """
        road = self.road
        
        # Ego vehicle on the merging lane
        ego_vehicle = self.action_type.vehicle_class(
            road, road.network.get_lane(("j", "k", 0)).position(30, 0), speed=20  # Merging lane
        )
        road.vehicles.append(ego_vehicle)

        other_vehicles_type = utils.class_from_path(self.config["other_vehicles_type"])

        # Create multiple vehicles on the highway lanes
        for position, speed in [(90, 29), (70, 31), (5, 31.5), (120, 28), (50, 32)]:
            lane = road.network.get_lane(("a", "b", self.np_random.integers(2))) 
            position = lane.position(position + self.np_random.uniform(-5, 5), 0)  
            speed += self.np_random.uniform(-1, 1) 
            road.vehicles.append(other_vehicles_type(road, position, speed=speed))

        # Set the ego vehicle as the primary vehicle
        self.vehicle = ego_vehicle


In [5]:
# Registering the custom environment
gym.envs.registration.register(
    id='CustomMerge-v0',
    entry_point='__main__:CustomMergeEnv', 
)

### **Merging velocity interval [0,10]**

In [6]:
# Creating an instance of the custom environment
env_0_10 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [0, 10]})

In [None]:
model = PPO('MlpPolicy', env_0_10,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_0_10/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_0_10/model")

### **Merging velocity interval [10,20]**

In [7]:
# Creating an instance of the custom environment
env_10_20 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [10,20]})

In [None]:
model = PPO('MlpPolicy', env_10_20,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_10_20/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_10_20/model")

### **Merging velocity interval [20,30]**

In [8]:
# Creating an instance of the custom environment
env_20_30 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [20,30]})

In [None]:
model = PPO('MlpPolicy', env_20_30,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_20_30/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_20_30/model")

### **Merging velocity interval [30,40]**

In [9]:
# Creating an instance of the custom environment
env_30_40 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [30, 40]})

In [None]:
model = PPO('MlpPolicy', env_30_40,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_30_40/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_30_40/model")

### **Merging velocity interval [40,50]**

In [10]:
# Creating an instance of the custom environment
env_40_50 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [40, 50]})

In [None]:
model = PPO('MlpPolicy', env_40_50,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_40_50/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_40_50/model")

### **Merging velocity interval [50,60]**

In [11]:
# Creating an instance of the custom environment
env_50_60 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [50, 60]})

In [None]:
model = PPO('MlpPolicy', env_50_60,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_50_60/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_50_60/model")

### **Evaluate and compare the models**

**For the speed range (0,10)**
- Average Reward: 16.7068
- Average Steps to Merge: 14.64
- Average Episode Time: 0.40 seconds
- Number of Collisions: 6
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 200

**For the speed range (10,20)**
- Average Reward: 15.0820
- Average Steps to Merge: 14.62
- Average Episode Time: 0.22 seconds
- Number of Collisions: 3
- Successful Merges: 197
- Number of Dangerous Driving Episodes (sudden speed changes): 3

**For the speed range (20,30)**
- Average Reward: 13.4478
- Average Steps to Merge: 14.62
- Average Episode Time: 0.21 seconds
- Number of Collisions: 6
- Successful Merges: 194
- Number of Dangerous Driving Episodes (sudden speed changes): 6

**For the speed range (30,40)**
- Average Reward: 11.8590
- Average Steps to Merge: 14.63
- Average Episode Time: 0.22 seconds
- Number of Collisions: 2
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 2

**For the speed range (40,50)**
- Average Reward: 9.9397
- Average Steps to Merge: 14.24
- Average Episode Time: 0.21 seconds
- Number of Collisions: 22
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 60

**For the speed range (50,60)**
- Average Reward: 8.5367
- Average Steps to Merge: 14.48
- Average Episode Time: 0.21 seconds
- Number of Collisions: 1
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 1

In [27]:
# Function to evaluate the agent and collect metrics
def evaluate_agent(model, env, num_episodes, success_speed_range, speed_threshold_ratio=0.5):
    total_rewards = []  # List to store total rewards for each episode
    total_collisions = 0  # Counter for total collisions across all episodes
    successful_merges = 0  # Counter for successful merges
    dangerous_driving_episodes = 0  # Counter for episodes with dangerous driving behavior
    total_steps_to_merge = []  # List to store the number of steps taken to merge in each episode
    total_episode_times = []  # List to store the time taken for each episode

    # Calculate the threshold based on the speed range
    reward_speed_range = env.unwrapped.config["reward_speed_range"]
    speed_threshold = (reward_speed_range[1] - reward_speed_range[0]) * speed_threshold_ratio  # Speed threshold for detecting sudden changes

    for episode in range(num_episodes):
        start_time = time.time()  # Record the start time of the episode
        obs, info = env.reset()  # Reset the environment and get the initial observation
        done = False  # Variable to track if the episode is finished
        episode_reward = 0  # Variable to track the reward for the current episode
        collisions = 0  # Counter for collisions in the current episode
        dangerous_driving = False  # Flag to indicate if dangerous driving occurred
        steps_to_merge = 0  # Counter for steps taken to merge
        last_speed = None  # Initialize last speed as None

        while not done:  # Loop until the episode is done
            # The agent chooses an action
            action, _states = model.predict(obs, deterministic=True)
            # Execute the action in the environment
            obs, reward, terminated, truncated, info = env.step(action)

            episode_reward += reward  # Accumulate reward for the episode
            steps_to_merge += 1  # Increment the steps to merge counter

            # Check the current speed and round to 2 decimal places
            current_speed = round(info.get('speed', 0), 2)

            # Check for sudden speed changes
            if last_speed is not None and abs(current_speed - last_speed) > speed_threshold:
                dangerous_driving = True  # Mark as dangerous driving if speed change exceeds threshold

            last_speed = current_speed  # Update the last speed for the next iteration

            # Check for collisions
            if 'crashed' in info and info['crashed']:
                collisions += 1  # Increment collision counter if a crash occurred

            # Check if the episode has ended (either 'terminated' or 'truncated')
            done = terminated or truncated

            ego_vehicle = env.unwrapped.vehicle  # Get the ego vehicle

            # Consider an episode a success if there were no collisions and final speed is within the desired range
            if not collisions and success_speed_range[0] <= current_speed <= success_speed_range[1] and tuple(ego_vehicle.lane_index[:2]) in {("b", "c"), ("c", "d")}:
                successful_merges += 1  # Increment successful merges if criteria met
                done = True  # End the episode

        # Log episode metrics
        total_rewards.append(episode_reward)  # Add episode reward to the total rewards list
        total_collisions += collisions  # Update total collisions count
        total_steps_to_merge.append(steps_to_merge)  # Add steps to merge for this episode

        if dangerous_driving:
            dangerous_driving_episodes += 1  # Increment count of dangerous driving episodes

        # Calculate the time taken for the episode and add to the list
        episode_time = time.time() - start_time  # Calculate elapsed time
        total_episode_times.append(episode_time)  # Add episode time to the list

    # Final metric calculations
    avg_reward = np.mean(total_rewards)  # Calculate average reward
    avg_steps_to_merge = np.mean(total_steps_to_merge)  # Calculate average steps to merge
    avg_episode_time = np.mean(total_episode_times)  # Calculate average episode time

    # Display results
    print(f"Average Reward: {avg_reward}")  # Print average reward
    print(f"Average Steps to Merge: {avg_steps_to_merge}")  # Print average steps to merge
    print(f"Average Episode Time: {avg_episode_time:.2f} seconds")  # Print average episode time
    print(f"Number of Collisions: {total_collisions}")  # Print total collisions
    print(f"Successful Merges: {successful_merges}")  # Print number of successful merges
    print(f"Number of Dangerous Driving Episodes (sudden speed changes): {dangerous_driving_episodes}")  # Print count of dangerous driving episodes

    return {
        "avg_reward": avg_reward,  # Return average reward
        "avg_steps_to_merge": avg_steps_to_merge,  # Return average steps to merge
        "avg_episode_time": avg_episode_time,  # Return average episode time
        "number_collisions": total_collisions,  # Return total number of collisions
        "successful_merges": successful_merges,  # Return number of successful merges
        "number_dangerous_episodes": dangerous_driving_episodes  # Return number of dangerous driving episodes
    }

In [28]:
# Load the trained model
model = PPO.load("vel_study_0_10/model")  

# Evaluate the model
results = evaluate_agent(model, env_0_10, 200, (0, 10)) 



crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [29]:
# Load the trained model
model = PPO.load("vel_study_10_20/model")

# Evaluate the model
results = evaluate_agent(model, env_10_20, 200, (10, 20))



crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
ov

In [19]:
# Load the trained model
model = PPO.load("vel_study_20_30/model")

# Evaluate the model
results = evaluate_agent(model, env_20_30, 200, (20, 30))



crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [20]:
# Load the trained model
model = PPO.load("vel_study_30_40/model")

# Evaluate the model
results = evaluate_agent(model, env_30_40, 200, (30, 40))

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse




crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [21]:
# Load the trained model
model = PPO.load("vel_study_40_50/model")

# Evaluate the model
results = evaluate_agent(model, env_40_50, 200, (40, 50))



crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

KeyboardInterrupt: 

In [None]:
# Load the trained model
model = PPO.load("vel_study_50_60/model")

# Evaluate the model
results = evaluate_agent(model, env_50_60, 200, (50, 60))

**Analysis of Driving Agent Evaluation Results**

The evaluation of the driving agent across different speed ranges reveals notable variations in performance, with clear trends as speed increases.

Starting with the **(0, 10)** range, the agent achieves the **highest average reward of 16.7275**, indicating a relatively effective strategy for accumulating rewards at low speeds. However, the agent records **0 successful merges** and **3 collisions**, suggesting an inability to execute highway merges effectively. The low number of **3 dangerous driving episodes** and the **average episode time of 0.21 seconds** imply that while the agent avoids frequent erratic behavior, it remains inefficient at successfully navigating merging scenarios.

In the **(10, 20)** range, the agent demonstrates **significant improvement in performance**. With an **average reward of 15.0820**, it achieves **197 successful merges** and maintains a **low collision count of 3**. The number of **3 dangerous driving episodes** indicates **consistent behavior**, and the **average episode time of 0.22 seconds** suggests **stable decision-making**. This range appears to balance both **safety and efficiency effectively**.

Moving to the **(20, 30)** range, the agent’s **average reward drops to 13.4478**, reflecting **increased difficulty at higher speeds**. However, it still manages **194 successful merges**, showing that the agent **remains capable of navigating merges**. The **collision count rises to 6**, and **6 dangerous driving episodes** highlight **growing instability**. The **average episode time remains consistent at 0.21 seconds**, suggesting that timing is not a primary issue.

In the **(30, 40)** range, **performance continues to decline**. The **average reward falls to 11.8590**, with **0 successful merges** recorded. Although the agent experiences only **2 collisions**, the **absence of successful merges** indicates a **critical failure in executing merging tasks at this speed**. The **2 dangerous driving episodes** suggest **minimal instability**, but the **inability to merge effectively remains the central concern**.

The **(40, 50)** range presents **significant challenges**, with an **average reward of 9.9397** and **0 successful merges**. The **collision count spikes to 22**, and the number of **60 dangerous driving episodes** indicates **severe instability in driving behavior**. The **average steps to merge decrease slightly to 14.24**, but this does not translate into **improved performance**.

Finally, in the **(50, 60)** range, the agent records its **lowest average reward of 8.5367**, with **0 successful merges** and just **1 collision**. Interestingly, there is only **1 dangerous driving episode**, suggesting that the agent struggles **not due to instability but possibly due to overly conservative or ineffective decision-making at this speed**.

Overall, the **(10, 20)** speed range emerges as the **optimal balance between safety and efficiency**. It achieves a **high number of successful merges**, maintains **low collision rates**, and exhibits **stable behavior**. In contrast, while the **(0, 10)** range achieves the **highest average reward**, it **fails to deliver successful merges**, limiting its practical utility. Beyond **20**, increasing speeds introduce **more collisions**, **dangerous driving episodes**, and a **steep decline in successful merges**, indicating that the agent is **not well-equipped to handle higher-speed scenarios effectively**. Therefore, the **(10, 20)** range stands out as the **most effective operational window for the driving agent**.

### **Narrowing the (10,20) interval**

##### In order to identify the optimal speed range within the broader interval of (10, 20), a process of gradual refinement was employed. By breaking this interval into smaller subintervals of 0.5, such as (10, 10.5), (10.5, 11), and so on, each subinterval is evaluated independently using the same metrics as before. The goal of this narrowing process is to identify which specific speed range yields the highest rewards, minimizes collisions, and reduces dangerous driving episodes. By successively refining these subintervals and analyzing the results, we can pinpoint the exact optimal speed range where the agent performs most efficiently and safely. This step-by-step method ensures that performance is maximized within the interval of interest.

In [None]:
# Modified function to evaluate multiple speed subintervals of 0.5
def evaluate_speed_intervals(model, env, base_interval, step_size, num_episodes, speed_threshold_ratio=0.3):
    # Divide the base interval (10, 20) into subintervals of 'step_size' (0.5 in this case)
    subintervals = [(base_interval[0] + i, base_interval[0] + i + step_size) for i in np.arange(0, base_interval[1] - base_interval[0], step_size)]
    
    results = {}  # Initialize a dictionary to store results for each subinterval
    
    for subinterval in subintervals:
        print(f"Evaluating for speed range {subinterval}")  # Print the current subinterval being evaluated
        result = evaluate_agent(model, env, num_episodes, subinterval, speed_threshold_ratio)  # Evaluate the agent for the current subinterval
        results[subinterval] = result  # Store the result in the dictionary
        print("\n")  # Spacing between results for readability

    return results  # Return all results after evaluations

# Parameters for the base interval and the size of subintervals
base_interval = (10, 20)  # Define the base speed interval
step_size = 0.5  # Define the size of each subinterval
model = PPO.load("vel_study_10_20/model", custom_objects={"observation_space": env_10_20.observation_space, "action_space": env_10_20.action_space}) # Load the model from the specified path

# Evaluate the model in subintervals of 0.5
results = evaluate_speed_intervals(model, env_10_20, base_interval, step_size, num_episodes=200)  # Call the function to evaluate the model

In [None]:
# Print the results of the evaluations
print(f"Results: {results}")

In [None]:
# Create a list to hold the data
data = [] 

# Iterate through the results and flatten the structure
for speed_range, metrics in results.items():  # Loop through each speed range and its corresponding metrics
    row = {'Speed Range': f"{speed_range[0]} - {speed_range[1]}"}  # Create a new dictionary for the current row with the speed range
    row.update(metrics)  # Add metrics to the row dictionary
    data.append(row)  # Append the row to the data list

# Create a DataFrame from the list of dictionaries
results_df = pd.DataFrame(data)  # Convert the list of dictionaries into a Pandas DataFrame

results_df  # Display the DataFrame

**Analysis of Speed Ranges**

- **Performance Consistency:** The average rewards across the evaluated speed ranges remain relatively stable, with values generally falling between **14.6** and **15.2**. This suggests that the agent maintains consistent behavior across different speed intervals, despite variations in other performance metrics.

- **Collision Metrics:** The **19.5 - 20.0** speed range stands out with **197 successful merges**, far exceeding any other range. It also records **3 collisions** and **3 dangerous driving episodes**, indicating a balance between efficiency and safety. In contrast, other ranges, such as **10.5 - 11.0** and **13.0 - 13.5**, reported higher collision rates despite similar average rewards.

- **Average Steps to Merge:** Across most ranges, the agent required approximately **14.6 steps on average** to complete a merge. The **15.5 - 16.0** range showed slightly higher inefficiency with an average of **14.9 steps**, hinting at difficulties in decision-making at this speed.

- **Time Efficiency:** The **19.5 - 20.0** range achieved an average episode time of **0.460 seconds**, demonstrating effective and time-efficient merging behavior. Comparatively, lower speed ranges such as **10.0 - 10.5** exhibited longer episode times, suggesting slower merging processes.

- **Dangerous Driving Episodes:** Dangerous episodes were relatively low across most speed ranges, with occasional spikes in ranges like **14.0 - 14.5** (**7 dangerous episodes**) and **17.5 - 18.0** (**6 dangerous episodes**). The **19.5 - 20.0** range maintained a minimal count of **3 dangerous episodes**, aligning with its overall performance stability.

In conclusion, the **19.5 - 20.0 speed range emerges as the optimal choice for merging scenarios**. It demonstrates the highest number of successful merges, minimal collisions, and consistent time efficiency, making it the most effective operational window for the driving agent.

In [22]:
# Load the trained model
model = PPO.load("vel_study_10_20/model")

# Initialize the environment and variables for recording
frames = []
obs, info = env_10_20.reset()
done = False
step_count = 0
max_steps = 1000

# Resize frame to be divisible by 16 (macro block size for video codecs)
def resize_frame_to_macro_block_size(frame, block_size=16):
    h, w, _ = frame.shape
    new_w = (w // block_size) * block_size
    new_h = (h // block_size) * block_size
    return cv2.resize(frame, (new_w, new_h))

# Run the agent in the environment
while step_count < max_steps and not done:
    action, _ = model.predict(obs)
    obs, reward, done, truncated, info = env_10_20.step(action)
    frame = env_10_20.render()

    # Resize the frame to avoid the macro_block_size warning
    resized_frame = resize_frame_to_macro_block_size(frame)
    frames.append(resized_frame)
    
    step_count += 1

# Close the environment
env_10_20.close()

# Save the frames as a video
video_filename = "velocity_study.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")



crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
Video saved as velocity_study.mp4


In [23]:
# Display the video
video_filename = "velocity_study.mp4"
Video(video_filename, embed=True)