# **Study on the Ideal Speed for Highway Merging**

##### This study aims to determine the optimal speed for safely and efficiently merging onto highways. The process begins by evaluating several speed intervals to assess which performs best in terms of safety and traffic flow; each interval is selected by setting the `reward_speed_range` entry of the environment's configuration. Afterward, the best-performing interval is progressively narrowed down to pinpoint the exact speed(s) that offer the best results, identifying either a single optimal speed or several if no clear standout emerges.

### **Imports**

In [32]:
import gymnasium as gym
from matplotlib import pyplot as plt
import cv2
import pprint
from IPython.display import Video
import highway_env
from highway_env import utils
from highway_env.envs import MergeEnv
import pandas as pd
import time
import numpy as np
from stable_baselines3 import PPO
import imageio
%matplotlib inline

### **Creation of the environment**

##### With the ego-vehicle on the merging lane and the following speed ranges: [0,10], [10,20], [20,30] (the default), [30,40], [40,50], [50,60]


In [33]:
class CustomMergeEnv(MergeEnv):
    """MergeEnv variant whose ego-vehicle is always spawned on the merging lane."""

    def _make_vehicles(self) -> None:
        """
        Spawn the ego-vehicle on the merging lane and five vehicles on the highway.

        The ego-vehicle is placed at longitudinal position 30 of lane ("j", "k", 0)
        with speed 20. Each other vehicle is placed on a randomly chosen lane
        (index 0 or 1) of segment ("a", "b"), with its nominal position jittered
        by up to +/-5 and its nominal speed by up to +/-1.

        :return: None; vehicles are appended to ``self.road.vehicles`` and the
                 ego-vehicle is stored in ``self.vehicle``
        """
        road = self.road

        # Ego vehicle: fixed start on the merging lane
        merging_lane = road.network.get_lane(("j", "k", 0))
        ego_vehicle = self.action_type.vehicle_class(
            road, merging_lane.position(30, 0), speed=20
        )
        road.vehicles.append(ego_vehicle)

        traffic_class = utils.class_from_path(self.config["other_vehicles_type"])

        # (nominal longitudinal position, nominal speed) for each highway vehicle
        traffic_spawns = [(90, 29), (70, 31), (5, 31.5), (120, 28), (50, 32)]
        for longitudinal, nominal_speed in traffic_spawns:
            # Draw order (lane, position jitter, speed jitter) is kept so a seeded
            # generator produces the same traffic layout
            lane = road.network.get_lane(("a", "b", self.np_random.integers(2)))
            spawn_point = lane.position(longitudinal + self.np_random.uniform(-5, 5), 0)
            jittered_speed = nominal_speed + self.np_random.uniform(-1, 1)
            road.vehicles.append(traffic_class(road, spawn_point, speed=jittered_speed))

        # The ego vehicle is the controlled (primary) vehicle
        self.vehicle = ego_vehicle


In [35]:
# Make CustomMergeEnv available to gym.make under the id 'CustomMerge-v0';
# the '__main__:' prefix points at the class defined in this notebook session
gym.register(id='CustomMerge-v0', entry_point='__main__:CustomMergeEnv')

### **Merging velocity interval [0,10]**

In [36]:
# Custom merge environment with reward_speed_range set to [0, 10]
env_0_10 = gym.make(
    "CustomMerge-v0",
    config={"reward_speed_range": [0, 10]},
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [0, 10] environment and save the resulting policy
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_0_10,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    policy_kwargs={"net_arch": [256, 256]},
    tensorboard_log="vel_study_0_10/",
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("vel_study_0_10/model")

### **Merging velocity interval [10,20]**

In [37]:
# Custom merge environment with reward_speed_range set to [10, 20]
env_10_20 = gym.make(
    "CustomMerge-v0",
    config={"reward_speed_range": [10, 20]},
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [10, 20] environment and save the resulting policy
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_10_20,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    policy_kwargs={"net_arch": [256, 256]},
    tensorboard_log="vel_study_10_20/",
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("vel_study_10_20/model")

### **Merging velocity interval [20,30]**

In [38]:
# Custom merge environment with reward_speed_range set to [20, 30]
env_20_30 = gym.make(
    "CustomMerge-v0",
    config={"reward_speed_range": [20, 30]},
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [20, 30] environment and save the resulting policy
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_20_30,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    policy_kwargs={"net_arch": [256, 256]},
    tensorboard_log="vel_study_20_30/",
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("vel_study_20_30/model")

### **Merging velocity interval [30,40]**

In [39]:
# Custom merge environment with reward_speed_range set to [30, 40]
env_30_40 = gym.make(
    "CustomMerge-v0",
    config={"reward_speed_range": [30, 40]},
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [30, 40] environment and save the resulting policy
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_30_40,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    policy_kwargs={"net_arch": [256, 256]},
    tensorboard_log="vel_study_30_40/",
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("vel_study_30_40/model")

### **Merging velocity interval [40,50]**

In [40]:
# Custom merge environment with reward_speed_range set to [40, 50]
env_40_50 = gym.make(
    "CustomMerge-v0",
    config={"reward_speed_range": [40, 50]},
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [40, 50] environment and save the resulting policy
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_40_50,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    policy_kwargs={"net_arch": [256, 256]},
    tensorboard_log="vel_study_40_50/",
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("vel_study_40_50/model")

### **Merging velocity interval [50,60]**

In [41]:
# Custom merge environment with reward_speed_range set to [50, 60]
env_50_60 = gym.make(
    "CustomMerge-v0",
    config={"reward_speed_range": [50, 60]},
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [50, 60] environment and save the resulting policy
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_50_60,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    policy_kwargs={"net_arch": [256, 256]},
    tensorboard_log="vel_study_50_60/",
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("vel_study_50_60/model")

### **Evaluate and compare the models**

**For the speed range (0,10)**
- Average Reward: 16.6739
- Average Steps to Merge: 14.59
- Average Episode Time: 0.39 seconds
- Number of Collisions: 4
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 4

**For the speed range (10,20)**
- Average Reward: 12.2107
- Average Steps to Merge: 11.73
- Average Episode Time: 0.32 seconds
- Number of Collisions: 0
- Successful Merges: 200
- Number of Dangerous Driving Episodes (sudden speed changes): 0

**For the speed range (20,30)**
- Average Reward: 7.4834
- Average Steps to Merge: 8.0
- Average Episode Time: 0.22 seconds
- Number of Collisions: 0
- Successful Merges: 200
- Number of Dangerous Driving Episodes (sudden speed changes): 0

**For the speed range (30,40)**
- Average Reward: 11.8741
- Average Steps to Merge: 14.66
- Average Episode Time: 0.41 seconds
- Number of Collisions: 2
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 2

**For the speed range (40,50)**
- Average Reward: 9.9972
- Average Steps to Merge: 14.32
- Average Episode Time: 0.40 seconds
- Number of Collisions: 19
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 2

**For the speed range (50,60)**
- Average Reward: 8.5307
- Average Steps to Merge: 14.49
- Average Episode Time: 0.40 seconds
- Number of Collisions: 1
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 1

In [42]:
# Roll out a trained agent and summarise its merging behaviour
def evaluate_agent(model, env, num_episodes, success_speed_range, speed_threshold_ratio=0.5):
    """
    Run ``model`` in ``env`` for ``num_episodes`` episodes and collect merge metrics.

    An episode is stopped early and counted as a successful merge at the first step
    where no crash has been reported, the reported speed lies inside
    ``success_speed_range`` (bounds inclusive) and the ego-vehicle's lane segment is
    ("b", "c") or ("c", "d"). An episode is flagged as dangerous when the reported
    speed changes between two consecutive steps by more than the width of the
    environment's ``reward_speed_range`` multiplied by ``speed_threshold_ratio``.

    :param model: agent exposing ``predict(obs, deterministic=True)``
    :param env: environment exposing ``reset()``, ``step()`` and, through
                ``env.unwrapped``, a ``config`` dict and the ego ``vehicle``
    :param num_episodes: number of episodes to run
    :param success_speed_range: (low, high) speed window required for a successful merge
    :param speed_threshold_ratio: fraction of the reward speed range width used as the
                                  sudden-speed-change threshold
    :return: dict with keys ``avg_reward``, ``avg_steps_to_merge``, ``avg_episode_time``,
             ``number_collisions``, ``successful_merges``, ``number_dangerous_episodes``
    """
    episode_returns = []
    episode_lengths = []
    episode_durations = []
    crash_total = 0
    merge_total = 0
    erratic_total = 0

    # Largest step-to-step speed change that is still considered smooth driving
    reward_bounds = env.unwrapped.config["reward_speed_range"]
    max_speed_jump = (reward_bounds[1] - reward_bounds[0]) * speed_threshold_ratio

    # NOTE(review): only the segment is checked, not the lane index. In highway-env's
    # MergeEnv the ("b", "c") segment appears to also hold the merging lane itself
    # (index 2); if so, a merge is credited before the ego changes lane -- confirm.
    merged_segments = {("b", "c"), ("c", "d")}

    for _ in range(num_episodes):
        started_at = time.time()  # wall-clock timing also covers the reset below
        obs, info = env.reset()
        ep_return = 0
        ep_steps = 0
        ep_crashes = 0
        erratic = False
        previous_speed = None
        finished = False

        while not finished:
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env.step(action)

            ep_return += reward
            ep_steps += 1

            # Speed as reported by the environment (0 when absent), rounded to 2 decimals
            speed = round(info.get('speed', 0), 2)
            if previous_speed is not None:
                if abs(speed - previous_speed) > max_speed_jump:
                    erratic = True
            previous_speed = speed

            if info.get('crashed'):
                ep_crashes += 1

            finished = terminated or truncated

            ego = env.unwrapped.vehicle
            crash_free_in_window = (
                ep_crashes == 0
                and success_speed_range[0] <= speed <= success_speed_range[1]
            )
            if crash_free_in_window and tuple(ego.lane_index[:2]) in merged_segments:
                merge_total += 1
                finished = True  # stop at the merge; later steps are not evaluated

        # Per-episode bookkeeping. Steps are recorded for every episode, so the
        # "steps to merge" average also includes episodes that never merged.
        episode_returns.append(ep_return)
        crash_total += ep_crashes
        episode_lengths.append(ep_steps)
        if erratic:
            erratic_total += 1
        episode_durations.append(time.time() - started_at)

    avg_reward = np.mean(episode_returns)
    avg_steps_to_merge = np.mean(episode_lengths)
    avg_episode_time = np.mean(episode_durations)

    # Report
    print(f"Average Reward: {avg_reward}")
    print(f"Average Steps to Merge: {avg_steps_to_merge}")
    print(f"Average Episode Time: {avg_episode_time:.2f} seconds")
    print(f"Number of Collisions: {crash_total}")
    print(f"Successful Merges: {merge_total}")
    print(f"Number of Dangerous Driving Episodes (sudden speed changes): {erratic_total}")

    return {
        "avg_reward": avg_reward,
        "avg_steps_to_merge": avg_steps_to_merge,
        "avg_episode_time": avg_episode_time,
        "number_collisions": crash_total,
        "successful_merges": merge_total,
        "number_dangerous_episodes": erratic_total,
    }

In [43]:
# Restore the policy saved for the [0, 10] study and score it over 200 episodes
model = PPO.load("vel_study_0_10/model")
results = evaluate_agent(model=model, env=env_0_10, num_episodes=200, success_speed_range=(0, 10))



crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [44]:
# Restore the policy saved for the [10, 20] study and score it over 200 episodes
model = PPO.load("vel_study_10_20/model")
results = evaluate_agent(model=model, env=env_10_20, num_episodes=200, success_speed_range=(10, 20))



crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
ov

In [45]:
# Restore the policy saved for the [20, 30] study and score it over 200 episodes
model = PPO.load("vel_study_20_30/model")
results = evaluate_agent(model=model, env=env_20_30, num_episodes=200, success_speed_range=(20, 30))



crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
ov

In [46]:
# Restore the policy saved for the [30, 40] study and score it over 200 episodes
model = PPO.load("vel_study_30_40/model")
results = evaluate_agent(model=model, env=env_30_40, num_episodes=200, success_speed_range=(30, 40))

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse




crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [47]:
# Restore the policy saved for the [40, 50] study and score it over 200 episodes
model = PPO.load("vel_study_40_50/model")
results = evaluate_agent(model=model, env=env_40_50, num_episodes=200, success_speed_range=(40, 50))



crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [48]:
# Restore the policy saved for the [50, 60] study and score it over 200 episodes
model = PPO.load("vel_study_50_60/model")
results = evaluate_agent(model=model, env=env_50_60, num_episodes=200, success_speed_range=(50, 60))

crashFalse
overFalse
crashFalse
overFalse




crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

### **Analysis of Driving Agent Evaluation Results**

The performance of the driving agent varies significantly across different speed ranges, showing notable patterns as speed increases.

In the **(0, 10)** range, the agent achieves a high **average reward of 16.6739**, suggesting it performs relatively well at low speeds. However, the agent records **0 successful merges** and **4 collisions**, indicating a failure to effectively manage merging scenarios despite its higher reward. The **4 dangerous driving episodes** highlight some instability due to sudden speed changes, and the **average episode time of 0.39 seconds** suggests that, even though it mostly avoids erratic driving, the agent is still inefficient at handling merges.

In the **(10, 20)** range, the agent performs significantly better. It achieves **200 successful merges**, a stark improvement, and maintains **0 collisions**. The **average reward drops to 12.2107**, indicating a trade-off between speed and reward, but the agent remains stable, as reflected by **0 dangerous driving episodes**. The **average episode time of 0.32 seconds** is shorter than in the (0, 10) range, and overall the agent demonstrates **consistent and effective performance** in handling merges at moderate speeds.

Moving to the **(20, 30)** range, the agent’s **average reward drops further to 7.4834**, indicating increasing difficulty at higher speeds. However, it still manages **200 successful merges** and records **0 collisions** and **0 dangerous driving episodes**, which suggests that while the agent's reward is lower, its ability to handle merges remains strong. The **average episode time of 0.22 seconds** is the shortest observed, indicating faster decision-making.

In the **(30, 40)** range, the agent’s performance begins to degrade: although the **average reward rises to 11.8741**, **successful merges fall to 0**. In addition, the agent experiences **2 collisions** and **2 dangerous driving episodes**. These results point to an increasing difficulty in merging at higher speeds and a slight rise in instability. The **average episode time of 0.41 seconds** also suggests that decision-making may be slightly delayed under these conditions.

In the **(40, 50)** range, the agent faces **significant challenges**, with the **average reward dropping to 9.9972**. The number of **collisions spikes to 19**, there are **0 successful merges**, and the **2 dangerous driving episodes** show some instability. The **average episode time of 0.40 seconds** remains relatively stable but does not seem to correlate with improved performance in handling merges, suggesting a deterioration in overall driving behavior at these higher speeds.

Finally, in the **(50, 60)** range, the agent achieves a low **average reward of 8.5307** (only the (20, 30) range scores lower), with **0 successful merges** and only **1 collision**. Despite the low number of collisions, the **1 dangerous driving episode** indicates some level of instability. The **average episode time of 0.40 seconds** remains consistent, but the agent’s overall ability to handle merges effectively has diminished, likely due to overly conservative decision-making or difficulty at these speeds.

### **Conclusion**

The **(10, 20)** speed range emerges as the **optimal range** for the agent, offering the best balance of **successful merges**, **low collision rates**, and **stable performance**. Despite a slight drop in **average reward** compared to the lower range, the agent demonstrates an ability to merge successfully and avoid collisions, making this range the most effective operational window.

The **(0, 10)** range, while achieving the **highest average reward**, suffers from an inability to complete successful merges, limiting its practical usefulness. The **(20, 30)** range matches (10, 20) with 200 successful merges and no collisions, but its average reward is lower (7.4834 versus 12.2107). Beyond **30**, **successful merges** drop to 0 while **collisions** and **dangerous driving episodes** appear, indicating the agent struggles to handle higher speeds effectively. Therefore, the **(10, 20)** range stands out as the **most effective** for the driving agent.

### **Narrowing the (10,20) interval**

##### In order to identify the optimal speed range within the broader interval of (10, 20), a process of gradual refinement was employed. By breaking this interval into smaller subintervals of 0.5, such as (10, 10.5), (10.5, 11), and so on, each subinterval is evaluated independently using the same metrics as before. The goal of this narrowing process is to identify which specific speed range yields the highest rewards, minimizes collisions, and reduces dangerous driving episodes. By successively refining these subintervals and analyzing the results, we can pinpoint the exact optimal speed range where the agent performs most efficiently and safely. This step-by-step method ensures that performance is maximized within the interval of interest.

In [50]:
# Evaluate one trained agent against a series of narrow success-speed windows
def evaluate_speed_intervals(model, env, base_interval, step_size, num_episodes, speed_threshold_ratio=0.5):
    """
    Run ``evaluate_agent`` once per sub-window of ``base_interval``.

    The base interval is cut into consecutive windows of width ``step_size``; the last
    window is clipped so it never extends past the upper bound of ``base_interval``.
    The same ``model`` and ``env`` are used for every window: only the
    ``success_speed_range`` passed to ``evaluate_agent`` changes between runs.

    :param model: trained agent forwarded to ``evaluate_agent``
    :param env: environment forwarded to ``evaluate_agent``
    :param base_interval: (low, high) speed interval to subdivide
    :param step_size: width of each sub-window; must be positive
    :param num_episodes: episodes to run per sub-window
    :param speed_threshold_ratio: forwarded to ``evaluate_agent``
    :return: dict mapping each (start, stop) window, as plain Python floats, to the
             metrics dict returned by ``evaluate_agent``
    :raises ValueError: if ``step_size`` is not positive
    """
    if step_size <= 0:
        raise ValueError("step_size must be positive")

    lower_bound, upper_bound = base_interval[0], base_interval[1]

    # Count windows with integers instead of np.arange: a float step makes arange's
    # length sensitive to rounding. Rounding the ratio removes noise such as
    # 3.0000000000000004 before taking the ceiling.
    window_count = int(np.ceil(round((upper_bound - lower_bound) / step_size, 9)))

    results = {}  # window -> metrics

    for index in range(window_count):
        # Plain floats keep the dict keys and the printed window readable
        # (NumPy scalars print as np.float64(...) on recent NumPy versions)
        start = float(lower_bound + index * step_size)
        stop = float(min(start + step_size, upper_bound))
        subinterval = (start, stop)

        print(f"Evaluating for speed range {subinterval}")
        results[subinterval] = evaluate_agent(model, env, num_episodes, subinterval, speed_threshold_ratio)
        print("\n")  # Spacing between results for readability

    return results

# Sweep settings: the (10, 20) interval is split into windows of width 0.5
step_size = 0.5
base_interval = (10, 20)

# Reload the (10, 20) policy, substituting the live environment's spaces for the saved ones
model = PPO.load(
    "vel_study_10_20/model",
    custom_objects={
        "observation_space": env_10_20.observation_space,
        "action_space": env_10_20.action_space,
    },
)

# Score the policy once per window, 200 episodes each
results = evaluate_speed_intervals(model, env_10_20, base_interval, step_size, num_episodes=200)



Evaluating for speed range (10.0, 10.5)
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFal

In [51]:
# Dump the raw per-window metrics dictionary
print("Results:", results)

Results: {(10.0, 10.5): {'avg_reward': 15.03678969629645, 'avg_steps_to_merge': 14.58, 'avg_episode_time': 0.3901611661911011, 'number_collisions': 5, 'successful_merges': 0, 'number_dangerous_episodes': 5}, (10.5, 11.0): {'avg_reward': 15.073469300945257, 'avg_steps_to_merge': 14.61, 'avg_episode_time': 0.3774153876304627, 'number_collisions': 3, 'successful_merges': 0, 'number_dangerous_episodes': 3}, (11.0, 11.5): {'avg_reward': 15.166168138476873, 'avg_steps_to_merge': 14.7, 'avg_episode_time': 0.3816871309280396, 'number_collisions': 1, 'successful_merges': 0, 'number_dangerous_episodes': 1}, (11.5, 12.0): {'avg_reward': 15.121659897141788, 'avg_steps_to_merge': 14.66, 'avg_episode_time': 0.4155530631542206, 'number_collisions': 2, 'successful_merges': 0, 'number_dangerous_episodes': 2}, (12.0, 12.5): {'avg_reward': 15.110789024571426, 'avg_steps_to_merge': 14.655, 'avg_episode_time': 0.381923131942749, 'number_collisions': 3, 'successful_merges': 0, 'number_dangerous_episodes': 3

In [52]:
# Flatten {window: metrics} into one record per window, labelled "low - high"
data = [
    {'Speed Range': f"{speed_range[0]} - {speed_range[1]}", **metrics}
    for speed_range, metrics in results.items()
]

# Tabulate the records
results_df = pd.DataFrame(data)

results_df  # Display the DataFrame

Unnamed: 0,Speed Range,avg_reward,avg_steps_to_merge,avg_episode_time,number_collisions,successful_merges,number_dangerous_episodes
0,10.0 - 10.5,15.03679,14.58,0.390161,5,0,5
1,10.5 - 11.0,15.073469,14.61,0.377415,3,0,3
2,11.0 - 11.5,15.166168,14.7,0.381687,1,0,1
3,11.5 - 12.0,15.12166,14.66,0.415553,2,0,2
4,12.0 - 12.5,15.110789,14.655,0.381923,3,0,3
5,12.5 - 13.0,15.081917,14.63,0.381351,4,0,4
6,13.0 - 13.5,15.046655,14.595,0.383059,5,0,5
7,13.5 - 14.0,15.176923,14.72,0.38837,2,0,2
8,14.0 - 14.5,15.128201,14.67,0.392312,3,0,3
9,14.5 - 15.0,15.127369,14.67,0.390487,3,0,3


### **Analysis of Speed Ranges**

- **Performance Consistency:** Across the evaluated speed ranges, **average rewards remain relatively stable**, with most values ranging between **15.0** and **15.2**, suggesting consistent performance in reward accumulation despite fluctuations in other metrics.

- **Optimal Speed Range for Merging:** The **19.5 - 20.0** speed range stands out as the **most effective operational window**. It achieves a significantly higher **198 successful merges**, with only **2 collisions** and **2 dangerous driving episodes**. Additionally, the **average steps to merge drop to 11.585**, and the **average episode time is reduced to 0.316 seconds**, highlighting both **efficiency and stability**.

- **Collision Metrics:** In lower speed ranges, such as **10.0 - 10.5** and **13.0 - 13.5**, collision counts reach **5 incidents**, indicating instability despite moderately high rewards. As the speed approaches **19.5 - 20.0**, the collision rate significantly decreases, emphasizing improved control and decision-making.

- **Average Steps to Merge:** The agent generally requires around **14.6 steps** across most ranges to complete a merge. However, the **19.5 - 20.0** range shows a significant improvement, with only **11.585 steps**, indicating **more decisive and efficient actions** at this speed.

- **Time Efficiency:** The **average episode time** shows minor fluctuations across the ranges, averaging approximately **0.39 seconds**. However, the **19.5 - 20.0** range achieves a notably lower time of **0.316 seconds**, reflecting **faster decision-making and task execution**.

- **Dangerous Driving Episodes:** Dangerous episodes due to sudden speed changes remain relatively low across most ranges, with occasional spikes in ranges like **10.0 - 10.5** and **13.0 - 13.5** (**5 dangerous episodes each**). The **19.5 - 20.0** range maintains one of the **lowest counts with only 2 dangerous episodes**, reinforcing its overall **stability and control**.

The **19.5 - 20.0 speed range emerges as the optimal choice** for merging scenarios. It excels in **successful merges**, maintains **low collision rates**, minimizes **dangerous driving episodes**, and demonstrates **efficient time and step metrics**. In contrast, other ranges exhibit higher instability, increased collision counts, and inefficient merging behavior. Therefore, the **19.5 - 20.0** range represents the **most effective operational window for the driving agent**, balancing **safety, efficiency, and performance consistency**.

In [55]:
# Restore the policy trained on the (10, 20) range
model = PPO.load("vel_study_10_20/model")

# Recording state: step budget, counters and the frame buffer
max_steps = 1000
step_count = 0
done = False
frames = []

# Start a fresh episode
obs, info = env_10_20.reset()

# Shrink a frame so both dimensions are multiples of the codec macro block size
def resize_frame_to_macro_block_size(frame, block_size=16):
    """
    Resize ``frame`` down to the nearest width and height divisible by ``block_size``.

    :param frame: image array with a three-element shape (height, width, channels)
    :param block_size: size the output dimensions must be a multiple of
    :return: the frame resized with ``cv2.resize``
    """
    height, width, _channels = frame.shape
    target_width = width - width % block_size
    target_height = height - height % block_size
    # cv2.resize takes the target size as (width, height)
    return cv2.resize(frame, (target_width, target_height))

# Run the agent in the environment, recording one frame per step
while step_count < max_steps and not done:
    # Act deterministically, as evaluate_agent does, so the video shows the evaluated policy
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env_10_20.step(action)
    # Stop on either ending signal: Gymnasium expects reset() after termination OR
    # truncation, so a truncated episode must not keep being stepped
    done = terminated or truncated
    frame = env_10_20.render()

    # Resize the frame to avoid the macro_block_size warning
    resized_frame = resize_frame_to_macro_block_size(frame)
    frames.append(resized_frame)

    step_count += 1

# Close the environment
env_10_20.close()

# Save the frames as a video
video_filename = "velocity_study.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")



crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
Video saved as velocity_study.mp4


In [56]:
# Display the recorded rollout inline; embed=True is passed so the video data is embedded in the output
video_filename = "velocity_study.mp4"
Video(video_filename, embed=True)