# **Study on the Ideal Speed for Highway Merging**

##### This study aims to determine the optimal speed for safely and efficiently merging onto highways. The process begins by evaluating a range of speed intervals to assess which performs best in terms of safety and traffic flow, by changing it in the 'reward_speed_range' from the environment's configuration. Afterward, the range of this interval will be progressively narrowed down to pinpoint the exact speed(s) that offer the best results, either identifying a single optimal speed or multiple ones if no clear standout emerges.

### **Imports**

In [1]:
import gymnasium as gym
from matplotlib import pyplot as plt
import pprint
import highway_env
from highway_env import utils
from highway_env.envs import MergeEnv
import pandas as pd
import time
import numpy as np
from stable_baselines3 import PPO
%matplotlib inline

### **Creation of the environment**

##### With the ego-vehicle on the merging lane and the following Speed Ranges:[0,10], [10,20], [20,30] (the default), [30,40], [40,50], [50,60]


In [2]:
class CustomMergeEnv(MergeEnv):
    def _make_vehicles(self) -> None:
        """
        Populate a road with several vehicles on the highway and an ego-vehicle on the merging lane.

        The ego-vehicle will only be placed on the merging lane, while other vehicles will be on the highway lanes.

        :return: the ego-vehicle
        """
        road = self.road
        
        # Ego vehicle on the merging lane
        ego_vehicle = self.action_type.vehicle_class(
            road, road.network.get_lane(("j", "k", 0)).position(30, 0), speed=20  # Merging lane
        )
        road.vehicles.append(ego_vehicle)

        other_vehicles_type = utils.class_from_path(self.config["other_vehicles_type"])

        # Create multiple vehicles on the highway lanes
        for position, speed in [(90, 29), (70, 31), (5, 31.5), (120, 28), (50, 32)]:
            lane = road.network.get_lane(("a", "b", self.np_random.integers(2)))  # Highway lanes (0 or 1)
            position = lane.position(position + self.np_random.uniform(-5, 5), 0)  # Randomize position slightly
            speed += self.np_random.uniform(-1, 1)  # Randomize speed slightly
            road.vehicles.append(other_vehicles_type(road, position, speed=speed))

        # Set the ego vehicle as the primary vehicle
        self.vehicle = ego_vehicle


In [3]:
# Registering the custom environment
gym.envs.registration.register(
    id='CustomMerge-v0',
    entry_point='__main__:CustomMergeEnv',  # Entry point for your custom environment
)

### **Merging velocity interval [0,10]**

In [4]:
# Creating an instance of the custom environment
env_0_10 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [0, 10]})

In [None]:
model = PPO('MlpPolicy', env_0_10,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_0_10/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_0_10/model")

### **Merging velocity interval [10,20]**

In [5]:
# Creating an instance of the custom environment
env_10_20 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [10,20]})

In [None]:
model = PPO('MlpPolicy', env_10_20,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_10_20/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_10_20/model")

### **Merging velocity interval [20,30]**

In [6]:
# Creating an instance of the custom environment
env_20_30 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [20,30]})

In [None]:
model = PPO('MlpPolicy', env_20_30,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_20_30/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_20_30/model")

### **Merging velocity interval [30,40]**

In [7]:
# Creating an instance of the custom environment
env_30_40 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [30, 40]})

In [None]:
model = PPO('MlpPolicy', env_30_40,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_30_40/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_30_40/model")

### **Merging velocity interval [40,50]**

In [8]:
# Creating an instance of the custom environment
env_40_50 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [40, 50]})

In [None]:
model = PPO('MlpPolicy', env_40_50,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_40_50/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_40_50/model")

### **Merging velocity interval [50,60]**

In [9]:
# Creating an instance of the custom environment
env_50_60 = gym.make("CustomMerge-v0", render_mode='rgb_array', config={"reward_speed_range": [50, 60]})

In [None]:
model = PPO('MlpPolicy', env_50_60,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="vel_study_50_60/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("vel_study_50_60/model")

### **Evaluate and compare the models**

**For the speed range (0,10)**
- Average Reward: 
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For the speed range (10,20)**
- Average Reward: 
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For the speed range (20,30)**
- Average Reward:
- Average Steps to Merge:
- Average Episode Time:
- Number of Collisions:
- Successful Merges:
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For the speed range (30,40)**
- Average Reward: 
- Average Steps to Merge:
- Average Episode Time: 
- Number of Collisions:
- Successful Merges:
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For the speed range (40,50)**
- Average Reward:
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes):

**For the speed range (50,60)**
- Average Reward:
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

In [10]:
# Function to evaluate the agent and collect metrics
def evaluate_agent(model, env, num_episodes, success_speed_range, speed_threshold_ratio=0.3):
    total_rewards = []  # List to store total rewards for each episode
    total_collisions = 0  # Counter for total collisions across all episodes
    successful_merges = 0  # Counter for successful merges
    dangerous_driving_episodes = 0  # Counter for episodes with dangerous driving behavior
    total_steps_to_merge = []  # List to store the number of steps taken to merge in each episode
    total_episode_times = []  # List to store the time taken for each episode

    # Calculate the threshold based on the speed range
    reward_speed_range = env.config["reward_speed_range"]
    speed_threshold = (reward_speed_range[1] - reward_speed_range[0]) * speed_threshold_ratio  # Speed threshold for detecting sudden changes

    for episode in range(num_episodes):
        start_time = time.time()  # Record the start time of the episode
        obs, info = env.reset()  # Reset the environment and get the initial observation
        done = False  # Variable to track if the episode is finished
        episode_reward = 0  # Variable to track the reward for the current episode
        collisions = 0  # Counter for collisions in the current episode
        dangerous_driving = False  # Flag to indicate if dangerous driving occurred
        steps_to_merge = 0  # Counter for steps taken to merge
        last_speed = None  # Initialize last speed as None

        while not done:  # Loop until the episode is done
            # The agent chooses an action
            action, _states = model.predict(obs, deterministic=True)
            # Execute the action in the environment
            obs, reward, terminated, truncated, info = env.step(action)

            dangerous_driving = False  # Reset dangerous driving flag
            episode_reward += reward  # Accumulate reward for the episode
            steps_to_merge += 1  # Increment the steps to merge counter

            # Check the current speed and round to 2 decimal places
            current_speed = round(info.get('speed', 0), 2)

            # Check for sudden speed changes
            if last_speed is not None and abs(current_speed - last_speed) > speed_threshold:
                dangerous_driving = True  # Mark as dangerous driving if speed change exceeds threshold

            last_speed = current_speed  # Update the last speed for the next iteration

            # Check for collisions
            if 'crashed' in info and info['crashed']:
                collisions += 1  # Increment collision counter if a crash occurred

            # Check if the episode has ended (either 'terminated' or 'truncated')
            done = terminated or truncated

        # Consider an episode a success if there were no collisions and final speed is within the desired range
        if not collisions and success_speed_range[0] <= current_speed <= success_speed_range[1]:
            successful_merges += 1  # Increment successful merges if criteria met

        # Log episode metrics
        total_rewards.append(episode_reward)  # Add episode reward to the total rewards list
        total_collisions += collisions  # Update total collisions count
        total_steps_to_merge.append(steps_to_merge)  # Add steps to merge for this episode

        if dangerous_driving:
            dangerous_driving_episodes += 1  # Increment count of dangerous driving episodes

        # Calculate the time taken for the episode and add to the list
        episode_time = time.time() - start_time  # Calculate elapsed time
        total_episode_times.append(episode_time)  # Add episode time to the list

    # Final metric calculations
    avg_reward = np.mean(total_rewards)  # Calculate average reward
    avg_steps_to_merge = np.mean(total_steps_to_merge)  # Calculate average steps to merge
    avg_episode_time = np.mean(total_episode_times)  # Calculate average episode time

    # Display results
    print(f"Average Reward: {avg_reward}")  # Print average reward
    print(f"Average Steps to Merge: {avg_steps_to_merge}")  # Print average steps to merge
    print(f"Average Episode Time: {avg_episode_time:.2f} seconds")  # Print average episode time
    print(f"Number of Collisions: {total_collisions}")  # Print total collisions
    print(f"Successful Merges: {successful_merges}")  # Print number of successful merges
    print(f"Number of Dangerous Driving Episodes (sudden speed changes): {dangerous_driving_episodes}")  # Print count of dangerous driving episodes

    return {
        "avg_reward": avg_reward,  # Return average reward
        "avg_steps_to_merge": avg_steps_to_merge,  # Return average steps to merge
        "avg_episode_time": avg_episode_time,  # Return average episode time
        "number_collisions": total_collisions,  # Return total number of collisions
        "successful_merges": successful_merges,  # Return number of successful merges
        "number_dangerous_episodes": dangerous_driving_episodes  # Return number of dangerous driving episodes
    }

In [11]:
# Load the trained model
model = PPO.load("vel_study_0_10/model")  

# Evaluate the model
results = evaluate_agent(model, env_0_10, 200, (0, 10)) 

  logger.warn(


crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [12]:
# Load the trained model
model = PPO.load("vel_study_10_20/model")

# Evaluate the model
results = evaluate_agent(model, env_10_20, 200, (10, 20))

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse


  logger.warn(


crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [13]:
# Load the trained model
model = PPO.load("vel_study_20_30/model")

# Evaluate the model
results = evaluate_agent(model, env_20_30, 200, (20, 30))

  logger.warn(


crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [14]:
# Load the trained model
model = PPO.load("vel_study_30_40/model")

# Evaluate the model
results = evaluate_agent(model, env_30_40, 200, (30, 40))

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse


  logger.warn(


crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [15]:
# Load the trained model
model = PPO.load("vel_study_40_50/model")

# Evaluate the model
results = evaluate_agent(model, env_40_50, 200, (40, 50))

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse


  logger.warn(


crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashTrue
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [16]:
# Load the trained model
model = PPO.load("vel_study_50_60/model")

# Evaluate the model
results = evaluate_agent(model, env_50_60, 200, (50, 60))

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse


  logger.warn(


crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

**Analysis of Driving Agent Evaluation Results**

The evaluation of the driving agent across various speed ranges reveals significant performance differences as speed increases.

Starting with the **(0, 10)** range, the agent achieves the highest average reward of **16.43**. However, it records **0 successful merges** and **14 collisions**, indicating that while the agent may be accumulating rewards due to cautious driving, it fails to effectively execute highway merges. This lack of success may be attributed to overly conservative or inefficient decision-making at these lower speeds. Additionally, the **14 dangerous driving episodes** suggest that the agent exhibits instability even within this ostensibly safe speed range.

In the **(10, 20)** range, the agent shows marked improvement overall. It attains a decent average reward of **14.75**, with **173 successful merges** and **27 collisions**, making this range significantly better in terms of both safety and efficiency. Although the number of collisions is higher than in the **(0, 10)** range, the substantial number of successful merges indicates that the agent is effectively navigating merging scenarios. However, the **23 dangerous driving episodes** in this range suggest that while the agent is merging successfully, it is still facing challenges with sudden speed changes.

As speed increases to the **(20, 30)** range, the agent’s performance begins to decline, recording an average reward of **13.23**. Here, there are **181 successful merges** and **19 collisions**, showing that the agent can still perform merges effectively despite facing more challenges at higher speeds. The **19 dangerous driving episodes** indicate that the agent continues to struggle with maintaining safe driving behavior as speeds increase, but the ability to achieve successful merges remains relatively intact.

In the **(30, 40)** range, performance drops further, with no successful merges and an average reward of **11.69**. While there is only **5 collisions**, the rise in **5 dangerous driving episodes** signifies a growing difficulty in adapting to the increased speed. This trend worsens in the **(40, 50)** and **(50, 60)** ranges, where the agent again records 0 successful merges and experiences higher collision rates, with **12 collisions** in the **(40, 50)** range and **19 collisions** in the **(50, 60)** range, along with **12 and 19 dangerous driving episodes**, respectively.

The results clearly illustrate that the **(10, 20) speed range offers the best balance** between safety and efficiency for the driving agent. It achieves a high number of successful merges, maintains a relatively low collision rate compared to other ranges, and manages to execute merges more effectively. In contrast, the **(0, 10)** range, despite achieving a high reward, is rendered inefficient due to its total lack of successful merges. As speed increases beyond **20**, the agent struggles significantly, facing more collisions and dangerous driving episodes, highlighting that higher speeds present challenges that the current model is not well-equipped to handle. Thus, the **(10, 20) range emerges as the most effective for ensuring safe and successful driving behavior**.

### **Narrowing the (10,20) interval**

##### In order to identify the optimal speed range within the broader interval of (10, 20), a process of gradual refinement was employed. By breaking this interval into smaller subintervals of 0.5, such as (10, 10.5), (10.5, 11), and so on, each subinterval is evaluated independently using the same metrics as before. The goal of this narrowing process is to identify which specific speed range yields the highest rewards, minimizes collisions, and reduces dangerous driving episodes. By successively refining these subintervals and analyzing the results, we can pinpoint the exact optimal speed range where the agent performs most efficiently and safely. This step-by-step method ensures that performance is maximized within the interval of interest.

In [None]:
# Modified function to evaluate multiple speed subintervals of 0.5
def evaluate_speed_intervals(model, env, base_interval, step_size, num_episodes, speed_threshold_ratio=0.3):
    # Divide the base interval (10, 20) into subintervals of 'step_size' (0.5 in this case)
    subintervals = [(base_interval[0] + i, base_interval[0] + i + step_size) for i in np.arange(0, base_interval[1] - base_interval[0], step_size)]
    
    results = {}  # Initialize a dictionary to store results for each subinterval
    
    for subinterval in subintervals:
        print(f"Evaluating for speed range {subinterval}")  # Print the current subinterval being evaluated
        result = evaluate_agent(model, env, num_episodes, subinterval, speed_threshold_ratio)  # Evaluate the agent for the current subinterval
        results[subinterval] = result  # Store the result in the dictionary
        print("\n")  # Spacing between results for readability

    return results  # Return all results after evaluations

# Parameters for the base interval and the size of subintervals
base_interval = (10, 20)  # Define the base speed interval
step_size = 0.5  # Define the size of each subinterval
model = PPO.load("vel_study_10_20/model")  # Load the model from the specified path

# Evaluate the model in subintervals of 0.5
results = evaluate_speed_intervals(model, env_10_20, base_interval, step_size, num_episodes=200)  # Call the function to evaluate the model

In [None]:
# Print the results of the evaluations
print(f"Results: {results}")

In [None]:
# Create a list to hold the data
data = [] 

# Iterate through the results and flatten the structure
for speed_range, metrics in results.items():  # Loop through each speed range and its corresponding metrics
    row = {'Speed Range': f"{speed_range[0]} - {speed_range[1]}"}  # Create a new dictionary for the current row with the speed range
    row.update(metrics)  # Add metrics to the row dictionary
    data.append(row)  # Append the row to the data list

# Create a DataFrame from the list of dictionaries
results_df = pd.DataFrame(data)  # Convert the list of dictionaries into a Pandas DataFrame

results_df  # Display the DataFrame

**Analysis of Speed Ranges**

- **Performance Consistency:** The average rewards across the evaluated speed ranges demonstrate a consistent performance, with values generally falling between **14.6** and **15.0**. This indicates that, overall, the agent's behavior in merging scenarios remains stable regardless of speed within the specified ranges.

- **Collision Metrics:** A standout finding is the exceptional performance of the **19.5 - 20.0** speed range. This range not only yielded the highest number of successful merges (**178**) but also recorded only **22 collisions** and **18 dangerous episodes**. In contrast, lower ranges such as **12.0 - 12.5**, **13.5 - 14.0**, and **14.0 - 14.5** reported higher incidents of collisions and dangerous driving, suggesting that these speeds may be less optimal for safe merging.

- **Average Steps to Merge:** The average number of steps required to achieve a merge varied across the ranges but was notably higher at **15.5 - 16.0**, where the agent took **14.9** steps on average. This indicates that while this speed range may offer moderate rewards, it also entails inefficiencies, which could be detrimental in real-time scenarios.

- **Time Efficiency:** The **19.5 - 20.0** range exhibited a commendable average episode time of **0.365 seconds**, reflecting an effective merging process despite the risks. In contrast, other speed ranges tended to have slightly longer average episode times, which could imply delays in merging under those conditions.

- **Dangerous Driving Episodes:** While dangerous episodes occurred sporadically across multiple ranges, the **19.5 - 20.0** range experienced **18 instances**, indicating that higher speeds may coincide with manageable levels of dangerous maneuvers compared to the lower ranges, where dangerous driving was more prevalent.

In light of the aforementioned observations, it can be concluded that the **19.5 - 20.0 speed range emerges as the optimal choice for merging**. This range not only facilitates a high rate of successful merges but also maintains an efficient average episode time with manageable risk levels.

In [None]:
import gym
import pygame
import numpy as np
from stable_baselines3 import PPO

# Inicialize o pygame
pygame.init()
screen = pygame.display.set_mode((800, 600))  # Ajuste a resolução conforme necessário
pygame.display.set_caption("Episódios com PPO")

# Carregue o modelo e o ambiente
model = PPO.load("vel_study_10_20/model")
env = env_10_20

# Resete o ambiente
obs = env.reset()

# Loop para renderizar episódios
clock = pygame.time.Clock()
running = True
while running:
    # Captura eventos do Pygame para permitir fechar a janela
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

    # Use o modelo para prever a ação
    action, _states = model.predict(obs)

    # Aplique a ação no ambiente
    obs, reward, done, info = env.step(action)

    # Renderize o ambiente
    frame = env.render(mode="rgb_array")  # Pegue o frame como array RGB
    frame_surface = pygame.surfarray.make_surface(np.transpose(frame, (1, 0, 2)))  # Transforme em Surface

    # Mostre o frame no pygame
    screen.blit(pygame.transform.scale(frame_surface, (800, 600)), (0, 0))
    pygame.display.flip()

    # Resete o ambiente se o episódio terminou
    if done:
        obs = env.reset()

    # Controle o FPS
    clock.tick(30)

# Feche o ambiente e o Pygame
env.close()
pygame.quit()
