# **Study on the Ideal Behaviour, on the highway, when another vehicle is merging into it - Brake. Access of the influence on the ego vehicle**

##### This study aims to determine the optimal strategy for the ego vehicle when another vehicle is merging into its highway. The only variables under consideration are the distance between the ego vehicle and the merging vehicle and the reward `braking_reward`. The goal is to find the **optimal raking distance** that encourages the ego vehicle to accelerate, ensuring both safety and traffic efficiency.

### **Imports**

In [1]:
import gymnasium as gym
from matplotlib import pyplot as plt
import pprint
from highway_env.envs import MergeEnv
from highway_env import utils
import imageio
import cv2
from IPython.display import Video
import time
import numpy as np
from stable_baselines3 import PPO
import os
%matplotlib inline

### **Custom environment**

With the ego-vehicle on the highway and a merging vehicle the following branking distances:[0,10], [10,20], [20,30], [30,40], [40,50]

**Fixed Rewards**
- high speed = 1
- lane change = -5
- right lane = 3
- braking = 5

In [2]:
class CustomMergeEnv(MergeEnv):
    def _make_vehicles(self) -> None:
        
        road = self.road
        # Ego vehicle
        ego_vehicle = self.action_type.vehicle_class(
            road, road.network.get_lane(("a", "b", 1)).position(30, 0), speed=30
        )
        road.vehicles.append(ego_vehicle)

        other_vehicles_type = utils.class_from_path(self.config["other_vehicles_type"])

        # Merging vehicle
        merging_v = other_vehicles_type(
            road, road.network.get_lane(("j", "k", 0)).position(110, 0), speed=20
        )
        merging_v.target_speed = 30
        road.vehicles.append(merging_v)
        
        # Set the ego vehicle as the primary vehicle
        self.vehicle = ego_vehicle

    def _reward(self, action: int) -> float:
        """
        Custom reward function combining the original reward with proximity-based
        braking behavior to allow the merging vehicle to merge safely.
        """
        # Get the original reward from the parent class (if it exists)
        reward = super()._reward(action)
        
        ego_vehicle = self.vehicle
        road = self.road

        # Find the merging vehicle (the vehicle in the merging lane)
        merging_vehicle = None
        for vehicle in road.vehicles:
            if vehicle.lane_index == ("j", "k", 0):  # Assuming this is the merging lane
                merging_vehicle = vehicle
                break
        
        # Calculate distance to the merging vehicle
        if not merging_vehicle:
            return reward
            
        distance = abs(ego_vehicle.position[0] - merging_vehicle.position[0])
       
        d_min, d_max = self.config.get("braking_interval", [0, 10])
        
        if d_min< distance < d_max :
            
            braking_reward = self.config.get("braking_reward", 5)
           
        else:
            braking_reward = self.config.get("braking_reward", 0)
        
        reward += braking_reward

        return reward

In [3]:
# Registering the custom environment
gym.envs.registration.register(
    id='CustomMerge-v0',
    entry_point='__main__:CustomMergeEnv',  
)

### **Braking distance interval [0,10] meters**

In [4]:
env_dis_0_10 = CustomMergeEnv(render_mode='rgb_array',config={"high_speed_reward": 1, "lane_change_reward": -5, "right_lane_reward": 3,"braking_interval":[0,10]})

In [None]:
model = PPO('MlpPolicy', env_dis_0_10,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="env_0_10_ego_influence/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("env_0_10_ego_influence/model")

### **Braking distance interval [10,20] meters**


In [5]:
env_dis_10_20 = CustomMergeEnv(render_mode='rgb_array',config={"high_speed_reward": 1, "lane_change_reward": -5, "right_lane_reward": 3,"braking_interval":[10,20]})

In [None]:
model = PPO('MlpPolicy', env_dis_10_20,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="env_10_20_ego_influence/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("env_10_20_ego_influence/model")

### **Braking distance interval [20,30] meters**

In [6]:
env_dis_20_30 = CustomMergeEnv(render_mode='rgb_array',config={"high_speed_reward": 1, "lane_change_reward": -5, "right_lane_reward": 3,"braking_interval":[20,30]})

In [None]:
model = PPO('MlpPolicy', env_dis_20_30,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="env_20_30_ego_influence/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("env_20_30_ego_influence/model")

### **Braking distance interval [30,40] meters**

In [7]:
env_dis_30_40 = CustomMergeEnv(render_mode='rgb_array',config={"high_speed_reward": 1, "lane_change_reward": -5, "right_lane_reward": 3,"braking_interval":[30,40]})

In [None]:
model = PPO('MlpPolicy', env_dis_30_40,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="env_30_40_ego_influence/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("env_30_40_ego_influence/model")

### **Braking distance interval [40,50] meters**

In [5]:
env_dis_40_50 = CustomMergeEnv(render_mode='rgb_array',config={"high_speed_reward": 1, "lane_change_reward": -5, "right_lane_reward": 3,"braking_interval":[40,50]})

In [None]:
model = PPO('MlpPolicy', env_dis_40_50,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048, 
            batch_size=64, 
            n_epochs=10,  
            gamma=0.8,
            gae_lambda=0.95, 
            clip_range=0.2, 
            verbose=1,
            tensorboard_log="env_40_50_ego_influence/")
timesteps = 1000000
model.learn(total_timesteps=timesteps)
model.save("env_40_50_ego_influence/model")

### **Evaluate and compare the models**

**For the braking distance  (0,10)**
- Average Reward: 6.3829
- Average Steps to Merge: 14.0
- Average Episode Time: 0.11 seconds
- Number of Collisions: 0
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 3

**For the braking distance  (10,20)**
- Average Reward: 12.5829
- Average Steps to Merge: 14.0
- Average Episode Time: 0.11 seconds
- Number of Collisions: 0
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 0

**For the braking distance  (20,30)**
- Average Reward: 12.5829
- Average Steps to Merge: 14.0
- Average Episode Time: 0.11 seconds
- Number of Collisions: 0
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 0

**For the braking distance  (30,40)**
- Average Reward: 12.5829
- Average Steps to Merge: 14.0
- Average Episode Time: 0.11 seconds
- Number of Collisions: 0
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 0

**For the braking distance  (40,50)**
- Average Reward: 12.5829
- Average Steps to Merge: 14.0
- Average Episode Time: 0.12 seconds
- Number of Collisions: 0
- Successful Merges: 0
- Number of Dangerous Driving Episodes (sudden speed changes): 0

In [9]:
# Função para testar o agente e coletar métricas
def evaluate_agent(model, env, num_episodes, speed_threshold_ratio=0.3):
    total_rewards = []
    total_collisions = 0
    successful_merges = 0
    dangerous_driving_episodes = 0
    total_steps_to_merge = []
    total_episode_times = []  # Lista para armazenar os tempos de cada episódio

    # Calcula o threshold com base no intervalo de velocidade
    reward_speed_range = env.config["reward_speed_range"]
    speed_threshold = (reward_speed_range[1] - reward_speed_range[0]) * speed_threshold_ratio
    
    ego_vehicle = env.vehicle
    #print("ego", ego_vehicle)
    road = env.road
    
    for vehicle in road.vehicles:
            #print("vehicle",vehicle, "lane", vehicle.lane_index)
           
            if vehicle != ego_vehicle:
                merging_vehicle = vehicle
                merging_vehicle.lane_index =  ("j", "k", 0)  # Assuming this is the merging lane
                
                break
    #print("merging_vehicle2", merging_vehicle.lane_index)
    #print("merging_vehicle2_pos", merging_vehicle.position)

    for episode in range(num_episodes):
        start_time = time.time()  # Registra o tempo de início do episódio
        obs, info = env.reset()
        done = False
        episode_reward = 0
        collisions = 0
        dangerous_driving = False
        steps_to_merge = 0
        last_speed = None  # Inicializa a velocidade anterior como None
        

        while not done:
            # O agente escolhe uma ação
            action, _states = model.predict(obs, deterministic=True)
            # Executa a ação no ambiente
            obs, reward, terminated, truncated, info = env.step(action)

            dangerous_driving = False
            episode_reward += reward
            steps_to_merge += 1

            # Verifica a velocidade atual e arredonda para 2 casas decimais
            current_speed = round(info.get('speed', 0), 2)

            # Verifica se houve uma mudança brusca de velocidade
            if last_speed is not None and abs(current_speed - last_speed) > speed_threshold:
                dangerous_driving = True

            last_speed = current_speed  # Atualiza a velocidade anterior

            # Verifica se houve colisão
            if 'crashed' in info and info['crashed']:
                collisions += 1

            # Verifica se o episódio terminou (seja por 'terminated' ou 'truncated')
            done = terminated or truncated

            # Verifica se há informações do veículo de merge
            if merging_vehicle:
                ego_position = ego_vehicle.position  # [x, y] of ego vehicle
                merging_position = merging_vehicle.position  # [x, y] of merging vehicle

                ego_lane = ego_vehicle.lane_index  # Lane of the ego vehicle
                merging_lane = merging_vehicle.lane_index  # Lane of the merging vehicle
                #print()
                # Condição de sucesso: sem colisões e o veículo de merging fez o merge corretamente
                if (not collisions and ego_position[0] < merging_position[0] and 3<merging_position[1]<6 and done):
                    successful_merges += 1

        # Registra as métricas do episódio
        total_rewards.append(episode_reward)
        total_collisions += collisions
        total_steps_to_merge.append(steps_to_merge)

        if dangerous_driving:
            dangerous_driving_episodes += 1

        # Calcula o tempo do episódio e adiciona à lista
        episode_time = time.time() - start_time
        total_episode_times.append(episode_time)

    # Cálculos das métricas finais
    avg_reward = np.mean(total_rewards)
    avg_steps_to_merge = np.mean(total_steps_to_merge)
    avg_episode_time = np.mean(total_episode_times)  # Calcula o tempo médio de cada episódio

    # Exibir resultados
    print(f"Average Reward: {avg_reward}")
    print(f"Average Steps to Merge: {avg_steps_to_merge}")
    print(f"Average Episode Time: {avg_episode_time:.2f} seconds")  # Exibe o tempo médio dos episódios
    print(f"Number of Collisions: {total_collisions}")
    print(f"Successful Merges: {successful_merges}")
    print(f"Number of Dangerous Driving Episodes (sudden speed changes): {dangerous_driving_episodes}")

    return {
        "avg_reward": avg_reward,
        "avg_steps_to_merge": avg_steps_to_merge,
        "avg_episode_time": avg_episode_time,  # Adiciona o tempo médio de episódio ao retorno
        "number_collisions": total_collisions,
        "successful_merges": successful_merges,
        "number_dangerous_episodes": dangerous_driving_episodes
    }

In [10]:
# Carregar o modelo treinado e avaliar
model = PPO.load("env_0_10_ego_influence/model")

# Avaliar o modelo
results = evaluate_agent(model, env_dis_0_10, 200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [11]:
# Carregar o modelo treinado e avaliar
model = PPO.load("env_10_20_ego_influence/model")

# Avaliar o modelo
results = evaluate_agent(model, env_dis_10_20, 200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [12]:
# Carregar o modelo treinado e avaliar
model = PPO.load("env_20_30_ego_influence/model")

# Avaliar o modelo
results = evaluate_agent(model, env_dis_20_30, 200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [13]:
# Carregar o modelo treinado e avaliar
model = PPO.load("env_30_40_ego_influence/model")

# Avaliar o modelo
results = evaluate_agent(model, env_dis_30_40, 200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

In [14]:
# Carregar o modelo treinado e avaliar
model = PPO.load("env_40_50_ego_influence/model")

# Avaliar o modelo
results = evaluate_agent(model, env_dis_40_50, 200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overF

The results from all tested braking distance ranges (from 0-10 meters to 40-50 meters) showed consistent performance across the different methods. In all cases, the **average reward** was highest at 12.58 for braking distances from 10 meters onward, indicating a more balanced approach to braking that prioritized safety without excessively reducing the vehicle's speed.

The **average steps to merge** and the **average episode time** remained stable across all braking distances, with a constant value of 14 steps and approximately 0.11 seconds per episode. This suggests that the model was able to maintain a similar level of responsiveness and speed during the merging process, regardless of the braking distance. 

In terms of safety, **no collisions** occurred in any of the scenarios, which is a strong indication that the braking distances implemented in the study were effective in avoiding accidents. Furthermore, no **successful merges** were recorded, which could suggest that while the ego vehicle was able to maintain safety by slowing down appropriately, it did not optimize its strategy for efficient merging under the given conditions.

Another important observation was the **number of dangerous driving episodes**. When the braking distance was set between 0 and 10 meters, three episodes of sudden speed changes occurred, which might indicate that the shorter braking distance caused the ego vehicle to react too aggressively, potentially leading to unsafe driving. In contrast, with longer braking distances (from 10 meters to 50 meters), no dangerous driving episodes were observed, suggesting that a more gradual approach to braking reduced the likelihood of sudden maneuvers.

Overall, the results suggest that a **braking distance between 10 meters and 50 meters effectively balanced safety and vehicle control, avoiding both collisions and dangerous driving behaviors.** However, the lack of successful merges across all scenarios indicates that while the ego vehicle was capable of responding safely, further optimization of merging strategies is needed to improve merging success without compromising safety.

In [8]:
# Load the trained model
model = PPO.load("env_0_10_ego_influence/model")

# Initialize the environment and variables for recording
frames = []
obs, info = env_dis_0_10.reset()
done = False
step_count = 0
max_steps = 1000

# Resize frame to be divisible by 16 (macro block size for video codecs)
def resize_frame_to_macro_block_size(frame, block_size=16):
    h, w, _ = frame.shape
    new_w = (w // block_size) * block_size
    new_h = (h // block_size) * block_size
    return cv2.resize(frame, (new_w, new_h))

# Run the agent in the environment
while step_count < max_steps and not done:
    action, _ = model.predict(obs)
    obs, reward, done, truncated, info = env_dis_0_10.step(action)
    frame = env_dis_0_10.render()

    # Resize the frame to avoid the macro_block_size warning
    resized_frame = resize_frame_to_macro_block_size(frame)
    frames.append(resized_frame)
    
    step_count += 1

# Close the environment
env_dis_0_10.close()

# Save the frames as a video
video_filename = "0_10_distance.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashTrue
overFalse
Video saved as 0_10_distance.mp4


In [9]:
# Load the trained model
model = PPO.load("env_40_50_ego_influence/model")

# Initialize the environment and variables for recording
frames = []
obs, info = env_dis_40_50.reset()
done = False
step_count = 0
max_steps = 1000

# Resize frame to be divisible by 16 (macro block size for video codecs)
def resize_frame_to_macro_block_size(frame, block_size=16):
    h, w, _ = frame.shape
    new_w = (w // block_size) * block_size
    new_h = (h // block_size) * block_size
    return cv2.resize(frame, (new_w, new_h))

# Run the agent in the environment
while step_count < max_steps and not done:
    action, _ = model.predict(obs)
    obs, reward, done, truncated, info = env_dis_40_50.step(action)
    frame = env_dis_40_50.render()

    # Resize the frame to avoid the macro_block_size warning
    resized_frame = resize_frame_to_macro_block_size(frame)
    frames.append(resized_frame)
    
    step_count += 1

# Close the environment
env_dis_40_50.close()

# Save the frames as a video
video_filename = "40_50_distance.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
Video saved as 40_50_distance.mp4


In [10]:
# Display the video
video_filename = "0_10_distance.mp4"
Video(video_filename, embed=True)

In [11]:
# Display the video
video_filename = "40_50_distance.mp4"
Video(video_filename, embed=True)