# **Study on the Ideal Behaviour, on the highway, when another vehicle is merging into it - Change Lanes**

##### This study aims to determine the optimal strategy for the ego vehicle when another vehicle is merging onto its highway. The only variable under consideration is the lane-changing reward, `lane_changing_reward`. The goal is to measure the effect of changing lanes once the ego vehicle comes within a certain distance of the merging point, while ensuring both safety and traffic efficiency.

### **Imports**

In [1]:
import gymnasium as gym
from matplotlib import pyplot as plt
import pprint
from highway_env.envs import MergeEnv
from highway_env import utils
import time
import numpy as np
from stable_baselines3 import PPO
import os
%matplotlib inline

### **Custom environment with only ego vehicle and merging vehicle**

**Fixed Rewards**
- high speed = 1
- lane change = -5
- right lane = 3
- changing lanes near the merging vehicle (`lane_changing_reward`) = 5 (when within 5 meters of the merging vehicle)

In [None]:
class CustomMergeEnv(MergeEnv):
    """Merge scenario containing only the ego vehicle and one merging vehicle.

    ``_reward`` starts from the parent ``MergeEnv`` reward and then:

    * adds ``lane_changing_reward`` while the ego vehicle is within
      ``LANE_CHANGE_DISTANCE`` (x-axis distance) of the merging vehicle;
    * subtracts ``influence_penalty`` when the merging vehicle slows down
      sharply while the ego vehicle is close to it and near the merge point.

    Optional config keys (read with ``self.config.get``):
        lane_changing_reward: bonus described above (default 5).
        influence_penalty: positive magnitude subtracted from the reward
            (default 5.0).
    """

    # Lane on which the merging vehicle is spawned.
    MERGING_LANE_INDEX = ("j", "k", 0)
    # x-distance to the merging vehicle under which the lane-changing bonus applies.
    LANE_CHANGE_DISTANCE = 5
    # x-distance to the merging vehicle under which the ego can be blamed for its braking.
    INFLUENCE_DISTANCE = 20
    # x-distance to the start of lane ("b", "c", 0) that counts as "near the merge point".
    MERGE_POINT_RANGE = 100
    # Speed change between two reward evaluations that counts as hard braking.
    DECELERATION_THRESHOLD = -1.0

    def _make_vehicles(self) -> None:
        """Populate the road with the ego vehicle and a single merging vehicle.

        The ego vehicle is created on lane ``("a", "b", 1)`` at position
        ``(30, 0)`` with speed 30; the merging vehicle on lane
        ``("j", "k", 0)`` at position ``(110, 0)`` with speed 20 and a target
        speed of 30. The merging vehicle and its initial speed are remembered
        so that ``_reward`` can keep tracking it during the episode.
        """
        road = self.road

        # Ego vehicle
        ego_vehicle = self.action_type.vehicle_class(
            road, road.network.get_lane(("a", "b", 1)).position(30, 0), speed=30
        )
        road.vehicles.append(ego_vehicle)

        # Merging vehicle
        other_vehicles_type = utils.class_from_path(self.config["other_vehicles_type"])
        merging_v = other_vehicles_type(
            road,
            road.network.get_lane(self.MERGING_LANE_INDEX).position(110, 0),
            speed=20,
        )
        merging_v.target_speed = 30
        road.vehicles.append(merging_v)

        # Set the ego vehicle as the primary vehicle
        self.vehicle = ego_vehicle

        # Keep a handle on the merging vehicle: looking it up by lane index
        # stops working as soon as it leaves the lane it was spawned on.
        self._merging_vehicle = merging_v
        # Start every episode with a fresh speed baseline so the first step is
        # not compared against the last speed of the previous episode.
        self._previous_merging_speed = merging_v.speed

    def _find_merging_vehicle(self):
        """Return the merging vehicle, or ``None`` if it cannot be found."""
        tracked = getattr(self, "_merging_vehicle", None)
        if tracked is not None and any(v is tracked for v in self.road.vehicles):
            return tracked
        # Fallback: any vehicle currently on the merging lane.
        for vehicle in self.road.vehicles:
            if vehicle.lane_index == self.MERGING_LANE_INDEX:
                return vehicle
        return None

    def _reward(self, action: int) -> float:
        """Return the parent reward adjusted for the interaction with the merging vehicle.

        Args:
            action: the action taken by the ego vehicle; forwarded to the
                parent reward and not otherwise inspected here.

        Returns:
            The parent reward, plus the lane-changing bonus when the ego
            vehicle is close to the merging vehicle, minus the influence
            penalty when the merging vehicle was forced to slow down.
        """
        reward = super()._reward(action)

        ego_vehicle = self.vehicle
        merging_vehicle = self._find_merging_vehicle()
        if merging_vehicle is None:
            return reward

        # Distances are measured along the x axis only.
        distance_to_merging_vehicle = abs(
            merging_vehicle.position[0] - ego_vehicle.position[0]
        )
        merge_point_x = self.road.network.get_lane(("b", "c", 0)).position(0, 0)[0]
        near_merge_point = (
            abs(ego_vehicle.position[0] - merge_point_x) < self.MERGE_POINT_RANGE
        )

        # Speed change of the merging vehicle since the previous reward
        # evaluation (a per-call difference, not a per-second acceleration).
        previous_speed = getattr(self, "_previous_merging_speed", None)
        if previous_speed is None:
            previous_speed = merging_vehicle.speed
        merging_acceleration = merging_vehicle.speed - previous_speed
        self._previous_merging_speed = merging_vehicle.speed  # Update for the next step

        # Penalize the ego vehicle for forcing the merging vehicle to brake.
        influence_penalty = 0.0
        if (
            near_merge_point
            and distance_to_merging_vehicle < self.INFLUENCE_DISTANCE
            and merging_acceleration < self.DECELERATION_THRESHOLD
        ):
            print("Highway vehicle influenced: significant deceleration detected")
            influence_penalty = self.config.get("influence_penalty", 5.0)

        # The bonus only applies inside the distance window; outside it is 0
        # even when "lane_changing_reward" is set in the config.
        # NOTE(review): the bonus does not check that `action` is a lane
        # change - confirm this is the intended shaping.
        if distance_to_merging_vehicle < self.LANE_CHANGE_DISTANCE:
            lane_changing_reward = self.config.get("lane_changing_reward", 5)
        else:
            lane_changing_reward = 0

        # The penalty is a positive magnitude, so it is subtracted.
        return reward + lane_changing_reward - influence_penalty

In [None]:
# Expose CustomMergeEnv to gymnasium under the id "CustomMerge-v0".
# The entry point resolves the class from the running notebook/script (__main__).
gym.register(id="CustomMerge-v0", entry_point="__main__:CustomMergeEnv")

## **Training**

In [None]:
# Environment used for training and evaluation, with the fixed reward weights of this study.
env_change_lanes = CustomMergeEnv(
    render_mode="rgb_array",
    config={
        "high_speed_reward": 1,
        "lane_change_reward": -5,
        "right_lane_reward": 3,
    },
)

In [None]:
# Total number of environment steps used for training.
timesteps = 50_000

# PPO agent with a two-layer MLP policy; training curves are written for TensorBoard.
model = PPO(
    "MlpPolicy",
    env_change_lanes,
    policy_kwargs={"net_arch": [256, 256]},
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    verbose=1,
    tensorboard_log="env_change_lanes_other_influence/",
)

# Train, then persist the policy so the evaluation cell can reload it.
model.learn(total_timesteps=timesteps)
model.save("env_change_lanes_other_influence/model")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to env_dis_40_50_brake_close/PPO_1
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashTrue
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashTrue
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
cras

## **Testing**


TODO: PUT THE TEST RESULTS HERE SO THEY ARE EASIER TO SEE

In [None]:
# Run the trained agent for a number of episodes and collect driving metrics
def evaluate_agent(model, env, num_episodes, speed_threshold_ratio=0.3):
    """Evaluate ``model`` on ``env`` and print/return aggregate metrics.

    Args:
        model: object exposing ``predict(obs, deterministic=True)`` that
            returns ``(action, state)``.
        env: environment exposing ``config["reward_speed_range"]``,
            ``vehicle``, ``road.vehicles``, ``reset()`` and ``step(action)``
            (five-value return: obs, reward, terminated, truncated, info).
        num_episodes: number of episodes to run.
        speed_threshold_ratio: fraction of the width of
            ``reward_speed_range``; a speed change between two consecutive
            steps above this value marks the episode as dangerous.

    Returns:
        dict with the keys ``avg_reward``, ``avg_steps_to_merge``,
        ``avg_episode_time``, ``number_collisions``, ``successful_merges``
        and ``number_dangerous_episodes``.
    """
    total_rewards = []
    total_steps_to_merge = []
    total_episode_times = []  # wall-clock duration of each episode
    total_collisions = 0
    successful_merges = 0
    dangerous_driving_episodes = 0

    # Threshold for a "sudden" speed change, derived from the rewarded speed range
    reward_speed_range = env.config["reward_speed_range"]
    speed_threshold = (reward_speed_range[1] - reward_speed_range[0]) * speed_threshold_ratio

    for _ in range(num_episodes):
        start_time = time.time()
        obs, info = env.reset()

        # Look the vehicles up after every reset: vehicle objects fetched
        # before reset() belong to the previous episode and no longer move.
        ego_vehicle = env.vehicle
        # The merging vehicle is taken to be the first non-ego vehicle on the road.
        merging_vehicle = next(
            (v for v in env.road.vehicles if v is not ego_vehicle), None
        )

        done = False
        episode_reward = 0
        collisions = 0
        dangerous_driving = False  # sticky for the whole episode
        steps_to_merge = 0
        last_speed = None
        changed_lane = False

        while not done:
            # The agent picks an action
            action, _states = model.predict(obs, deterministic=True)

            # presumably action 0 is LANE_LEFT in highway-env's discrete
            # meta-actions - confirm against the configured action type
            if action == 0:
                changed_lane = True

            obs, reward, terminated, truncated, info = env.step(action)
            episode_reward += reward
            steps_to_merge += 1

            # Current speed, rounded to two decimals
            current_speed = round(info.get('speed', 0), 2)

            # A single sudden speed change flags the whole episode
            if last_speed is not None and abs(current_speed - last_speed) > speed_threshold:
                dangerous_driving = True
            last_speed = current_speed

            if info.get('crashed'):
                collisions += 1

            # The episode ends on either termination or truncation
            done = terminated or truncated

        # Success: no collision, the ego changed lane, and at the end of the
        # episode the merging vehicle is ahead of the ego (x) with its
        # lateral position (y) strictly between 3 and 6.
        if merging_vehicle is not None:
            ego_position = ego_vehicle.position
            merging_position = merging_vehicle.position
            if (
                not collisions
                and changed_lane
                and ego_position[0] < merging_position[0]
                and 3 < merging_position[1] < 6
            ):
                successful_merges += 1

        # Record the episode metrics
        total_rewards.append(episode_reward)
        total_collisions += collisions
        total_steps_to_merge.append(steps_to_merge)
        if dangerous_driving:
            dangerous_driving_episodes += 1
        total_episode_times.append(time.time() - start_time)

    # Final aggregates
    avg_reward = np.mean(total_rewards)
    avg_steps_to_merge = np.mean(total_steps_to_merge)
    avg_episode_time = np.mean(total_episode_times)

    # Report
    print(f"Average Reward: {avg_reward}")
    print(f"Average Steps to Merge: {avg_steps_to_merge}")
    print(f"Average Episode Time: {avg_episode_time:.2f} seconds")
    print(f"Number of Collisions: {total_collisions}")
    print(f"Successful Merges: {successful_merges}")
    print(f"Number of Dangerous Driving Episodes (sudden speed changes): {dangerous_driving_episodes}")

    return {
        "avg_reward": avg_reward,
        "avg_steps_to_merge": avg_steps_to_merge,
        "avg_episode_time": avg_episode_time,
        "number_collisions": total_collisions,
        "successful_merges": successful_merges,
        "number_dangerous_episodes": dangerous_driving_episodes
    }

In [None]:
# Reload the trained policy from disk
model = PPO.load("env_change_lanes_other_influence/model")

# Evaluate it on the training environment over 200 episodes
results = evaluate_agent(model, env_change_lanes, num_episodes=200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
ego MDPVehicle #112: [370.99999745   4.        ]
merge IDMVehicle #152: [383.27804123   4.00000018]
ego ('c', 'd', 1)
merge ('j', 'k', 0)
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
ego MDPVehicle #112: [370.99999745   4.        ]
merge IDMVehicle #152: [383.27804123   4.00000018]
ego ('c', 'd', 1)
merge ('j', 'k', 0)
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
o