# **Study on the Ideal Behaviour for Merging into the Highway**

##### This study aims to determine the optimal strategy for the ego vehicle to merge onto a highway safely and efficiently, prioritizing braking to let oncoming vehicles pass. The variables under consideration are the reward for the braking action, which is shaped according to how close the oncoming vehicle is, and an influence penalty, which is applied whenever the other vehicle's behaviour changes because of the ego vehicle. The goal is to find the reward configuration that encourages the ego vehicle to brake at the right moment, ensuring both safety and traffic efficiency while affecting the behaviour of the other vehicle as little as possible.

### **Imports**

In [43]:
import gymnasium as gym
from matplotlib import pyplot as plt
import pprint
import highway_env
import pandas as pd
import time
import numpy as np
from stable_baselines3 import PPO
from highway_env import utils
from highway_env.envs import MergeEnv
from highway_env.vehicle.controller import ControlledVehicle
%matplotlib inline

### **Creation of the environment**

##### With the ego vehicle on the merging lane, a single vehicle on the rightmost lane of the highway, and a customized reward function

In [None]:
class RightLaneVehicle(ControlledVehicle):
    """
    A vehicle that is restricted to its current lane and never changes lane.

    Any lane-change request passed to ``act`` is replaced by the matching
    "keep lane" request before it is forwarded to ``ControlledVehicle.act``.
    """

    def act(self, action: int = None) -> None:
        """
        Forward ``action`` to the parent controller with lane changes disabled.

        :param action: the requested action. Integer codes 0 and 2 (lane
            change left / right) are replaced by 1 (keep lane); the string
            labels ``"LANE_LEFT"`` / ``"LANE_RIGHT"`` are replaced by
            ``"IDLE"``. Anything else, including ``None``, is forwarded
            unchanged.
        """
        if isinstance(action, str):
            # NOTE(review): highway-env's ControlledVehicle.act appears to
            # dispatch on string labels rather than integer codes - confirm
            # against the installed version. Blocking the labels as well makes
            # sure the "never changes lane" guarantee actually holds.
            if action in ("LANE_LEFT", "LANE_RIGHT"):
                action = "IDLE"
        elif action in [0, 2]:  # integer codes for lane change left / right
            action = 1  # force "keep lane"
        super().act(action)


class CustomMergeEnv(MergeEnv):
    """
    Merge environment with one ego vehicle on the merging lane and a single
    ``RightLaneVehicle`` on the highway, plus a reward that favours braking
    and yielding to the highway vehicle near the merge point.

    Config keys read by this class (all optional, defaults in parentheses):
    ``braking_bonus`` (1.0), ``braking_penalty`` (1.0), ``yielding_bonus``
    (2.0), ``yielding_penalty`` (2.0), ``influence_penalty`` (5.0),
    ``policy_frequency`` (1) and ``debug_prints`` (True).
    """

    def _merge_point_x(self) -> float:
        """Return the x coordinate of the start of highway lane ("b", "c", 0)."""
        return self.road.network.get_lane(("b", "c", 0)).position(0, 0)[0]

    def _make_vehicles(self) -> None:
        """
        Place the ego vehicle and the highway vehicle on the road.

        The highway vehicle's speed is chosen so that, at constant speeds,
        both vehicles reach the merge point's x coordinate at the same time.
        """
        road = self.road

        # x coordinate of the merge point on the highway
        merge_x = self._merge_point_x()

        # Initial position of the ego vehicle on the merging lane
        ego_initial_position = road.network.get_lane(("j", "k", 0)).position(30, 0)

        # Initial position of the highway vehicle on lane index 1 of the highway
        highway_vehicle_initial_position = road.network.get_lane(("a", "b", 1)).position(80, 0)

        # Initial speed of the ego vehicle
        ego_speed = 20

        # Time for the ego vehicle to reach the merge point at its initial speed
        time_to_merge = (merge_x - ego_initial_position[0]) / ego_speed

        # Highway vehicle speed that makes it arrive at the merge point at the same time
        highway_vehicle_speed = (merge_x - highway_vehicle_initial_position[0]) / time_to_merge

        # Create the ego vehicle on the merging lane
        ego_vehicle = self.action_type.vehicle_class(
            road, ego_initial_position, speed=ego_speed
        )
        road.vehicles.append(ego_vehicle)

        # Create the highway vehicle
        highway_vehicle = RightLaneVehicle(
            road, highway_vehicle_initial_position, speed=highway_vehicle_speed
        )
        road.vehicles.append(highway_vehicle)

        # The ego vehicle is the controlled vehicle
        self.vehicle = ego_vehicle

        # Restart the speed history used by _reward for every new set of
        # vehicles, so the first acceleration estimate of an episode is not
        # computed against the last speed of the previous episode.
        self._previous_speed = ego_vehicle.speed
        self._previous_highway_speed = highway_vehicle.speed

        # Debug: initial positions and speeds (disable with config["debug_prints"] = False)
        if self.config.get("debug_prints", True):
            print(f"Posição do veículo ego: {ego_vehicle.position}, Velocidade: {ego_vehicle.speed}")
            print(f"Posição do veículo da autoestrada: {highway_vehicle.position}, Velocidade: {highway_vehicle.speed}")

    def _reward(self, action: int) -> float:
        """
        Custom reward that incentivizes the ego vehicle to brake near the
        merging point and let the highway vehicle pass before merging.

        The parent reward is extended with a braking/yielding term and reduced
        by an influence penalty when the highway vehicle decelerates sharply
        while close to the ego vehicle.

        :param action: the action taken by the agent, forwarded to the parent reward.
        :return: the shaped reward for this step.
        """
        # Start from the parent class reward
        reward = super()._reward(action)

        ego_vehicle = self.vehicle

        # The highway vehicle is the first RightLaneVehicle on the road
        highway_vehicle = next(
            (v for v in self.road.vehicles if isinstance(v, RightLaneVehicle)),
            None,
        )
        if highway_vehicle is None:
            return reward

        # Relative longitudinal position (positive: highway vehicle is ahead of the ego)
        distance_to_highway_vehicle = highway_vehicle.position[0] - ego_vehicle.position[0]
        is_ahead = distance_to_highway_vehicle > 0
        near_merge_point = abs(ego_vehicle.position[0] - self._merge_point_x()) < 100

        # One call spans 1 / policy_frequency seconds, so a per-step speed
        # difference is scaled by policy_frequency to obtain an acceleration.
        steps_per_second = self.config.get("policy_frequency", 1)

        # Fall back to the current speed (zero acceleration) if no history exists yet
        previous_ego_speed = getattr(self, "_previous_speed", ego_vehicle.speed)
        acceleration = (ego_vehicle.speed - previous_ego_speed) * steps_per_second
        self._previous_speed = ego_vehicle.speed

        previous_highway_speed = getattr(self, "_previous_highway_speed", highway_vehicle.speed)
        highway_acceleration = (highway_vehicle.speed - previous_highway_speed) * steps_per_second
        self._previous_highway_speed = highway_vehicle.speed

        # Reward for braking and letting the highway vehicle pass
        braking_reward = 0.0
        if near_merge_point and is_ahead:
            ego_is_slower = ego_vehicle.speed < highway_vehicle.speed
            # The ego vehicle should be slower than the highway vehicle and decelerating
            if ego_is_slower and acceleration < 0:
                braking_reward = self.config.get("braking_bonus", 1.0)
            else:
                braking_reward = -self.config.get("braking_penalty", 1.0)
            # Extra term for staying behind the highway vehicle
            # (the highway vehicle being ahead is already guaranteed by is_ahead)
            if ego_is_slower:
                braking_reward += self.config.get("yielding_bonus", 2.0)
            else:
                braking_reward -= self.config.get("yielding_penalty", 2.0)

        # Penalize interference with the highway vehicle. The absolute gap is
        # used so that "close" holds on either side of the ego vehicle; a
        # signed comparison would treat any vehicle behind the ego as close,
        # however far away it is.
        # NOTE(review): RightLaneVehicle derives from ControlledVehicle; confirm
        # it can actually decelerate in reaction to the ego vehicle, otherwise
        # this penalty may rarely fire.
        influence_penalty = 0.0
        if near_merge_point and abs(distance_to_highway_vehicle) < 20:
            if highway_acceleration < -1.0:  # significant deceleration detected
                influence_penalty = self.config.get("influence_penalty", 5.0)

        # Total reward includes the braking incentive and the interference penalty
        reward += braking_reward - influence_penalty

        # Debug information (disable with config["debug_prints"] = False)
        if self.config.get("debug_prints", True):
            print(f"Distance to highway vehicle: {distance_to_highway_vehicle}, Ego speed: {ego_vehicle.speed}, Highway speed: {highway_vehicle.speed}, Highway acceleration: {highway_acceleration}")
            print(f"Braking reward: {braking_reward}, Influence penalty: {influence_penalty}, Total reward: {reward}")

        return reward


In [45]:
# Register the custom environment so it can be created with gym.make.
# The call is skipped when the id is already registered, so re-running this
# cell no longer triggers the "Overriding environment ... already in registry"
# warning. The entry point is a string, so it is still looked up by name.
if 'CustomMerge-v0' not in gym.registry:
    gym.envs.registration.register(
        id='CustomMerge-v0',
        entry_point='__main__:CustomMergeEnv',  # Entry point for the custom environment
    )

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


In [None]:
# Build the registered environment with off-screen (array) rendering,
# then show the configuration of the underlying (unwrapped) environment.
env = gym.make(
    "CustomMerge-v0",
    render_mode="rgb_array",
)
pprint.pprint(env.unwrapped.config)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0


### **Training the models for several rewards**

#### Initial configuration with balanced values

In [None]:
# Balanced reward weights. The configuration is updated on the unwrapped
# environment: gym.make returns a wrapped env, and reaching `config` through
# the wrapper relies on implicit attribute forwarding that Gymnasium has
# deprecated/removed.
env.unwrapped.config.update({
    "braking_bonus": 1.0,
    "braking_penalty": 1.0,
    "yielding_bonus": 2.0,
    "yielding_penalty": 2.0,
    "influence_penalty": 5.0
})

In [None]:
# Train a PPO agent on the current reward configuration and save it.
timesteps = 1_000_000
ppo_settings = {
    "policy_kwargs": {"net_arch": [256, 256]},
    "learning_rate": 5e-4,
    "n_steps": 2048,
    "batch_size": 64,
    "n_epochs": 10,
    "gamma": 0.8,
    "gae_lambda": 0.95,
    "clip_range": 0.2,
    "verbose": 1,
    "tensorboard_log": "env_ego_entering_brake_close_0/",
}
model = PPO("MlpPolicy", env, **ppo_settings)
model.learn(total_timesteps=timesteps)
model.save("env_ego_entering_brake_close_0/model")

#### Configuration to force the ego vehicle to let the highway vehicle go ahead

In [None]:
# Larger yielding bonus. The configuration is updated on the unwrapped
# environment: gym.make returns a wrapped env, and reaching `config` through
# the wrapper relies on implicit attribute forwarding that Gymnasium has
# deprecated/removed.
env.unwrapped.config.update({
    "braking_bonus": 1.0,
    "braking_penalty": 1.0,
    "yielding_bonus": 4.0,
    "yielding_penalty": 2.0,
    "influence_penalty": 5.0
})

In [None]:
# Train a PPO agent on the current reward configuration and save it.
timesteps = 1_000_000
ppo_settings = {
    "policy_kwargs": {"net_arch": [256, 256]},
    "learning_rate": 5e-4,
    "n_steps": 2048,
    "batch_size": 64,
    "n_epochs": 10,
    "gamma": 0.8,
    "gae_lambda": 0.95,
    "clip_range": 0.2,
    "verbose": 1,
    "tensorboard_log": "env_ego_entering_brake_close_1/",
}
model = PPO("MlpPolicy", env, **ppo_settings)
model.learn(total_timesteps=timesteps)
model.save("env_ego_entering_brake_close_1/model")

#### Configuration to severely punish influences on the highway vehicle behaviour

In [None]:
# Larger influence penalty. The configuration is updated on the unwrapped
# environment: gym.make returns a wrapped env, and reaching `config` through
# the wrapper relies on implicit attribute forwarding that Gymnasium has
# deprecated/removed.
env.unwrapped.config.update({
    "braking_bonus": 1.0,
    "braking_penalty": 1.0,
    "yielding_bonus": 2.0,
    "yielding_penalty": 2.0,
    "influence_penalty": 10.0
})

In [None]:
# Train a PPO agent on the current reward configuration and save it.
timesteps = 1_000_000
ppo_settings = {
    "policy_kwargs": {"net_arch": [256, 256]},
    "learning_rate": 5e-4,
    "n_steps": 2048,
    "batch_size": 64,
    "n_epochs": 10,
    "gamma": 0.8,
    "gae_lambda": 0.95,
    "clip_range": 0.2,
    "verbose": 1,
    "tensorboard_log": "env_ego_entering_brake_close_2/",
}
model = PPO("MlpPolicy", env, **ppo_settings)
model.learn(total_timesteps=timesteps)
model.save("env_ego_entering_brake_close_2/model")

#### "Safe" configuration - increase the yielding_bonus and the influence_penalty

In [None]:
# Larger yielding bonus and influence penalty. The configuration is updated on
# the unwrapped environment: gym.make returns a wrapped env, and reaching
# `config` through the wrapper relies on implicit attribute forwarding that
# Gymnasium has deprecated/removed.
env.unwrapped.config.update({
    "braking_bonus": 1.0,
    "braking_penalty": 1.0,
    "yielding_bonus": 4.0,
    "yielding_penalty": 2.0,
    "influence_penalty": 10.0
})

In [None]:
# Train a PPO agent on the current reward configuration and save it.
timesteps = 1_000_000
ppo_settings = {
    "policy_kwargs": {"net_arch": [256, 256]},
    "learning_rate": 5e-4,
    "n_steps": 2048,
    "batch_size": 64,
    "n_epochs": 10,
    "gamma": 0.8,
    "gae_lambda": 0.95,
    "clip_range": 0.2,
    "verbose": 1,
    "tensorboard_log": "env_ego_entering_brake_close_3/",
}
model = PPO("MlpPolicy", env, **ppo_settings)
model.learn(total_timesteps=timesteps)
model.save("env_ego_entering_brake_close_3/model")

#### "Aggressive" configuration - Reduce yielding_bonus and increase the braking_bonus

In [None]:
# Larger braking bonus, smaller yielding bonus. The configuration is updated
# on the unwrapped environment: gym.make returns a wrapped env, and reaching
# `config` through the wrapper relies on implicit attribute forwarding that
# Gymnasium has deprecated/removed.
env.unwrapped.config.update({
    "braking_bonus": 4.0,
    "braking_penalty": 1.0,
    "yielding_bonus": 0.5,
    "yielding_penalty": 2.0,
    "influence_penalty": 5.0
})

In [None]:
# Train a PPO agent on the current reward configuration and save it.
timesteps = 1_000_000
ppo_settings = {
    "policy_kwargs": {"net_arch": [256, 256]},
    "learning_rate": 5e-4,
    "n_steps": 2048,
    "batch_size": 64,
    "n_epochs": 10,
    "gamma": 0.8,
    "gae_lambda": 0.95,
    "clip_range": 0.2,
    "verbose": 1,
    "tensorboard_log": "env_ego_entering_brake_close_4/",
}
model = PPO("MlpPolicy", env, **ppo_settings)
model.learn(total_timesteps=timesteps)
model.save("env_ego_entering_brake_close_4/model")

### **Evaluate and Compare the Models**