# **Study on the Ideal Behaviour for Merging into the Highway**

##### This study aims to determine the optimal strategy for the ego vehicle to safely and efficiently merge onto a highway, prioritizing the action of braking to allow oncoming vehicles to pass. The only variable under consideration is the reward for the braking action, which will be shaped based on how close the oncoming vehicle is. The goal is to find the optimal reward configuration that encourages the ego vehicle to brake at the right moment, ensuring both safety and traffic efficiency. This reward will be progressively fine-tuned to determine the best braking behavior, either identifying a single optimal strategy or a range of effective solutions depending on the proximity of the approaching vehicle.

### **Imports**

In [1]:
import gymnasium as gym
from matplotlib import pyplot as plt
import pprint
import highway_env
import pandas as pd
import time
import numpy as np
from stable_baselines3 import PPO
from highway_env import utils
from highway_env.envs import MergeEnv
from highway_env.vehicle.controller import ControlledVehicle
%matplotlib inline

### **Creation of the environment**

##### The ego vehicle starts on the merging lane, with a single vehicle on the rightmost lane of the highway and a customized reward function.

In [6]:
class RightLaneVehicle(ControlledVehicle):
    """
    A vehicle that is restricted to its lane and never changes lane.

    Lane-change commands are replaced by a lane-keeping command before being
    forwarded to ``ControlledVehicle.act``; every other command is forwarded
    unchanged.
    """

    def act(self, action: "int | str | None" = None) -> None:
        """
        Forward ``action`` to the parent controller with lane changes removed.

        :param action: the command to execute. String meta-actions
            ``"LANE_LEFT"`` / ``"LANE_RIGHT"`` are replaced by ``"IDLE"``.
            The integer codes ``0`` / ``2`` are replaced by ``1`` as before
            (presumably the DiscreteMetaAction indexes for lane left / idle /
            lane right -- TODO confirm against the installed highway-env).
        """
        if isinstance(action, str):
            # The parent controller is driven by string meta-actions, so these
            # are the commands that would actually retarget the lane.
            if action in ("LANE_LEFT", "LANE_RIGHT"):
                action = "IDLE"
        elif action in [0, 2]:  # Integer codes for changing lane left or right
            action = 1  # Force lane keeping (action 1)
        super().act(action)


class CustomMergeEnv(MergeEnv):
    """
    Merge scenario reduced to two vehicles: the ego vehicle on the merging lane
    and one lane-keeping ``RightLaneVehicle`` on the highway.

    The reward of ``MergeEnv`` is extended with a shaping term that is only
    active near the merge point (see ``_reward``).

    Optional config keys read by this class (default in parentheses):
    ``acceleration_bonus`` (1.5), ``acceleration_penalty`` (1.0),
    ``merging_bonus`` (3.0), ``merging_penalty`` (2.0) and
    ``debug_prints`` (True; set to False to silence the per-step prints,
    e.g. during training).
    """

    # Lane whose starting point is used as the merge point.
    MERGE_LANE_INDEX = ("b", "c", 0)
    # The shaping term applies while the ego x-coordinate is closer than this
    # to the merge point x-coordinate.
    MERGE_ZONE_DISTANCE = 100

    def _debug_print(self, *values) -> None:
        """Print ``values`` unless the ``debug_prints`` config entry is falsy."""
        if self.config.get("debug_prints", True):
            print(*values)

    def _merge_point_x(self) -> float:
        """Return the x-coordinate of the start of the merge lane."""
        return self.road.network.get_lane(self.MERGE_LANE_INDEX).position(0, 0)[0]

    def _make_vehicles(self) -> None:
        """
        Place the ego vehicle and the single highway vehicle on the road.

        The highway vehicle's speed is chosen so that, at constant speeds and
        measuring distance along x only, both vehicles reach the merge point
        x-coordinate at the same time.
        """
        road = self.road

        # Merge point on the highway
        merge_x = self._merge_point_x()

        # Initial position of the ego vehicle on the merging lane
        ego_initial_position = road.network.get_lane(("j", "k", 0)).position(30, 0)

        # Initial position of the highway vehicle on lane 1 of the highway
        highway_vehicle_initial_position = road.network.get_lane(("a", "b", 1)).position(80, 0)

        # Initial speed of the ego vehicle
        ego_speed = 20

        # Time the ego vehicle needs to cover the x-distance to the merge point
        time_to_merge = (merge_x - ego_initial_position[0]) / ego_speed

        # Speed at which the highway vehicle covers its own x-distance in the same time
        highway_vehicle_speed = (merge_x - highway_vehicle_initial_position[0]) / time_to_merge

        # Create the ego vehicle on the merging lane
        ego_vehicle = self.action_type.vehicle_class(
            road, ego_initial_position, speed=ego_speed
        )
        road.vehicles.append(ego_vehicle)

        # Create the vehicle on the highway (lane 1)
        highway_vehicle = RightLaneVehicle(
            road, highway_vehicle_initial_position, speed=highway_vehicle_speed
        )
        road.vehicles.append(highway_vehicle)

        # Set the ego vehicle as the controlled vehicle
        self.vehicle = ego_vehicle

        # Start every episode from the ego's initial speed so the first
        # acceleration estimate is not measured against the previous episode.
        self._previous_speed = ego_vehicle.speed

        # Debug: show the vehicles' positions and speeds
        self._debug_print(f"Posição do veículo ego: {ego_vehicle.position}, Velocidade: {ego_vehicle.speed}")
        self._debug_print(f"Posição do veículo da autoestrada: {highway_vehicle.position}, Velocidade: {highway_vehicle.speed}")

    def _reward(self, action: int) -> float:
        """
        Return the parent reward plus a merge-shaping term.

        The shaping term is non-zero only while the ego vehicle is within
        ``MERGE_ZONE_DISTANCE`` (along x) of the merge point. There it adds
        ``acceleration_bonus`` when the ego is faster than the highway vehicle
        and its speed increased since the previous call, otherwise subtracts
        ``acceleration_penalty``; and it adds ``merging_bonus`` when the ego's
        x-coordinate is ahead of the highway vehicle's, otherwise subtracts
        ``merging_penalty``.

        NOTE(review): this rewards accelerating ahead of the highway vehicle,
        while the notebook introduction talks about rewarding braking --
        confirm which behaviour is intended.

        :param action: the action passed on to the parent reward function
        :return: the parent reward if no ``RightLaneVehicle`` is on the road,
            otherwise the parent reward plus the shaping term
        """
        reward = super()._reward(action)

        ego_vehicle = self.vehicle

        # The highway vehicle is the first RightLaneVehicle on the road
        highway_vehicle = next(
            (vehicle for vehicle in self.road.vehicles if isinstance(vehicle, RightLaneVehicle)),
            None,
        )
        if highway_vehicle is None:
            return reward

        # Positive while the highway vehicle is ahead of the ego along x
        distance_to_highway_vehicle = highway_vehicle.position[0] - ego_vehicle.position[0]
        near_merge_point = (
            abs(ego_vehicle.position[0] - self._merge_point_x()) < self.MERGE_ZONE_DISTANCE
        )
        self._debug_print(near_merge_point)

        # Speed change since the previous call (not divided by the elapsed time).
        # The fallback only matters if _make_vehicles did not seed the value.
        previous_speed = getattr(self, "_previous_speed", ego_vehicle.speed)
        acceleration = ego_vehicle.speed - previous_speed
        self._previous_speed = ego_vehicle.speed  # Update for the next step

        merging_reward = 0.0
        if near_merge_point:
            self._debug_print("Near merge point")
            if ego_vehicle.speed > highway_vehicle.speed and acceleration > 0:
                self._debug_print("Accelerating successfully")
                merging_reward = self.config.get("acceleration_bonus", 1.5)
            else:
                self._debug_print("Not accelerating enough")
                merging_reward -= self.config.get("acceleration_penalty", 1.0)

            if distance_to_highway_vehicle < 0:  # Ego vehicle is ahead of the highway vehicle
                self._debug_print("Successfully merged ahead")
                merging_reward += self.config.get("merging_bonus", 3.0)
            else:
                self._debug_print("Failed to merge ahead")
                merging_reward -= self.config.get("merging_penalty", 2.0)

        reward += merging_reward

        # Debug information
        self._debug_print(f"Distance to highway vehicle: {distance_to_highway_vehicle}, Ego speed: {ego_vehicle.speed}, Highway speed: {highway_vehicle.speed}")
        self._debug_print(f"Merging reward: {merging_reward}, Total reward: {reward}")

        return reward


In [7]:
# Register the custom environment. The guard makes this cell safe to re-run:
# registering an id that is already in the registry only triggers an
# "Overriding environment ... already in registry" warning.
if 'CustomMerge-v0' not in gym.envs.registration.registry:
    gym.envs.registration.register(
        id='CustomMerge-v0',
        entry_point='__main__:CustomMergeEnv',  # Entry point for the custom environment
    )

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


In [8]:
# Instantiate the registered environment with frames returned as RGB arrays,
# then dump its effective configuration for inspection.
env = gym.make(
    "CustomMerge-v0",
    render_mode="rgb_array",
)
env_config = env.unwrapped.config
pprint.pprint(env_config)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0
{'action': {'type': 'DiscreteMetaAction'},
 'centering_position': [0.3, 0.5],
 'collision_reward': -1,
 'high_speed_reward': 0.2,
 'lane_change_reward': -0.05,
 'manual_control': False,
 'merging_speed_reward': -0.5,
 'observation': {'type': 'Kinematics'},
 'offscreen_rendering': False,
 'other_vehicles_type': 'highway_env.vehicle.behavior.IDMVehicle',
 'policy_frequency': 1,
 'real_time_rendering': False,
 'render_agent': True,
 'reward_speed_range': [20, 30],
 'right_lane_reward': 0.1,
 'scaling': 5.5,
 'screen_height': 150,
 'screen_width': 600,
 'show_trajectories': False,
 'simulation_frequency': 15}


In [9]:
# Smoke test of the environment: hold the IDLE action for at most 100 steps,
# stopping as soon as the episode ends, then display the last rendered frame.
env.reset()
action = env.unwrapped.action_type.actions_indexes["IDLE"]
for _ in range(100):
    obs, reward, done, truncated, info = env.step(action)
    env.render()
    if done or truncated:
        # A finished episode must be reset() before it is stepped again
        break

plt.imshow(env.render())
plt.show()

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0
False
Distance to highway vehicle: 44.99999999999997, Ego speed: 20.0, Highway speed: 15.0
Merging reward: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
False
Distance to highway vehicle: 39.99999999999997, Ego speed: 20.0, Highway speed: 15.0
Merging reward: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
False
Distance to highway vehicle: 35.00000000000004, Ego speed: 20.0, Highway speed: 15.0
Merging reward: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
False
Distance to highway vehicle: 30.000000000000114, Ego speed: 20.0, Highway speed: 15.0
Merging reward: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
False
Distance to highway vehicle: 25.00000000000017, Ego speed: 20.0, Highway speed: 15.0
Merging reward: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
True
Near merge point
Not accelerating enough
Failed t

AttributeError: 'NoneType' object has no attribute 'get_image'

### **Training the model**

In [None]:
# model = PPO('MlpPolicy', env,
#             policy_kwargs=dict(net_arch=[256, 256]),
#             learning_rate=5e-4,
#             n_steps=2048, 
#             batch_size=64, 
#             n_epochs=10,  
#             gamma=0.8,
#             gae_lambda=0.95, 
#             clip_range=0.2, 
#             verbose=1,
#             tensorboard_log="env_ego_entering_brake_close/")
# timesteps = 50000
# model.learn(total_timesteps=timesteps)
# model.save("env_ego_entering_brake_close/model")