# **Study on the Minimum Gap between Highway Cars for the Ego to Merge into the Highway**

##### TODO: Add introduction

### **Imports**

In [27]:
import gymnasium as gym
from matplotlib import pyplot as plt
import pprint
import highway_env
import pandas as pd
import time
import numpy as np
from stable_baselines3 import PPO
from highway_env import utils
from highway_env.envs import MergeEnv
from highway_env.vehicle.controller import ControlledVehicle
%matplotlib inline

### **Creation of the environment**

##### The ego vehicle starts on the merging lane, and two vehicles drive on the rightmost highway lane with a fixed distance between them

##### With a distance of 20

In [28]:
class RightLaneVehicle(ControlledVehicle):
    """
    Highway vehicle that keeps its current lane and never performs a lane change.

    Any lane-change request is replaced by the neutral "IDLE" meta-action
    before being forwarded to ``ControlledVehicle.act``.
    """

    # highway-env hands meta-actions to the vehicle as strings; 0 and 2 are the
    # discrete codes of "LANE_LEFT" / "LANE_RIGHT" and remain accepted so that
    # callers passing raw indices keep working.
    LANE_CHANGE_ACTIONS = (0, 2, "LANE_LEFT", "LANE_RIGHT")

    def act(self, action: "int | str | dict | None" = None) -> None:
        """
        Apply ``action`` unless it requests a lane change, in which case idle.

        :param action: meta-action label, discrete code or low-level action
            dict; ``None`` is forwarded unchanged to the parent controller.
        """
        if action in self.LANE_CHANGE_ACTIONS:
            action = "IDLE"  # neutral meta-action: keep lane and target speed
        super().act(action)


class CustomMergeEnv20(MergeEnv):
    """
    Merge scenario: ego on the merging lane, two highway vehicles 20 apart.

    The leading highway vehicle starts at longitudinal position 100 and the
    trailing one at 80, both on lane ("a", "b", 1).
    """

    def _make_vehicles(self) -> None:
        """Create the ego vehicle and the two highway vehicles on ``self.road``."""
        network = self.road.network

        # Start of lane ("b", "c", 0), used as the reference merge point.
        merge_point = network.get_lane(("b", "c", 0)).position(0, 0)

        # Ego start: longitudinal position 30 on the merging lane ("j", "k", 0).
        ego_start = network.get_lane(("j", "k", 0)).position(30, 0)

        # Highway vehicles share lane 1 of ("a", "b"); leader first, follower second.
        highway_lane = network.get_lane(("a", "b", 1))
        highway_starts = [highway_lane.position(longitudinal, 0) for longitudinal in (100, 80)]

        # Pick the highway speed so that the leader reaches the merge point's
        # x-coordinate at the same time as the ego driving at its initial speed.
        ego_speed = 20
        time_to_merge = (merge_point[0] - ego_start[0]) / ego_speed
        highway_speed = (merge_point[0] - highway_starts[0][0]) / time_to_merge

        # The ego is appended first, then the highway vehicles in leader/follower order.
        ego_vehicle = self.action_type.vehicle_class(self.road, ego_start, speed=ego_speed)
        self.road.vehicles.append(ego_vehicle)

        for start in highway_starts:
            self.road.vehicles.append(
                RightLaneVehicle(self.road, start, speed=highway_speed)
            )

        # The ego is the controlled vehicle of the environment.
        self.vehicle = ego_vehicle


##### With a distance of 40

In [29]:
class RightLaneVehicle(ControlledVehicle):
    """
    Highway vehicle that keeps its current lane and never performs a lane change.

    Any lane-change request is replaced by the neutral "IDLE" meta-action
    before being forwarded to ``ControlledVehicle.act``.
    """

    # highway-env hands meta-actions to the vehicle as strings; 0 and 2 are the
    # discrete codes of "LANE_LEFT" / "LANE_RIGHT" and remain accepted so that
    # callers passing raw indices keep working.
    LANE_CHANGE_ACTIONS = (0, 2, "LANE_LEFT", "LANE_RIGHT")

    def act(self, action: "int | str | dict | None" = None) -> None:
        """
        Apply ``action`` unless it requests a lane change, in which case idle.

        :param action: meta-action label, discrete code or low-level action
            dict; ``None`` is forwarded unchanged to the parent controller.
        """
        if action in self.LANE_CHANGE_ACTIONS:
            action = "IDLE"  # neutral meta-action: keep lane and target speed
        super().act(action)


class CustomMergeEnv40(MergeEnv):
    """
    Merge scenario: ego on the merging lane, two highway vehicles 40 apart.

    The leading highway vehicle starts at longitudinal position 100 and the
    trailing one at 60, both on lane ("a", "b", 1).
    """

    def _make_vehicles(self) -> None:
        """Create the ego vehicle and the two highway vehicles on ``self.road``."""
        network = self.road.network

        # Start of lane ("b", "c", 0), used as the reference merge point.
        merge_point = network.get_lane(("b", "c", 0)).position(0, 0)

        # Ego start: longitudinal position 30 on the merging lane ("j", "k", 0).
        ego_start = network.get_lane(("j", "k", 0)).position(30, 0)

        # Highway vehicles share lane 1 of ("a", "b"); leader first, follower second.
        highway_lane = network.get_lane(("a", "b", 1))
        highway_starts = [highway_lane.position(longitudinal, 0) for longitudinal in (100, 60)]

        # Pick the highway speed so that the leader reaches the merge point's
        # x-coordinate at the same time as the ego driving at its initial speed.
        ego_speed = 20
        time_to_merge = (merge_point[0] - ego_start[0]) / ego_speed
        highway_speed = (merge_point[0] - highway_starts[0][0]) / time_to_merge

        # The ego is appended first, then the highway vehicles in leader/follower order.
        ego_vehicle = self.action_type.vehicle_class(self.road, ego_start, speed=ego_speed)
        self.road.vehicles.append(ego_vehicle)

        for start in highway_starts:
            self.road.vehicles.append(
                RightLaneVehicle(self.road, start, speed=highway_speed)
            )

        # The ego is the controlled vehicle of the environment.
        self.vehicle = ego_vehicle


##### With a distance of 60

In [30]:
class RightLaneVehicle(ControlledVehicle):
    """
    Highway vehicle that keeps its current lane and never performs a lane change.

    Any lane-change request is replaced by the neutral "IDLE" meta-action
    before being forwarded to ``ControlledVehicle.act``.
    """

    # highway-env hands meta-actions to the vehicle as strings; 0 and 2 are the
    # discrete codes of "LANE_LEFT" / "LANE_RIGHT" and remain accepted so that
    # callers passing raw indices keep working.
    LANE_CHANGE_ACTIONS = (0, 2, "LANE_LEFT", "LANE_RIGHT")

    def act(self, action: "int | str | dict | None" = None) -> None:
        """
        Apply ``action`` unless it requests a lane change, in which case idle.

        :param action: meta-action label, discrete code or low-level action
            dict; ``None`` is forwarded unchanged to the parent controller.
        """
        if action in self.LANE_CHANGE_ACTIONS:
            action = "IDLE"  # neutral meta-action: keep lane and target speed
        super().act(action)


class CustomMergeEnv60(MergeEnv):
    """
    Merge scenario: ego on the merging lane, two highway vehicles 60 apart.

    The leading highway vehicle starts at longitudinal position 100 and the
    trailing one at 40, both on lane ("a", "b", 1).
    """

    def _make_vehicles(self) -> None:
        """Create the ego vehicle and the two highway vehicles on ``self.road``."""
        network = self.road.network

        # Start of lane ("b", "c", 0), used as the reference merge point.
        merge_point = network.get_lane(("b", "c", 0)).position(0, 0)

        # Ego start: longitudinal position 30 on the merging lane ("j", "k", 0).
        ego_start = network.get_lane(("j", "k", 0)).position(30, 0)

        # Highway vehicles share lane 1 of ("a", "b"); leader first, follower second.
        highway_lane = network.get_lane(("a", "b", 1))
        highway_starts = [highway_lane.position(longitudinal, 0) for longitudinal in (100, 40)]

        # Pick the highway speed so that the leader reaches the merge point's
        # x-coordinate at the same time as the ego driving at its initial speed.
        ego_speed = 20
        time_to_merge = (merge_point[0] - ego_start[0]) / ego_speed
        highway_speed = (merge_point[0] - highway_starts[0][0]) / time_to_merge

        # The ego is appended first, then the highway vehicles in leader/follower order.
        ego_vehicle = self.action_type.vehicle_class(self.road, ego_start, speed=ego_speed)
        self.road.vehicles.append(ego_vehicle)

        for start in highway_starts:
            self.road.vehicles.append(
                RightLaneVehicle(self.road, start, speed=highway_speed)
            )

        # The ego is the controlled vehicle of the environment.
        self.vehicle = ego_vehicle


##### With a distance of 80

In [31]:
class RightLaneVehicle(ControlledVehicle):
    """
    Highway vehicle that keeps its current lane and never performs a lane change.

    Any lane-change request is replaced by the neutral "IDLE" meta-action
    before being forwarded to ``ControlledVehicle.act``.
    """

    # highway-env hands meta-actions to the vehicle as strings; 0 and 2 are the
    # discrete codes of "LANE_LEFT" / "LANE_RIGHT" and remain accepted so that
    # callers passing raw indices keep working.
    LANE_CHANGE_ACTIONS = (0, 2, "LANE_LEFT", "LANE_RIGHT")

    def act(self, action: "int | str | dict | None" = None) -> None:
        """
        Apply ``action`` unless it requests a lane change, in which case idle.

        :param action: meta-action label, discrete code or low-level action
            dict; ``None`` is forwarded unchanged to the parent controller.
        """
        if action in self.LANE_CHANGE_ACTIONS:
            action = "IDLE"  # neutral meta-action: keep lane and target speed
        super().act(action)


class CustomMergeEnv80(MergeEnv):
    """
    Merge scenario: ego on the merging lane, two highway vehicles 80 apart.

    The leading highway vehicle starts at longitudinal position 100 and the
    trailing one at 20, both on lane ("a", "b", 1).
    """

    def _make_vehicles(self) -> None:
        """Create the ego vehicle and the two highway vehicles on ``self.road``."""
        network = self.road.network

        # Start of lane ("b", "c", 0), used as the reference merge point.
        merge_point = network.get_lane(("b", "c", 0)).position(0, 0)

        # Ego start: longitudinal position 30 on the merging lane ("j", "k", 0).
        ego_start = network.get_lane(("j", "k", 0)).position(30, 0)

        # Highway vehicles share lane 1 of ("a", "b"); leader first, follower second.
        highway_lane = network.get_lane(("a", "b", 1))
        highway_starts = [highway_lane.position(longitudinal, 0) for longitudinal in (100, 20)]

        # Pick the highway speed so that the leader reaches the merge point's
        # x-coordinate at the same time as the ego driving at its initial speed.
        ego_speed = 20
        time_to_merge = (merge_point[0] - ego_start[0]) / ego_speed
        highway_speed = (merge_point[0] - highway_starts[0][0]) / time_to_merge

        # The ego is appended first, then the highway vehicles in leader/follower order.
        ego_vehicle = self.action_type.vehicle_class(self.road, ego_start, speed=ego_speed)
        self.road.vehicles.append(ego_vehicle)

        for start in highway_starts:
            self.road.vehicles.append(
                RightLaneVehicle(self.road, start, speed=highway_speed)
            )

        # The ego is the controlled vehicle of the environment.
        self.vehicle = ego_vehicle


##### With a distance of 100

In [None]:
class RightLaneVehicle(ControlledVehicle):
    """
    Highway vehicle that keeps its current lane and never performs a lane change.

    Any lane-change request is replaced by the neutral "IDLE" meta-action
    before being forwarded to ``ControlledVehicle.act``.
    """

    # highway-env hands meta-actions to the vehicle as strings; 0 and 2 are the
    # discrete codes of "LANE_LEFT" / "LANE_RIGHT" and remain accepted so that
    # callers passing raw indices keep working.
    LANE_CHANGE_ACTIONS = (0, 2, "LANE_LEFT", "LANE_RIGHT")

    def act(self, action: "int | str | dict | None" = None) -> None:
        """
        Apply ``action`` unless it requests a lane change, in which case idle.

        :param action: meta-action label, discrete code or low-level action
            dict; ``None`` is forwarded unchanged to the parent controller.
        """
        if action in self.LANE_CHANGE_ACTIONS:
            action = "IDLE"  # neutral meta-action: keep lane and target speed
        super().act(action)


class CustomMergeEnv100(MergeEnv):
    """
    Merge scenario: ego on the merging lane, two highway vehicles 100 apart.

    The leading highway vehicle starts at longitudinal position 100 and the
    trailing one at 0, both on lane ("a", "b", 1). The reward of the parent
    environment is extended with a term that rewards merging between the two
    highway vehicles.
    """

    def _make_vehicles(self) -> None:
        """Create the ego vehicle and the two highway vehicles on ``self.road``."""
        road = self.road

        # Start of lane ("b", "c", 0), used as the reference merge point.
        merge_position = road.network.get_lane(("b", "c", 0)).position(0, 0)

        # Ego start: longitudinal position 30 on the merging lane ("j", "k", 0).
        ego_initial_position = road.network.get_lane(("j", "k", 0)).position(30, 0)

        # Highway vehicles start on lane 1 of ("a", "b"): leader at 100, follower at 0.
        highway_vehicle_initial_position = road.network.get_lane(("a", "b", 1)).position(100, 0)
        highway_vehicle_initial_position_1 = road.network.get_lane(("a", "b", 1)).position(0, 0)

        # Initial ego speed.
        ego_speed = 20

        # Time the ego needs to reach the merge point's x-coordinate at its initial speed.
        time_to_merge = (merge_position[0] - ego_initial_position[0]) / ego_speed

        # Highway speed chosen so that the leader reaches the merge point at the same time.
        highway_vehicle_speed = (merge_position[0] - highway_vehicle_initial_position[0]) / time_to_merge

        # The ego is appended first so it precedes the highway vehicles in road.vehicles.
        ego_vehicle = self.action_type.vehicle_class(
            road, ego_initial_position, speed=ego_speed
        )
        road.vehicles.append(ego_vehicle)

        # Leader first, follower second: _reward relies on this order.
        for initial_position in (highway_vehicle_initial_position, highway_vehicle_initial_position_1):
            road.vehicles.append(
                RightLaneVehicle(road, initial_position, speed=highway_vehicle_speed)
            )

        # The ego is the controlled vehicle of the environment.
        self.vehicle = ego_vehicle

    def _reward(self, action: int) -> float:
        """
        Parent reward plus a merge term based on the ego's position.

        The added term is read from ``self.config`` (with defaults):

        * ``"successful_merge"`` (100.0) when the ego shares the lane id of both
          highway vehicles and its x-coordinate lies strictly between them;
        * ``"unsuccessful_merge"`` (-100.0) when it shares the lane id but is
          not between them;
        * ``"not_merge_penalty"`` (-5.0) while it is on a different lane id,
          which pushes the ego to merge.

        :param action: the action passed on to the parent reward.
        :return: the parent reward plus the applicable merge term; the parent
            reward alone if fewer than two highway vehicles are on the road.
        """
        reward = super()._reward(action)

        # Collect *all* highway vehicles, in the order they were spawned
        # (leader, follower). Stopping at the first match would leave the list
        # with a single element and make the follower lookup below fail.
        highway_vehicles = [
            vehicle for vehicle in self.road.vehicles
            if isinstance(vehicle, RightLaneVehicle)
        ]
        if len(highway_vehicles) < 2:
            return reward

        lead_vehicle, rear_vehicle = highway_vehicles[:2]
        ego_vehicle = self.vehicle
        ego_x = ego_vehicle.position[0]

        # Only the lane id (third component of lane_index) is compared.
        same_lane_id = (
            ego_vehicle.lane_index[2]
            == lead_vehicle.lane_index[2]
            == rear_vehicle.lane_index[2]
        )

        if not same_lane_id:
            merge_term = self.config.get("not_merge_penalty", -5.0)
        elif rear_vehicle.position[0] < ego_x < lead_vehicle.position[0]:
            merge_term = self.config.get("successful_merge", 100.0)
        else:
            merge_term = self.config.get("unsuccessful_merge", -100.0)

        return reward + merge_term


In [34]:
# Register one Gymnasium environment id per gap scenario. Each id
# "CustomMerge-v<gap>" resolves to the CustomMergeEnv<gap> class defined in this
# notebook (module __main__). Adding a new gap only requires extending the tuple.
for gap in (20, 40, 60, 80, 100):
    gym.envs.registration.register(
        id=f'CustomMerge-v{gap}',
        entry_point=f'__main__:CustomMergeEnv{gap}',
    )

In [38]:
# Instantiate one environment per registered gap scenario, in ascending gap
# order; frames are rendered as RGB arrays.
env_20, env_40, env_60, env_80, env_100 = (
    gym.make(env_id, render_mode='rgb_array')
    for env_id in (
        "CustomMerge-v20",
        "CustomMerge-v40",
        "CustomMerge-v60",
        "CustomMerge-v80",
        "CustomMerge-v100",
    )
)

  logger.deprecation(
  logger.deprecation(
  logger.deprecation(
  logger.deprecation(


### **Training the models**

In [None]:
# Train a PPO agent on the gap-20 scenario, then save it next to its
# TensorBoard logs.
timesteps = 1000000
model = PPO(
    policy='MlpPolicy',
    env=env_20,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    tensorboard_log="env_minimum_gap_20/",
    policy_kwargs={"net_arch": [256, 256]},
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("env_minimum_gap_20/model")

In [None]:
# Train a PPO agent on the gap-40 scenario, then save it next to its
# TensorBoard logs.
timesteps = 1000000
model = PPO(
    policy='MlpPolicy',
    env=env_40,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    tensorboard_log="env_minimum_gap_40/",
    policy_kwargs={"net_arch": [256, 256]},
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("env_minimum_gap_40/model")

In [None]:
# Train a PPO agent on the gap-60 scenario, then save it next to its
# TensorBoard logs.
timesteps = 1000000
model = PPO(
    policy='MlpPolicy',
    env=env_60,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    tensorboard_log="env_minimum_gap_60/",
    policy_kwargs={"net_arch": [256, 256]},
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("env_minimum_gap_60/model")

In [None]:
# Train a PPO agent on the gap-80 scenario, then save it next to its
# TensorBoard logs.
timesteps = 1000000
model = PPO(
    policy='MlpPolicy',
    env=env_80,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    tensorboard_log="env_minimum_gap_80/",
    policy_kwargs={"net_arch": [256, 256]},
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("env_minimum_gap_80/model")

In [None]:
# Train a PPO agent on the gap-100 scenario, then save it next to its
# TensorBoard logs.
timesteps = 1000000
model = PPO(
    policy='MlpPolicy',
    env=env_100,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    tensorboard_log="env_minimum_gap_100/",
    policy_kwargs={"net_arch": [256, 256]},
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("env_minimum_gap_100/model")

### **Evaluate and compare the models**

**For the minimum gap 20**
- Average Reward:
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For the minimum gap 40**
- Average Reward: 
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For the minimum gap 60**
- Average Reward: 
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions:
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For the minimum gap 80**
- Average Reward: 
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For the minimum gap 100**
- Average Reward: 
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

In [None]:
def _final_x_positions(env, info):
    """
    Return ``(ego_x, highway_xs)``: the x-coordinates of the ego and of the other vehicles.

    Positions published in ``info`` (keys ``'ego_vehicle_position'`` and
    ``'highway_vehicles'``) are preferred. When they are absent, the positions
    are read from the unwrapped environment's ``vehicle`` and ``road.vehicles``.
    Returns ``(None, [])`` when neither source is available.
    """
    if 'highway_vehicles' in info and 'ego_vehicle_position' in info:
        return (
            info['ego_vehicle_position'][0],
            [vehicle.position[0] for vehicle in info['highway_vehicles']],
        )

    base_env = getattr(env, "unwrapped", env)
    ego_vehicle = getattr(base_env, "vehicle", None)
    road = getattr(base_env, "road", None)
    if ego_vehicle is None or road is None:
        return None, []

    highway_xs = [
        vehicle.position[0] for vehicle in road.vehicles if vehicle is not ego_vehicle
    ]
    return ego_vehicle.position[0], highway_xs


def evaluate_agent(model, env, num_episodes, success_speed_range, speed_threshold_ratio=0.3):
    """
    Roll out ``model`` on ``env`` for ``num_episodes`` episodes and report merge metrics.

    :param model: policy exposing ``predict(obs, deterministic=True)``.
    :param env: environment with the Gymnasium ``reset``/``step`` API whose
        unwrapped ``config`` contains ``"reward_speed_range"``.
    :param num_episodes: number of evaluation episodes to run.
    :param success_speed_range: currently unused; kept so existing calls keep working.
    :param speed_threshold_ratio: fraction of the reward speed range that a
        step-to-step speed change must exceed to count as dangerous driving.
    :return: dict with keys ``avg_reward``, ``avg_steps_to_merge`` (mean episode
        length in steps), ``avg_episode_time`` (mean wall-clock seconds),
        ``number_collisions``, ``successful_merges`` and
        ``number_dangerous_episodes``.
    """
    total_rewards = []          # per-episode cumulative reward
    total_steps_to_merge = []   # per-episode number of steps
    total_episode_times = []    # per-episode wall-clock duration in seconds
    total_collisions = 0
    successful_merges = 0
    dangerous_driving_episodes = 0

    # Read the configuration from the unwrapped environment: Gymnasium wrappers
    # are not guaranteed to forward arbitrary attributes such as ``config``.
    base_env = getattr(env, "unwrapped", env)
    reward_speed_range = base_env.config["reward_speed_range"]
    speed_threshold = (reward_speed_range[1] - reward_speed_range[0]) * speed_threshold_ratio

    for episode in range(num_episodes):
        start_time = time.time()
        obs, info = env.reset()
        done = False
        episode_reward = 0
        collisions = 0
        steps_to_merge = 0
        last_speed = None
        # Sticky for the whole episode: one sudden speed change is enough to
        # flag it, so the flag must not be cleared on later steps.
        dangerous_driving = False

        while not done:
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env.step(action)

            episode_reward += reward
            steps_to_merge += 1

            # Compare the speed with the previous step, rounded to 2 decimals.
            current_speed = round(info.get('speed', 0), 2)
            if last_speed is not None and abs(current_speed - last_speed) > speed_threshold:
                dangerous_driving = True
            last_speed = current_speed

            if info.get('crashed'):
                collisions += 1

            done = terminated or truncated

        # A merge is successful when, at the end of the episode, the ego's
        # x-coordinate lies strictly between those of the first two highway vehicles.
        ego_x, highway_xs = _final_x_positions(env, info)
        if ego_x is not None and len(highway_xs) >= 2:
            lower_x, upper_x = sorted(highway_xs[:2])
            if lower_x < ego_x < upper_x:
                successful_merges += 1

        total_rewards.append(episode_reward)
        total_collisions += collisions
        total_steps_to_merge.append(steps_to_merge)
        if dangerous_driving:
            dangerous_driving_episodes += 1
        total_episode_times.append(time.time() - start_time)

    avg_reward = np.mean(total_rewards)
    avg_steps_to_merge = np.mean(total_steps_to_merge)
    avg_episode_time = np.mean(total_episode_times)

    print(f"Average Reward: {avg_reward}")
    print(f"Average Steps to Merge: {avg_steps_to_merge}")
    print(f"Average Episode Time: {avg_episode_time:.2f} seconds")
    print(f"Number of Collisions: {total_collisions}")
    print(f"Successful Merges: {successful_merges}")
    print(f"Number of Dangerous Driving Episodes (sudden speed changes): {dangerous_driving_episodes}")

    return {
        "avg_reward": avg_reward,
        "avg_steps_to_merge": avg_steps_to_merge,
        "avg_episode_time": avg_episode_time,
        "number_collisions": total_collisions,
        "successful_merges": successful_merges,
        "number_dangerous_episodes": dangerous_driving_episodes,
    }

In [None]:
# Restore the policy trained on the gap-20 scenario from disk.
model = PPO.load("env_minimum_gap_20/model")

# Run 200 evaluation episodes on the matching environment and keep the metrics dict.
results = evaluate_agent(
    model=model,
    env=env_20,
    num_episodes=200,
    success_speed_range=(0, 10),
)

In [None]:
# Restore the policy trained on the gap-40 scenario from disk.
model = PPO.load("env_minimum_gap_40/model")

# Run 200 evaluation episodes on the matching environment and keep the metrics dict.
results = evaluate_agent(
    model=model,
    env=env_40,
    num_episodes=200,
    success_speed_range=(0, 10),
)

In [None]:
# Restore the policy trained on the gap-60 scenario from disk.
model = PPO.load("env_minimum_gap_60/model")

# Run 200 evaluation episodes on the matching environment and keep the metrics dict.
results = evaluate_agent(
    model=model,
    env=env_60,
    num_episodes=200,
    success_speed_range=(0, 10),
)

In [None]:
# Restore the policy trained on the gap-80 scenario from disk.
model = PPO.load("env_minimum_gap_80/model")

# Run 200 evaluation episodes on the matching environment and keep the metrics dict.
results = evaluate_agent(
    model=model,
    env=env_80,
    num_episodes=200,
    success_speed_range=(0, 10),
)

In [None]:
# Restore the policy trained on the gap-100 scenario from disk.
model = PPO.load("env_minimum_gap_100/model")

# Run 200 evaluation episodes on the matching environment and keep the metrics dict.
results = evaluate_agent(
    model=model,
    env=env_100,
    num_episodes=200,
    success_speed_range=(0, 10),
)

TODO: Write a short text analysing the results.

TODO: Refine the search below the best gap?

Suppose the best gap is 40:

In steps of 2: 40, 38, 36, 34, 32, 30, ... down to 22?

In steps of 5: 40, 35, 30, 25?

TODO: Add code to generate a video of the best model.