# **Study on the Ideal Behaviour for Merging into the Highway**

##### This study aims to determine the optimal strategy for the ego vehicle to merge onto a highway safely and efficiently, prioritizing braking so that oncoming vehicles can pass. The variables under consideration are the reward for the braking action, which is shaped according to how close the oncoming vehicle is, and an influence penalty, which is applied when the other vehicle's behaviour changes because of the ego vehicle. The goal is to find the reward configuration that encourages the ego vehicle to brake at the right moment, ensuring both safety and traffic efficiency while affecting the behaviour of the other vehicle as little as possible.

### **Imports**

In [1]:
from IPython.display import Video
import cv2
import imageio
import gymnasium as gym
from matplotlib import pyplot as plt
import pprint
import highway_env
import pandas as pd
import time
import numpy as np
from stable_baselines3 import PPO
from highway_env import utils
from highway_env.envs import MergeEnv
from highway_env.vehicle.controller import ControlledVehicle
%matplotlib inline

### **Creation of the environment**

##### The ego vehicle starts on the merging lane and a single vehicle drives on the rightmost lane of the highway; the environment uses a customized reward function

In [2]:
class RightLaneVehicle(ControlledVehicle):
    """
    A vehicle that is restricted to its current lane and never changes lane.

    Any lane-change request passed to :meth:`act` is replaced by a keep-lane
    request before it is forwarded to ``ControlledVehicle.act``.
    """

    # Lane-change requests to block: the integer codes 0 and 2 that the
    # original filter handled, plus the string labels "LANE_LEFT" and
    # "LANE_RIGHT" that ControlledVehicle.act reacts to.
    _LANE_CHANGE_ACTIONS = (0, 2, "LANE_LEFT", "LANE_RIGHT")

    def act(self, action: "int | str | None" = None) -> None:
        """
        Forward ``action`` to the parent controller, blocking lane changes.

        :param action: the requested action; ``None`` means "no request".
            Lane-change requests (``0``, ``2``, ``"LANE_LEFT"``,
            ``"LANE_RIGHT"``) are turned into a keep-lane request of the
            same kind: ``1`` for integer codes, ``"IDLE"`` for labels.
        """
        if action in self._LANE_CHANGE_ACTIONS:
            # Keep the replacement in the same representation as the request.
            action = "IDLE" if isinstance(action, str) else 1
        super().act(action)


class CustomMergeEnv(MergeEnv):
    """
    Merge scenario with one ego vehicle on the ramp and one highway vehicle.

    The reward of ``MergeEnv`` is extended with a braking/yielding incentive
    and a penalty applied when the highway vehicle decelerates sharply while
    the ego vehicle is close to it. The extra terms are tuned through the
    optional config keys ``braking_bonus``, ``braking_penalty``,
    ``yielding_bonus``, ``yielding_penalty`` and ``influence_penalty``.
    """

    def _make_vehicles(self) -> None:
        """
        Create the ego vehicle and a single lane-keeping highway vehicle.

        The highway vehicle's initial speed is chosen so that, at constant
        speeds, both vehicles reach the start of lane ``("b", "c", 0)`` at
        the same time. The speed memories used by :meth:`_reward` are reset
        here so that they never carry over from previously created vehicles.
        """
        road = self.road

        # Merge point: start of lane ("b", "c", 0) on the highway
        merge_position = road.network.get_lane(("b", "c", 0)).position(0, 0)

        # Initial position of the ego vehicle on the merging lane
        ego_initial_position = road.network.get_lane(("j", "k", 0)).position(30, 0)

        # Initial position of the highway vehicle on lane 1 of segment a-b
        highway_vehicle_initial_position = road.network.get_lane(("a", "b", 1)).position(80, 0)

        # Initial speed of the ego vehicle
        ego_speed = 20

        # Time the ego vehicle needs to reach the merge point at constant speed
        time_to_merge = (merge_position[0] - ego_initial_position[0]) / ego_speed

        # Speed that makes the highway vehicle reach the merge point at the same time
        highway_vehicle_speed = (merge_position[0] - highway_vehicle_initial_position[0]) / time_to_merge

        # Create the ego vehicle on the merging lane
        ego_vehicle = self.action_type.vehicle_class(
            road, ego_initial_position, speed=ego_speed
        )
        road.vehicles.append(ego_vehicle)

        # Create the lane-keeping vehicle on the highway
        highway_vehicle = RightLaneVehicle(
            road, highway_vehicle_initial_position, speed=highway_vehicle_speed
        )
        road.vehicles.append(highway_vehicle)

        # The ego vehicle is the controlled vehicle of the environment
        self.vehicle = ego_vehicle

        # Start the acceleration estimates of _reward from the speeds of the
        # freshly created vehicles instead of values left by earlier ones.
        # NOTE(review): relies on MergeEnv calling _make_vehicles on every
        # reset - confirm against the installed highway-env version.
        self._previous_speed = ego_vehicle.speed
        self._previous_highway_speed = highway_vehicle.speed

        # Debug: show the positions and speeds of both vehicles
        print(f"Posição do veículo ego: {ego_vehicle.position}, Velocidade: {ego_vehicle.speed}")
        print(f"Posição do veículo da autoestrada: {highway_vehicle.position}, Velocidade: {highway_vehicle.speed}")

    def _reward(self, action: int) -> float:
        """
        Custom reward that incentivizes the ego vehicle to brake near the
        merging point and let the highway vehicle pass before merging.

        On top of the parent reward:

        * while the ego vehicle is within 100 of the merge point (first
          position coordinate) and the highway vehicle is ahead of it, the
          ego vehicle gains ``braking_bonus`` when it is slower than the
          highway vehicle and decelerating, otherwise loses
          ``braking_penalty``; it additionally gains ``yielding_bonus`` when
          it is slower than the highway vehicle, otherwise loses
          ``yielding_penalty``;
        * ``influence_penalty`` is subtracted when, near the merge point and
          within 20 of the highway vehicle, the highway vehicle's speed
          dropped by more than 1.0 since the previous call.

        :param action: the action performed by the ego vehicle
        :return: the parent reward plus the braking term minus the
            influence penalty; the unmodified parent reward when the road
            contains no ``RightLaneVehicle``
        """
        # Start from the reward of the parent class
        reward = super()._reward(action)

        ego_vehicle = self.vehicle
        road = self.road

        # The highway vehicle is the first RightLaneVehicle on the road
        highway_vehicle = next(
            (vehicle for vehicle in road.vehicles if isinstance(vehicle, RightLaneVehicle)),
            None,
        )
        if highway_vehicle is None:
            return reward

        # Relative position along the first coordinate
        distance_to_highway_vehicle = highway_vehicle.position[0] - ego_vehicle.position[0]
        is_ahead = distance_to_highway_vehicle > 0  # highway vehicle is ahead of the ego
        merge_x = road.network.get_lane(("b", "c", 0)).position(0, 0)[0]
        near_merge_point = abs(ego_vehicle.position[0] - merge_x) < 100

        # Speed change since the previous call (no division by a time step);
        # the getattr fallback covers a call made before any speed was stored.
        previous_speed = getattr(self, "_previous_speed", ego_vehicle.speed)
        acceleration = ego_vehicle.speed - previous_speed
        self._previous_speed = ego_vehicle.speed  # Update for the next step

        previous_highway_speed = getattr(self, "_previous_highway_speed", highway_vehicle.speed)
        highway_acceleration = highway_vehicle.speed - previous_highway_speed
        self._previous_highway_speed = highway_vehicle.speed  # Update for the next step

        # Reward for braking and letting the highway vehicle pass
        braking_reward = 0.0
        if near_merge_point and is_ahead:
            ego_is_slower = ego_vehicle.speed < highway_vehicle.speed
            # Ego vehicle should brake
            if ego_is_slower and acceleration < 0:
                braking_reward = self.config.get("braking_bonus", 1.0)  # Incentive for braking
            else:
                braking_reward = -self.config.get("braking_penalty", 1.0)
            # Additional reward if the ego vehicle stays behind the highway
            # vehicle (being behind is already guaranteed by is_ahead here)
            if ego_is_slower:
                braking_reward += self.config.get("yielding_bonus", 2.0)
            else:
                braking_reward -= self.config.get("yielding_penalty", 2.0)

        # Penalize interference with the highway vehicle. The absolute gap is
        # used so that "close" also excludes a highway vehicle far behind.
        influence_penalty = 0.0
        if near_merge_point and abs(distance_to_highway_vehicle) < 20:
            if highway_acceleration < -1.0:  # Significant deceleration detected
                influence_penalty = self.config.get("influence_penalty", 5.0)  # Large penalty for interference

        # Total reward includes the braking incentive and interference penalty
        reward += braking_reward - influence_penalty

        # Debug information
        print(f"Distance to highway vehicle: {distance_to_highway_vehicle}, Ego speed: {ego_vehicle.speed}, Highway speed: {highway_vehicle.speed}, Highway acceleration: {highway_acceleration}")
        print(f"Braking reward: {braking_reward}, Influence penalty: {influence_penalty}, Total reward: {reward}")

        return reward


In [3]:
# Make the custom environment available to gym.make under "CustomMerge-v0";
# the entry point refers to the class defined in this notebook (__main__).
gym.register(id="CustomMerge-v0", entry_point="__main__:CustomMergeEnv")

### **Training the models for several rewards**

#### Initial configuration with balanced values

In [4]:
# Balanced reward configuration: every custom reward key is set explicitly.
env_v0 = gym.make("CustomMerge-v0", render_mode="rgb_array")
env_v0.unwrapped.config.update(
    braking_bonus=1.0,
    braking_penalty=1.0,
    yielding_bonus=2.0,
    yielding_penalty=2.0,
    influence_penalty=5.0,
)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0


In [None]:
# PPO settings for the run on env_v0
ppo_settings = dict(
    policy_kwargs={"net_arch": [256, 256]},
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    verbose=1,
    tensorboard_log="env_ego_entering_brake_close_0/",
)
model = PPO("MlpPolicy", env_v0, **ppo_settings)

# Train, then store the resulting policy next to its tensorboard logs
timesteps = 1_000_000
model.learn(total_timesteps=timesteps)
model.save("env_ego_entering_brake_close_0/model")

#### Configuration to force the ego vehicle to let the highway vehicle go ahead

In [5]:
# Same as the balanced configuration, but with yielding_bonus raised to 4.0.
env_v1 = gym.make("CustomMerge-v0", render_mode="rgb_array")
env_v1.unwrapped.config.update(
    braking_bonus=1.0,
    braking_penalty=1.0,
    yielding_bonus=4.0,
    yielding_penalty=2.0,
    influence_penalty=5.0,
)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0


In [None]:
# PPO settings for the run on env_v1
ppo_settings = dict(
    policy_kwargs={"net_arch": [256, 256]},
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    verbose=1,
    tensorboard_log="env_ego_entering_brake_close_1/",
)
model = PPO("MlpPolicy", env_v1, **ppo_settings)

# Train, then store the resulting policy next to its tensorboard logs
timesteps = 1_000_000
model.learn(total_timesteps=timesteps)
model.save("env_ego_entering_brake_close_1/model")

#### Configuration to severely punish influences on the highway vehicle behaviour

In [6]:
# Same as the balanced configuration, but with influence_penalty raised to 10.0.
env_v2 = gym.make("CustomMerge-v0", render_mode="rgb_array")
env_v2.unwrapped.config.update(
    braking_bonus=1.0,
    braking_penalty=1.0,
    yielding_bonus=2.0,
    yielding_penalty=2.0,
    influence_penalty=10.0,
)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0


In [None]:
# PPO settings for the run on env_v2
ppo_settings = dict(
    policy_kwargs={"net_arch": [256, 256]},
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    verbose=1,
    tensorboard_log="env_ego_entering_brake_close_2/",
)
model = PPO("MlpPolicy", env_v2, **ppo_settings)

# Train, then store the resulting policy next to its tensorboard logs
timesteps = 1_000_000
model.learn(total_timesteps=timesteps)
model.save("env_ego_entering_brake_close_2/model")

#### "Safe" configuration - increase the yielding_bonus and the influence_penalty

In [7]:
# "Safe" configuration: yielding_bonus 4.0 and influence_penalty 10.0.
env_v3 = gym.make("CustomMerge-v0", render_mode="rgb_array")
env_v3.unwrapped.config.update(
    braking_bonus=1.0,
    braking_penalty=1.0,
    yielding_bonus=4.0,
    yielding_penalty=2.0,
    influence_penalty=10.0,
)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0


In [None]:
# PPO settings for the run on env_v3
ppo_settings = dict(
    policy_kwargs={"net_arch": [256, 256]},
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    verbose=1,
    tensorboard_log="env_ego_entering_brake_close_3/",
)
model = PPO("MlpPolicy", env_v3, **ppo_settings)

# Train, then store the resulting policy next to its tensorboard logs
timesteps = 1_000_000
model.learn(total_timesteps=timesteps)
model.save("env_ego_entering_brake_close_3/model")

#### "Aggressive" configuration - Reduce yielding_bonus and increase the braking_bonus

In [8]:
# "Aggressive" configuration: braking_bonus 4.0 and yielding_bonus 0.5.
env_v4 = gym.make("CustomMerge-v0", render_mode="rgb_array")
env_v4.unwrapped.config.update(
    braking_bonus=4.0,
    braking_penalty=1.0,
    yielding_bonus=0.5,
    yielding_penalty=2.0,
    influence_penalty=5.0,
)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0


In [None]:
# PPO settings for the run on env_v4
ppo_settings = dict(
    policy_kwargs={"net_arch": [256, 256]},
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    verbose=1,
    tensorboard_log="env_ego_entering_brake_close_4/",
)
model = PPO("MlpPolicy", env_v4, **ppo_settings)

# Train, then store the resulting policy next to its tensorboard logs
timesteps = 1_000_000
model.learn(total_timesteps=timesteps)
model.save("env_ego_entering_brake_close_4/model")

### **Evaluate and compare the models**

**For env_v0**
- Average Reward:
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For env_v1**
- Average Reward: 
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For env_v2**
- Average Reward: 
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions:
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For env_v3**
- Average Reward: 
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

**For env_v4**
- Average Reward: 
- Average Steps to Merge: 
- Average Episode Time: 
- Number of Collisions: 
- Successful Merges: 
- Number of Dangerous Driving Episodes (sudden speed changes): 

In [12]:
def evaluate_agent(model, env, num_episodes, speed_threshold_ratio=0.5):
    """
    Roll out ``model`` in ``env`` and report merge-related metrics.

    Each episode runs until the environment reports ``terminated`` or
    ``truncated``, or until the merge is counted as successful. A merge is
    counted (at most once per episode) when no collision happened, the ego
    vehicle's lane index within its segment is non-zero, and the ego vehicle
    is behind at least one other vehicle along the first position coordinate.

    :param model: object exposing ``predict(obs, deterministic=True)`` that
        returns ``(action, state)``
    :param env: environment following the ``reset()`` / ``step()`` protocol
        used below, whose ``unwrapped`` object exposes ``config``, ``road``
        and ``vehicle``
    :param num_episodes: number of episodes to run
    :param speed_threshold_ratio: fraction of the width of
        ``config["reward_speed_range"]`` above which a speed change between
        two consecutive steps is flagged as dangerous driving
    :return: dict with the keys ``avg_reward``, ``avg_steps_to_merge``,
        ``avg_episode_time`` (wall-clock seconds), ``number_collisions``,
        ``successful_merges`` and ``number_dangerous_episodes``
    """
    # Environment attributes are read from the unwrapped environment, as was
    # already done for the config, rather than through the wrapper returned
    # by gym.make.
    base_env = env.unwrapped

    total_rewards = []  # Total reward of each episode
    total_collisions = 0  # Collisions across all episodes
    successful_merges = 0  # Episodes that ended with a successful merge
    dangerous_driving_episodes = 0  # Episodes with a sudden speed change
    total_steps_to_merge = []  # Number of steps taken in each episode
    total_episode_times = []  # Wall-clock duration of each episode

    # Largest speed change between two steps that is still considered safe
    reward_speed_range = base_env.config["reward_speed_range"]
    speed_threshold = (reward_speed_range[1] - reward_speed_range[0]) * speed_threshold_ratio

    for _ in range(num_episodes):
        start_time = time.time()
        obs, info = env.reset()
        done = False
        episode_reward = 0
        collisions = 0
        dangerous_driving = False
        steps_to_merge = 0
        last_speed = None  # No previous speed before the first step

        while not done:
            # The agent chooses an action and the environment executes it
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env.step(action)

            episode_reward += reward
            steps_to_merge += 1

            # Flag sudden speed changes (speeds are compared at 2 decimals)
            current_speed = round(info.get('speed', 0), 2)
            if last_speed is not None and abs(current_speed - last_speed) > speed_threshold:
                dangerous_driving = True
            last_speed = current_speed

            if info.get('crashed'):
                collisions += 1

            # The episode ends when the environment says so ...
            done = terminated or truncated

            # ... or as soon as the merge succeeded. The road is read again
            # at every step because a reset may replace it.
            ego_vehicle = base_env.vehicle
            other_vehicles = [
                vehicle for vehicle in base_env.road.vehicles if vehicle is not ego_vehicle
            ]
            # NOTE(review): a non-zero lane index is used as the "has merged"
            # test - confirm it matches the lane numbering of the environment.
            merged = (
                not collisions
                and ego_vehicle.lane_index[2] != 0
                and any(
                    ego_vehicle.position[0] < vehicle.position[0]
                    for vehicle in other_vehicles
                )
            )
            if merged:
                # Counted once per episode, however many vehicles are ahead
                successful_merges += 1
                done = True

        # Log episode metrics
        total_rewards.append(episode_reward)
        total_collisions += collisions
        total_steps_to_merge.append(steps_to_merge)

        if dangerous_driving:
            dangerous_driving_episodes += 1

        total_episode_times.append(time.time() - start_time)

    # Final metric calculations
    avg_reward = np.mean(total_rewards)
    avg_steps_to_merge = np.mean(total_steps_to_merge)
    avg_episode_time = np.mean(total_episode_times)

    # Display results
    print(f"Average Reward: {avg_reward}")
    print(f"Average Steps to Merge: {avg_steps_to_merge}")
    print(f"Average Episode Time: {avg_episode_time:.2f} seconds")
    print(f"Number of Collisions: {total_collisions}")
    print(f"Successful Merges: {successful_merges}")
    print(f"Number of Dangerous Driving Episodes (sudden speed changes): {dangerous_driving_episodes}")

    return {
        "avg_reward": avg_reward,
        "avg_steps_to_merge": avg_steps_to_merge,
        "avg_episode_time": avg_episode_time,
        "number_collisions": total_collisions,
        "successful_merges": successful_merges,
        "number_dangerous_episodes": dangerous_driving_episodes,
    }


In [13]:
# Restore the policy saved by the env_v0 training run
model = PPO.load("env_ego_entering_brake_close_0/model")

# Score it over 200 episodes of env_v0
results = evaluate_agent(model=model, env=env_v0, num_episodes=200)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0
Distance to highway vehicle: 44.99999999999997, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 39.99999999999997, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 35.00000000000004, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 30.000000000000114, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 25.00000000

In [14]:
# Restore the policy saved by the env_v1 training run
model = PPO.load("env_ego_entering_brake_close_1/model")

# Score it over 200 episodes of env_v1
results = evaluate_agent(model=model, env=env_v1, num_episodes=200)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0
Distance to highway vehicle: 44.99999999999997, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 37.48733529612154, Ego speed: 24.1455588268693, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8793950980763255
crashFalse
overFalse
Distance to highway vehicle: 25.399726929920604, Ego speed: 28.999544883201107, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.9333282764800123
crashFalse
overFalse
Distance to highway vehicle: 10.897420394760445, Ego speed: 29.829033991267547, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.9425448221251949
crashFalse
overFalse

In [15]:
# Restore the policy saved by the env_v2 training run
model = PPO.load("env_ego_entering_brake_close_2/model")

# Score it over 200 episodes of env_v2
results = evaluate_agent(model=model, env=env_v2, num_episodes=200)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0
Distance to highway vehicle: 44.99999999999997, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 37.48733529612154, Ego speed: 24.1455588268693, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8793950980763255
crashFalse
overFalse
Distance to highway vehicle: 25.399726929920604, Ego speed: 28.999544883201107, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.9333282764800123
crashFalse
overFalse
Distance to highway vehicle: 10.897420394760445, Ego speed: 29.829033991267547, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.9425448221251949
crashFalse
overFalse

In [16]:
# Restore the policy saved by the env_v3 training run
model = PPO.load("env_ego_entering_brake_close_3/model")

# Score it over 200 episodes of env_v3
results = evaluate_agent(model=model, env=env_v3, num_episodes=200)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0
Distance to highway vehicle: 44.99999999999997, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 37.48733529612154, Ego speed: 24.1455588268693, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8793950980763255
crashFalse
overFalse
Distance to highway vehicle: 25.399726929920604, Ego speed: 28.999544883201107, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.9333282764800123
crashFalse
overFalse
Distance to highway vehicle: 10.897420394760445, Ego speed: 29.829033991267547, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.9425448221251949
crashFalse
overFalse

In [17]:
# Restore the policy saved by the env_v4 training run
model = PPO.load("env_ego_entering_brake_close_4/model")

# Score it over 200 episodes of env_v4
results = evaluate_agent(model=model, env=env_v4, num_episodes=200)

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0
Distance to highway vehicle: 44.99999999999997, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 37.48733529612154, Ego speed: 24.1455588268693, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8793950980763255
crashFalse
overFalse
Distance to highway vehicle: 25.399726929920604, Ego speed: 28.999544883201107, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.9333282764800123
crashFalse
overFalse
Distance to highway vehicle: 10.897420394760445, Ego speed: 29.829033991267547, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.9425448221251949
crashFalse
overFalse

TODO: write a short text analysing the results

TODO: change this so that the video shows the configuration with the best result

In [20]:
# Restore the policy saved by the env_v0 training run
model = PPO.load("env_ego_entering_brake_close_0/model")

# Recording state: step budget, step counter, episode flag and frame buffer
max_steps = 1000
step_count = 0
done = False
frames = []

# Start a fresh episode on env_v0
obs, info = env_v0.reset()

# Resize frame to be divisible by 16 (macro block size for video codecs)
def resize_frame_to_macro_block_size(frame, block_size=16):
    """
    Return ``frame`` with width and height that are multiples of ``block_size``.

    Each dimension is rounded down to the nearest multiple of ``block_size``
    but never below ``block_size`` itself, so frames smaller than one block
    are enlarged instead of collapsing to a zero size.

    :param frame: array whose first two dimensions are height and width
        (2-D frames and frames with a trailing channel axis are accepted)
    :param block_size: the size both dimensions must be a multiple of
    :return: ``frame`` itself when it already has a valid size, otherwise
        the result of ``cv2.resize``
    """
    h, w = frame.shape[:2]
    new_w = max(block_size, (w // block_size) * block_size)
    new_h = max(block_size, (h // block_size) * block_size)
    if (new_w, new_h) == (w, h):
        # Already aligned: nothing to resize
        return frame
    return cv2.resize(frame, (new_w, new_h))

# Run the agent in the environment until the episode ends or the step budget
# is used up
while step_count < max_steps and not done:
    # Use the deterministic policy, as in evaluate_agent, so that the video
    # shows the same behaviour that was evaluated
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env_v0.step(action)
    # An episode is over when it is terminated OR truncated
    done = terminated or truncated
    frame = env_v0.render()

    # Resize the frame to avoid the macro_block_size warning
    resized_frame = resize_frame_to_macro_block_size(frame)
    frames.append(resized_frame)

    step_count += 1

# Close the environment
env_v0.close()

# Save the frames as a video
video_filename = "brake_and_let_go.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")

Posição do veículo ego: [30.  14.5], Velocidade: 20
Posição do veículo da autoestrada: [80.  4.], Velocidade: 15.0
Distance to highway vehicle: 44.99999999999997, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 39.99999999999997, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 35.00000000000004, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 30.000000000000114, Ego speed: 20.0, Highway speed: 15.0, Highway acceleration: 0.0
Braking reward: 0.0, Influence penalty: 0.0, Total reward: 0.8333333333333333
crashFalse
overFalse
Distance to highway vehicle: 25.00000000

In [22]:
# Display the recorded video inline; embed=True is passed so the video data
# is embedded in the notebook output
video_filename = "brake_and_let_go.mp4"
Video(video_filename, embed=True)