# **Base Model – Merging: the default `merge-v0` environment from highway-env**

## **Imports**

In [4]:
import gymnasium as gym
from matplotlib import pyplot as plt
import cv2
import pprint
from IPython.display import Video
import highway_env
from highway_env import utils
from highway_env.envs import MergeEnv
import pandas as pd
import time
import numpy as np
from stable_baselines3 import PPO
import imageio
%matplotlib inline

## **The Environment**

In [21]:
# Build the merge scenario; "rgb_array" rendering lets later cells grab frames
# from env.render() for the video.
env = gym.make(
    "merge-v0",
    render_mode="rgb_array",
)

## **Training**

In [22]:
# Fixed RNG seed so that "Restart Kernel -> Run All" reproduces the same
# network initialisation and action sampling during training.
SEED = 42

# PPO agent with an MLP policy; hidden layer sizes are given by `net_arch`.
model = PPO('MlpPolicy', env,
            policy_kwargs=dict(net_arch=[256, 256]),
            learning_rate=5e-4,
            n_steps=2048,
            batch_size=64,
            n_epochs=10,
            gamma=0.8,
            gae_lambda=0.95,
            clip_range=0.2,
            verbose=1,
            seed=SEED,
            tensorboard_log="base_model/")

# NOTE(review): this budget is smaller than n_steps (2048). PPO gathers a full
# rollout of n_steps transitions before each update, so presumably one whole
# rollout is still collected -- confirm against the SB3 docs and raise
# `timesteps` for a meaningful training run.
timesteps = 100
model.learn(total_timesteps=timesteps)

# Persist the trained policy so the evaluation cells can reload it.
model.save("base_model/model")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to base_model/PPO_2
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashTrue
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashTrue
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue


## **Testing**
Results

In [23]:
def evaluate_agent(model, env, num_episodes, speed_threshold_ratio=0.5):
    """Roll out ``model`` in ``env`` and report aggregate driving metrics.

    Each episode is played with deterministic actions until the environment
    reports ``terminated`` or ``truncated``. A summary is printed and the same
    numbers are returned so callers can store or compare them.

    Args:
        model: Object exposing ``predict(obs, deterministic=True)`` that
            returns an ``(action, state)`` pair.
        env: Environment whose ``reset()`` returns ``(obs, info)`` and whose
            ``step(action)`` returns
            ``(obs, reward, terminated, truncated, info)``. The ``info`` dict
            is read for the optional keys ``'crashed'`` and ``'speed'``.
        num_episodes: Number of episodes to run; must be positive.
        speed_threshold_ratio: A step is flagged as a sudden speed change when
            the absolute speed difference to the previous step exceeds this
            fraction of the previous speed.

    Returns:
        dict with the keys ``avg_reward``, ``avg_steps_to_merge``,
        ``avg_episode_time``, ``avg_collisions``, ``total_collisions``,
        ``successful_merges`` and ``dangerous_driving_episodes``.

    Raises:
        ValueError: If ``num_episodes`` is not positive (the averages would
            otherwise divide by zero).
    """
    if num_episodes <= 0:
        raise ValueError(
            f"num_episodes must be a positive integer, got {num_episodes}"
        )

    total_reward = 0
    total_steps = 0
    total_collisions = 0
    successful_merges = 0
    # NOTE: despite the name, this is incremented once per *step* that shows a
    # sudden speed change, not once per episode.
    dangerous_driving_episodes = 0

    for _ in range(num_episodes):
        obs, info = env.reset()
        done = False
        episode_reward = 0
        has_crashed = False
        # 0 acts as "no previous speed yet": the first step of an episode can
        # never be flagged as a sudden change.
        prev_speed = 0

        while not done:
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env.step(action)

            episode_reward += reward
            total_steps += 1

            if info.get('crashed', False):
                has_crashed = True
                total_collisions += 1

            current_speed = info.get('speed', 0)
            if prev_speed != 0 and abs(current_speed - prev_speed) > speed_threshold_ratio * prev_speed:
                dangerous_driving_episodes += 1
            prev_speed = current_speed

            done = terminated or truncated

        # An episode that finished without any crash counts as a successful merge.
        if not has_crashed:
            successful_merges += 1

        total_reward += episode_reward

    avg_reward = total_reward / num_episodes
    # Steps-to-merge and episode time are both measured as the number of
    # environment steps per episode, so they share the same average.
    avg_steps_to_merge = total_steps / num_episodes
    avg_episode_time = total_steps / num_episodes
    avg_collisions = total_collisions / num_episodes

    print(f"Average Reward: {avg_reward}")
    print(f"Average Steps to Merge: {avg_steps_to_merge}")
    print(f"Average Episode Time: {avg_episode_time:.2f} seconds")
    print(f"Number of Collisions: {total_collisions}")
    print(f"Successful Merges: {successful_merges}")
    print(f"Number of Dangerous Driving Episodes (sudden speed changes): {dangerous_driving_episodes}")

    return {
        "avg_reward": avg_reward,
        "avg_steps_to_merge": avg_steps_to_merge,
        "avg_episode_time": avg_episode_time,
        "avg_collisions": avg_collisions,
        "total_collisions": total_collisions,
        "successful_merges": successful_merges,
        "dangerous_driving_episodes": dangerous_driving_episodes,
    }

In [24]:
# Load the trained model
# Reload the policy from disk so this cell does not depend on the in-memory
# model left behind by the training cell.
model = PPO.load("base_model/model")

# Run 50 evaluation episodes; evaluate_agent prints the metric summary.
results = evaluate_agent(model, env, num_episodes=50)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
over

In [25]:
# Load the trained model
# Policy to record, restored from the checkpoint written after training.
model = PPO.load("base_model/model")

# Recording state: upper bound on recorded steps, step counter, episode-end
# flag and the list that collects the rendered frames.
max_steps = 1000
step_count = 0
done = False
frames = []

# Start a fresh episode for the recording.
obs, info = env.reset()

def resize_frame_to_macro_block_size(frame, block_size=16):
    """Return ``frame`` with height and width rounded down to a multiple of ``block_size``.

    Video encoders warn about (or pad) frames whose dimensions are not
    divisible by their macro block size, 16 by default here.

    Args:
        frame: Image array whose first two axes are (height, width); both
            colour (H, W, C) and grayscale (H, W) frames are accepted.
        block_size: Positive block size both dimensions must be divisible by.

    Returns:
        A new array. Frames that are already aligned are copied without
        resampling; other frames are resized with ``cv2.resize``. A dimension
        smaller than ``block_size`` is scaled up to exactly one block instead
        of collapsing to zero.

    Raises:
        ValueError: If ``block_size`` is not positive.
    """
    if block_size <= 0:
        raise ValueError(f"block_size must be positive, got {block_size}")

    # Only the two leading axes matter; this also supports 2-D grayscale frames.
    h, w = frame.shape[:2]
    new_w = max(block_size, (w // block_size) * block_size)
    new_h = max(block_size, (h // block_size) * block_size)

    if (new_w, new_h) == (w, h):
        # Already aligned: skip the resampling but still hand back a fresh array.
        return frame.copy()
    return cv2.resize(frame, (new_w, new_h))

# Run the agent in the environment
while step_count < max_steps and not done:
    # Deterministic actions, as in evaluate_agent, so the video shows the same
    # behaviour that the reported metrics describe.
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)

    # An episode is over when it terminates OR is truncated; stepping past a
    # truncation would record frames from an already-finished episode.
    done = terminated or truncated

    frame = env.render()

    # Resize the frame to avoid the macro_block_size warning
    frames.append(resize_frame_to_macro_block_size(frame))

    step_count += 1

# Release the rendering resources held by the environment.
env.close()

# Save the frames as a video
video_filename = "base_model.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashTrue
overFalse
Video saved as base_model.mp4


In [26]:
# Display the video
# Show the recorded rollout inline; embed=True stores the video data in the
# notebook output instead of linking to the file.
video_filename = "base_model.mp4"
Video(
    video_filename,
    embed=True,
)