# **Study on the Ideal Behaviour, on the highway, when another vehicle is merging into it - Brake**

##### This study aims to determine the optimal strategy for the ego vehicle when another vehicle is merging onto its highway. The only variables under consideration are the distance between the ego vehicle and the merging vehicle and the reward `braking_reward`. The goal is to find the distance interval in which rewarding the ego vehicle for braking yields the best behaviour, ensuring both safety and traffic efficiency.

### **Imports**

In [1]:
import imageio
import cv2
from IPython.display import Video
import gymnasium as gym
from matplotlib import pyplot as plt
import pprint
from highway_env.envs import MergeEnv
from highway_env import utils
import time
import numpy as np
from stable_baselines3 import PPO
import os
%matplotlib inline

### **Custom environment with only ego vehicle and merging vehicle**

**Fixed Rewards**
- high speed = 1
- lane change = -5
- right lane = 3
- braking = 5

In [2]:
class CustomMergeEnv(MergeEnv):
    """Merge scenario reduced to the ego vehicle and one merging vehicle.

    On top of the parent ``MergeEnv`` reward, each step adds:

    * ``braking_reward`` while the longitudinal (x) distance between the ego
      vehicle and the merging vehicle lies strictly inside
      ``braking_interval``;
    * ``influence_penalty`` when the merging vehicle slows down sharply while
      the ego vehicle is close to it and near the merge point.

    Optional config keys read by this class:
        braking_interval: ``[d_min, d_max]``, default ``[0, 10]``.
        braking_reward: bonus granted inside the interval, default ``5``.
        influence_penalty: value added on interference, default ``-5.0``.
    """

    # Lane on which the merging vehicle is spawned and looked up.
    MERGING_LANE_INDEX = ("j", "k", 0)

    def _make_vehicles(self) -> None:
        """Populate the road with the ego vehicle and a single merging vehicle."""
        road = self.road

        # Ego vehicle on lane ("a", "b", 1), 30 units along the lane.
        ego_vehicle = self.action_type.vehicle_class(
            road, road.network.get_lane(("a", "b", 1)).position(30, 0), speed=30
        )
        road.vehicles.append(ego_vehicle)

        other_vehicles_type = utils.class_from_path(self.config["other_vehicles_type"])

        # Merging vehicle on the merging lane, 110 units along it.
        merging_v = other_vehicles_type(
            road, road.network.get_lane(self.MERGING_LANE_INDEX).position(110, 0), speed=20
        )
        merging_v.target_speed = 30
        road.vehicles.append(merging_v)

        # Set the ego vehicle as the primary vehicle
        self.vehicle = ego_vehicle

        # Forget the speed remembered from any previous episode so that the
        # first step of this episode does not compare against stale data.
        self._previous_merging_speed = None

    def _reward(self, action: int) -> float:
        """
        Return the parent reward plus the braking bonus and influence penalty.

        :param action: the action passed on to the parent reward function
        :return: the combined reward for this step; only the parent reward is
            returned when no vehicle is currently on the merging lane
        """
        reward = super()._reward(action)

        # NOTE(review): the merging vehicle is identified by its lane index
        # only, so once it is no longer on MERGING_LANE_INDEX the custom
        # terms below stop applying. Confirm this is intended.
        merging_vehicle = next(
            (v for v in self.road.vehicles if v.lane_index == self.MERGING_LANE_INDEX),
            None,
        )
        if merging_vehicle is None:
            return reward

        # Longitudinal gap and proximity of the ego vehicle to the start of
        # lane ("b", "c", 0).
        ego_x = self.vehicle.position[0]
        distance_to_merging_vehicle = abs(merging_vehicle.position[0] - ego_x)
        merge_start_x = self.road.network.get_lane(("b", "c", 0)).position(0, 0)[0]
        near_merge_point = abs(ego_x - merge_start_x) < 100

        # Speed change of the merging vehicle since the previous reward call
        # of this episode (zero on the first call of an episode).
        previous_speed = getattr(self, "_previous_merging_speed", None)
        if previous_speed is None:
            previous_speed = merging_vehicle.speed
        merging_acceleration = merging_vehicle.speed - previous_speed
        self._previous_merging_speed = merging_vehicle.speed

        # Penalize the ego vehicle when the other vehicle brakes hard nearby.
        influence_penalty = 0.0
        if near_merge_point and distance_to_merging_vehicle < 20 and merging_acceleration < -1.0:
            print("Highway vehicle influenced: significant deceleration detected")
            influence_penalty = self.config.get("influence_penalty", -5.0)

        # The braking bonus applies only inside the configured interval;
        # outside it nothing is added, even when "braking_reward" is configured.
        d_min, d_max = self.config.get("braking_interval", [0, 10])
        if d_min < distance_to_merging_vehicle < d_max:
            braking_reward = self.config.get("braking_reward", 5)
        else:
            braking_reward = 0.0

        return reward + braking_reward + influence_penalty

In [3]:
# Make the custom environment available through gym.make('CustomMerge-v0').
# The entry point refers to the class defined in this notebook's __main__ module.
gym.register(id='CustomMerge-v0', entry_point='__main__:CustomMergeEnv')

### **Braking distance interval [0,10] meters**

In [4]:
# Environment whose braking bonus applies in the [0, 10] distance interval.
env_dis_0_10 = CustomMergeEnv(
    config={
        "high_speed_reward": 1,
        "lane_change_reward": -5,
        "right_lane_reward": 3,
        "braking_interval": [0, 10],
    },
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [0, 10] braking interval and save the resulting policy.
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_dis_0_10,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    tensorboard_log="env_0_10_other_influence/",
    policy_kwargs={"net_arch": [256, 256]},
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("env_0_10_other_influence/model")

### **Braking distance interval [10,20] meters**


In [5]:
# Environment whose braking bonus applies in the [10, 20] distance interval.
env_dis_10_20 = CustomMergeEnv(
    config={
        "high_speed_reward": 1,
        "lane_change_reward": -5,
        "right_lane_reward": 3,
        "braking_interval": [10, 20],
    },
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [10, 20] braking interval and save the resulting policy.
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_dis_10_20,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    tensorboard_log="env_10_20_other_influence/",
    policy_kwargs={"net_arch": [256, 256]},
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("env_10_20_other_influence/model")

### **Braking distance interval [20,30] meters**

In [6]:
# Environment whose braking bonus applies in the [20, 30] distance interval.
env_dis_20_30 = CustomMergeEnv(
    config={
        "high_speed_reward": 1,
        "lane_change_reward": -5,
        "right_lane_reward": 3,
        "braking_interval": [20, 30],
    },
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [20, 30] braking interval and save the resulting policy.
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_dis_20_30,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    tensorboard_log="env_20_30_other_influence/",
    policy_kwargs={"net_arch": [256, 256]},
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("env_20_30_other_influence/model")

### **Braking distance interval [30,40] meters**

In [7]:
# Environment whose braking bonus applies in the [30, 40] distance interval.
env_dis_30_40 = CustomMergeEnv(
    config={
        "high_speed_reward": 1,
        "lane_change_reward": -5,
        "right_lane_reward": 3,
        "braking_interval": [30, 40],
    },
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [30, 40] braking interval and save the resulting policy.
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_dis_30_40,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    tensorboard_log="env_30_40_other_influence/",
    policy_kwargs={"net_arch": [256, 256]},
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("env_30_40_other_influence/model")

### **Braking distance interval [40,50] meters**

In [8]:
# Environment whose braking bonus applies in the [40, 50] distance interval.
env_dis_40_50 = CustomMergeEnv(
    config={
        "high_speed_reward": 1,
        "lane_change_reward": -5,
        "right_lane_reward": 3,
        "braking_interval": [40, 50],
    },
    render_mode='rgb_array',
)

In [None]:
# Train PPO on the [40, 50] braking interval and save the resulting policy.
timesteps = 1_000_000
model = PPO(
    policy='MlpPolicy',
    env=env_dis_40_50,
    learning_rate=5e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.8,
    gae_lambda=0.95,
    clip_range=0.2,
    tensorboard_log="env_40_50_other_influence/",
    policy_kwargs={"net_arch": [256, 256]},
    verbose=1,
)
model.learn(total_timesteps=timesteps)
model.save("env_40_50_other_influence/model")

### **Evaluate and compare the models**

**For the braking distance  (0,10)**
- Average Reward: 6.8363
- Average Steps to Merge: 7.0
- Average Episode Time: 0.05 seconds
- Number of Collisions: 0
- Successful Merges: 200
- Number of Dangerous Driving Episodes (sudden speed changes): 0

**For the braking distance  (10,20)**
- Average Reward: 6.7459
- Average Steps to Merge: 7.0
- Average Episode Time: 0.06 seconds
- Number of Collisions: 0
- Successful Merges: 200
- Number of Dangerous Driving Episodes (sudden speed changes): 0

**For the braking distance  (20,30)**
- Average Reward: 6.7610
- Average Steps to Merge: 7.0 
- Average Episode Time: 0.06 seconds
- Number of Collisions: 0
- Successful Merges: 200
- Number of Dangerous Driving Episodes (sudden speed changes): 0

**For the braking distance  (30,40)**
- Average Reward: 6.8364
- Average Steps to Merge: 7.0
- Average Episode Time: 0.06 seconds
- Number of Collisions: 0
- Successful Merges: 200
- Number of Dangerous Driving Episodes (sudden speed changes): 0

**For the braking distance  (40,50)**
- Average Reward: 6.8364
- Average Steps to Merge: 7.0
- Average Episode Time: 0.06 seconds
- Number of Collisions: 0
- Successful Merges: 200
- Number of Dangerous Driving Episodes (sudden speed changes): 0

In [85]:
# Function to test the agent and collect metrics
def _find_merging_vehicle(env):
    """Return the first non-ego vehicle on lane ("j", "k", 0), or None if there is none."""
    for vehicle in env.road.vehicles:
        if vehicle is not env.vehicle and vehicle.lane_index == ("j", "k", 0):
            return vehicle
    return None


def evaluate_agent(model, env, num_episodes, speed_threshold_ratio=0.5):
    """Roll out ``model`` in ``env`` and print/return aggregate driving metrics.

    Args:
        model: object exposing ``predict(obs, deterministic=True)`` that
            returns ``(action, state)``.
        env: environment exposing ``reset()``, ``step(action)``, ``config``,
            ``vehicle`` and ``road.vehicles``.
        num_episodes: number of episodes to run.
        speed_threshold_ratio: fraction of the width of
            ``env.config["reward_speed_range"]``; a larger step-to-step change
            of the reported speed flags the episode as dangerous driving.

    Returns:
        dict with the keys ``avg_reward``, ``avg_steps_to_merge``,
        ``avg_episode_time`` (wall-clock seconds), ``number_collisions``,
        ``successful_merges`` and ``number_dangerous_episodes``.
    """
    total_rewards = []  # Total reward of each episode
    total_collisions = 0  # Collisions summed over all episodes
    successful_merges = 0  # Episodes that ended with a successful merge
    dangerous_driving_episodes = 0  # Episodes with a sudden speed change
    total_steps_to_merge = []  # Steps taken in each episode
    total_episode_times = []  # Wall-clock duration of each episode

    # Speed jump (between consecutive steps) above which driving is "dangerous".
    reward_speed_range = env.config["reward_speed_range"]
    speed_threshold = (reward_speed_range[1] - reward_speed_range[0]) * speed_threshold_ratio

    for episode in range(num_episodes):
        start_time = time.time()
        obs, info = env.reset()

        # Vehicles are re-created on reset, so the merging vehicle has to be
        # looked up per episode; it is None when the lane is empty.
        merging_vehicle = _find_merging_vehicle(env)

        done = False
        episode_reward = 0
        collisions = 0
        dangerous_driving = False
        steps_to_merge = 0
        last_speed = None

        while not done:
            # The agent selects an action, which is then executed
            action, _states = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env.step(action)

            episode_reward += reward
            steps_to_merge += 1

            # Compare the reported speed (rounded to 2 decimals) with the previous step
            current_speed = round(info.get('speed', 0), 2)
            if last_speed is not None and abs(current_speed - last_speed) > speed_threshold:
                dangerous_driving = True
            last_speed = current_speed

            if info.get('crashed'):
                collisions += 1

            done = terminated or truncated

            if merging_vehicle is not None:
                ego_x = env.vehicle.position[0]
                merging_x = merging_vehicle.position[0]

                # Success: no collision so far, the merging vehicle is ahead of
                # the ego vehicle, and its lane index no longer ends in 0.
                # The episode is cut short as soon as this holds.
                if not collisions and ego_x < merging_x and merging_vehicle.lane_index[2] != 0:
                    successful_merges += 1
                    done = True

        # Record metrics for the episode
        total_rewards.append(episode_reward)
        total_collisions += collisions
        total_steps_to_merge.append(steps_to_merge)
        if dangerous_driving:
            dangerous_driving_episodes += 1
        total_episode_times.append(time.time() - start_time)

    # Aggregate over all episodes
    avg_reward = np.mean(total_rewards)
    avg_steps_to_merge = np.mean(total_steps_to_merge)
    avg_episode_time = np.mean(total_episode_times)

    # Display the results
    print(f"Average Reward: {avg_reward}")
    print(f"Average Steps to Merge: {avg_steps_to_merge}")
    print(f"Average Episode Time: {avg_episode_time:.2f} seconds")
    print(f"Number of Collisions: {total_collisions}")
    print(f"Successful Merges: {successful_merges}")
    print(f"Number of Dangerous Driving Episodes (sudden speed changes): {dangerous_driving_episodes}")

    # Return the metrics for further analysis or logging
    return {
        "avg_reward": avg_reward,
        "avg_steps_to_merge": avg_steps_to_merge,
        "avg_episode_time": avg_episode_time,
        "number_collisions": total_collisions,
        "successful_merges": successful_merges,
        "number_dangerous_episodes": dangerous_driving_episodes
    }


In [87]:
# Restore the policy trained on the [0, 10] interval and score it over 200 episodes.
model = PPO.load("env_0_10_other_influence/model")
results = evaluate_agent(model, env_dis_0_10, num_episodes=200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
ov

In [89]:
# Restore the policy trained on the [10, 20] interval and score it over 200 episodes.
model = PPO.load("env_10_20_other_influence/model")
results = evaluate_agent(model, env_dis_10_20, num_episodes=200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
ov

In [91]:
# Restore the policy trained on the [20, 30] interval and score it over 200 episodes.
model = PPO.load("env_20_30_other_influence/model")
results = evaluate_agent(model, env_dis_20_30, num_episodes=200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
ov

In [93]:
# Restore the policy trained on the [30, 40] interval and score it over 200 episodes.
model = PPO.load("env_30_40_other_influence/model")
results = evaluate_agent(model, env_dis_30_40, num_episodes=200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
ov

In [95]:
# Restore the policy trained on the [40, 50] interval and score it over 200 episodes.
model = PPO.load("env_40_50_other_influence/model")
results = evaluate_agent(model, env_dis_40_50, num_episodes=200)

crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
ov

The braking distance scenarios were tested across a range of values, and the results show consistent performance from the model in all evaluated ranges. In the **(0, 10)** braking distance range, the agent achieved an **average reward** of **6.8363**, indicating a strong performance with no collisions and **200 successful merges** out of 200 evaluation episodes. The **average steps to merge** was **7.0**, with an **average episode time** of **0.05 seconds** (wall-clock time needed to simulate one episode), which suggests quick decision-making and efficient handling of the merging task. Notably, there were **zero dangerous driving episodes**, highlighting the model's stability during operation.

Similarly, in the **(10, 20)** range, the **average reward** slightly decreased to **6.7459**, yet the model continued to perform effectively, achieving **200 successful merges** and maintaining a **0** collision count. The **average steps to merge** and **episode time** remained consistent at **7.0** and **0.06 seconds**, respectively, indicating stable performance across varying braking distances. As with the previous range, no dangerous driving episodes were recorded.

The performance continued to hold steady in the **(20, 30)** range, where the model recorded an **average reward** of **6.7610**. The agent still achieved **200 successful merges**, took an average of **7 steps to merge**, and completed episodes in **0.06 seconds** on average. The lack of collisions and dangerous driving episodes further reinforced the model's reliability across this range.

In the **(30, 40)** and **(40, 50)** ranges, the results remained virtually identical to those observed in the previous ranges. In both ranges, the **average reward** was **6.8364**, with **200 successful merges**, no collisions, and no dangerous driving episodes. The agent maintained consistent decision-making times and efficiently handled the merging process in **7 steps** per merge with an **average episode time** of **0.06 seconds**.

Overall, the model exhibited **consistent and reliable performance** across all braking distance ranges, demonstrating its ability to handle merging tasks efficiently, regardless of the braking distance. The lack of collisions and dangerous driving episodes throughout the tests suggests that the model maintains stability and safety across a variety of conditions. The **slight variations in reward** across the different braking distances are negligible, indicating that the agent performs well within the specified parameters.

In [9]:
# Load the trained model
model = PPO.load("env_0_10_other_influence/model")

# Initialize the environment and variables for recording
frames = []
obs, info = env_dis_0_10.reset()
done = False
step_count = 0
max_steps = 1000  # Hard cap so the loop ends even if the episode never finishes


def resize_frame_to_macro_block_size(frame, block_size=16):
    """Resize ``frame`` so its height and width are multiples of ``block_size``.

    Each dimension is rounded down to the nearest multiple; 16 is the macro
    block size expected by video codecs.
    """
    h, w, _ = frame.shape
    new_w = (w // block_size) * block_size
    new_h = (h // block_size) * block_size
    return cv2.resize(frame, (new_w, new_h))


# Run the agent in the environment
while step_count < max_steps and not done:
    # deterministic=True records the same greedy policy that evaluate_agent scores
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env_dis_0_10.step(action)
    # An episode ends on termination or on truncation
    done = terminated or truncated
    frame = env_dis_0_10.render()

    # Resize the frame to avoid the macro_block_size warning
    resized_frame = resize_frame_to_macro_block_size(frame)
    frames.append(resized_frame)

    step_count += 1

# Close the environment
env_dis_0_10.close()

# Save the frames as a video
video_filename = "0_10_distance.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")

Exception: code() argument 13 must be str, not int
Exception: code() argument 13 must be str, not int


crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
Video saved as 0_10_distance.mp4


In [10]:
# Display the recorded rollout inline; embed=True stores the video data in the notebook output.
# The Video(...) expression must stay last in the cell so that Jupyter renders it.
video_filename = "0_10_distance.mp4"
Video(video_filename, embed=True)

In [11]:
# Load the trained model
model = PPO.load("env_10_20_other_influence/model")

# Initialize the environment and variables for recording
frames = []
obs, info = env_dis_10_20.reset()
done = False
step_count = 0
max_steps = 1000  # Hard cap so the loop ends even if the episode never finishes


def resize_frame_to_macro_block_size(frame, block_size=16):
    """Resize ``frame`` so its height and width are multiples of ``block_size``.

    Each dimension is rounded down to the nearest multiple; 16 is the macro
    block size expected by video codecs.
    """
    h, w, _ = frame.shape
    new_w = (w // block_size) * block_size
    new_h = (h // block_size) * block_size
    return cv2.resize(frame, (new_w, new_h))


# Run the agent in the environment
while step_count < max_steps and not done:
    # deterministic=True records the same greedy policy that evaluate_agent scores
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env_dis_10_20.step(action)
    # An episode ends on termination or on truncation
    done = terminated or truncated
    frame = env_dis_10_20.render()

    # Resize the frame to avoid the macro_block_size warning
    resized_frame = resize_frame_to_macro_block_size(frame)
    frames.append(resized_frame)

    step_count += 1

# Close the environment
env_dis_10_20.close()

# Save the frames as a video
video_filename = "10_20_distance.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")

Exception: code() argument 13 must be str, not int
Exception: code() argument 13 must be str, not int


crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
Video saved as 10_20_distance.mp4


In [12]:
# Display the recorded rollout inline; embed=True stores the video data in the notebook output.
# The Video(...) expression must stay last in the cell so that Jupyter renders it.
video_filename = "10_20_distance.mp4"
Video(video_filename, embed=True)

In [13]:
# Load the trained model
model = PPO.load("env_20_30_other_influence/model")

# Initialize the environment and variables for recording
frames = []
obs, info = env_dis_20_30.reset()
done = False
step_count = 0
max_steps = 1000  # Hard cap so the loop ends even if the episode never finishes


def resize_frame_to_macro_block_size(frame, block_size=16):
    """Resize ``frame`` so its height and width are multiples of ``block_size``.

    Each dimension is rounded down to the nearest multiple; 16 is the macro
    block size expected by video codecs.
    """
    h, w, _ = frame.shape
    new_w = (w // block_size) * block_size
    new_h = (h // block_size) * block_size
    return cv2.resize(frame, (new_w, new_h))


# Run the agent in the environment
while step_count < max_steps and not done:
    # deterministic=True records the same greedy policy that evaluate_agent scores
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env_dis_20_30.step(action)
    # An episode ends on termination or on truncation
    done = terminated or truncated
    frame = env_dis_20_30.render()

    # Resize the frame to avoid the macro_block_size warning
    resized_frame = resize_frame_to_macro_block_size(frame)
    frames.append(resized_frame)

    step_count += 1

# Close the environment
env_dis_20_30.close()

# Save the frames as a video
video_filename = "20_30_distance.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")

crashFalse

Exception: code() argument 13 must be str, not int
Exception: code() argument 13 must be str, not int



overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
Video saved as 20_30_distance.mp4


In [14]:
# Display the recorded rollout inline; embed=True stores the video data in the notebook output.
# The Video(...) expression must stay last in the cell so that Jupyter renders it.
video_filename = "20_30_distance.mp4"
Video(video_filename, embed=True)

In [15]:
# Load the trained model
model = PPO.load("env_30_40_other_influence/model")

# Initialize the environment and variables for recording
frames = []
obs, info = env_dis_30_40.reset()
done = False
step_count = 0
max_steps = 1000  # Hard cap so the loop ends even if the episode never finishes


def resize_frame_to_macro_block_size(frame, block_size=16):
    """Resize ``frame`` so its height and width are multiples of ``block_size``.

    Each dimension is rounded down to the nearest multiple; 16 is the macro
    block size expected by video codecs.
    """
    h, w, _ = frame.shape
    new_w = (w // block_size) * block_size
    new_h = (h // block_size) * block_size
    return cv2.resize(frame, (new_w, new_h))


# Run the agent in the environment
while step_count < max_steps and not done:
    # deterministic=True records the same greedy policy that evaluate_agent scores
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env_dis_30_40.step(action)
    # An episode ends on termination or on truncation
    done = terminated or truncated
    frame = env_dis_30_40.render()

    # Resize the frame to avoid the macro_block_size warning
    resized_frame = resize_frame_to_macro_block_size(frame)
    frames.append(resized_frame)

    step_count += 1

# Close the environment
env_dis_30_40.close()

# Save the frames as a video
video_filename = "30_40_distance.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")

crashFalse
overFalse


Exception: code() argument 13 must be str, not int
Exception: code() argument 13 must be str, not int


crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
Video saved as 30_40_distance.mp4


In [16]:
# Display the recorded rollout inline; embed=True stores the video data in the notebook output.
# The Video(...) expression must stay last in the cell so that Jupyter renders it.
video_filename = "30_40_distance.mp4"
Video(video_filename, embed=True)

In [17]:
# Load the trained model
model = PPO.load("env_40_50_other_influence/model")

# Initialize the environment and variables for recording
frames = []
obs, info = env_dis_40_50.reset()
done = False
step_count = 0
max_steps = 1000  # Hard cap so the loop ends even if the episode never finishes


def resize_frame_to_macro_block_size(frame, block_size=16):
    """Resize ``frame`` so its height and width are multiples of ``block_size``.

    Each dimension is rounded down to the nearest multiple; 16 is the macro
    block size expected by video codecs.
    """
    h, w, _ = frame.shape
    new_w = (w // block_size) * block_size
    new_h = (h // block_size) * block_size
    return cv2.resize(frame, (new_w, new_h))


# Run the agent in the environment
while step_count < max_steps and not done:
    # deterministic=True records the same greedy policy that evaluate_agent scores
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env_dis_40_50.step(action)
    # An episode ends on termination or on truncation
    done = terminated or truncated
    frame = env_dis_40_50.render()

    # Resize the frame to avoid the macro_block_size warning
    resized_frame = resize_frame_to_macro_block_size(frame)
    frames.append(resized_frame)

    step_count += 1

# Close the environment
env_dis_40_50.close()

# Save the frames as a video
video_filename = "40_50_distance.mp4"
imageio.mimsave(video_filename, frames, fps=30)
print(f"Video saved as {video_filename}")

crashFalse

Exception: code() argument 13 must be str, not int
Exception: code() argument 13 must be str, not int



overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overFalse
crashFalse
overTrue
Video saved as 40_50_distance.mp4


In [18]:
# Display the recorded rollout inline; embed=True stores the video data in the notebook output.
# The Video(...) expression must stay last in the cell so that Jupyter renders it.
video_filename = "40_50_distance.mp4"
Video(video_filename, embed=True)