In [1]:
# Install the required libraries in your Google Colab environment
!pip install gymnasium stable-baselines3 highway-env -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/184.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m184.3/184.5 kB[0m [31m9.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.5/184.5 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m68.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m50.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m39.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Import the necessary libraries
import gymnasium as gym
import highway_env
import numpy as np
import pandas as pd
import pickle
import os
import matplotlib.pyplot as plt

In [3]:
# THE ENVIRONMENT
# Step 1.1: Choose the Environment
# Initialize the environment.
env = gym.make('highway-v0',config={"vehicles_count":50})

In [4]:
from stable_baselines3 import PPO
from google.colab import drive
from google.colab import data_table

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Step A.6.2: Load the all the saved PPO model
model = PPO.load('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/01_trained_models/3_ppo_highway_biased_hf_direct_aggressive')                     # Update directory location 1

Trajectory Collection with Additional information (Collision Flag and Lane Index)

In [6]:
# TRAJECTORY COLLECTION WITH ADDTIONNAL INFORMATION
# Initialize a list to store trajectory data
trajectories = []

# FUNCTION TO COLLECT TRAJECTORY DATA (state-action-reward transitions).

def collect_trajectory_data(env, model, num_episodes,seed):
    """
    Collect trajectory data for a number of episodes.
    Each trajectory contains state-action-reward sequences.
    """
    trajectory_data = []

    for episode in range(num_episodes):
        state, _ = env.reset(seed=seed)  # Reset the environment at the start of each episode              #  change environment name here
        done = False
        episode_data = []

        while not done:
            # Get action from the trained PPO model
            action, _states = model.predict(state, deterministic = True)                                       # change model name here

            # Take the action and get next state and reward
            next_state, reward, terminated, truncated, info = env.step(action)                    #  change environment name here
            done = terminated or truncated
            # Extract lane index and collision flag
            lane_index = int(env.unwrapped.vehicle.lane_index[2])
            collision_flag = int(info.get('crashed', 0))

            # Store the trajectory: (state, action, reward, next_state)
            episode_data.append({
                "state": state,
                "action": action,
                "reward": reward,
                "next_state": next_state,
                "lane_indices": lane_index,
                "collision_flags": collision_flag
            })

            # Update the state for the next iteration
            state = next_state

        # Add the episode data to the overall trajectory list
        trajectory_data.append(episode_data)

    return trajectory_data

In [7]:
# FUNCTION TO PREPROCESS TRAJECTORY DATA
def preprocess_trajectory_data(trajectory_data):
    """
    Preprocesses the trajectory data into a structured format for further analysis.
    Returns a DataFrame with columns: episode, time_step, state, action, reward, next_state, speed, and reward_details.
    """
    processed_data = []

    for episode_num, episode_data in enumerate(trajectory_data):
        for time_step, step in enumerate(episode_data):
            # Flatten the state and next_state for easy interpretation (if they are multi-dimensional)
            state = np.array(step['state']).flatten()  # Flatten the state vector (if multi-dimensional)
            next_state = np.array(step['next_state']).flatten()  # Flatten the next_state vector

            collision_flag = step['collision_flags']
            lane_index = step['lane_indices']

            # Append the processed data for this step
            processed_data.append({
                "episode": episode_num,
                "time_step": time_step,
                "state": state,
                "action": step['action'],
                "reward": step['reward'],
                "next_state": next_state,
                "collision_flag": collision_flag,
                "lane_index": lane_index
            })

    # Convert the list of processed data into a DataFrame
    df = pd.DataFrame(processed_data)
    return df

0. GENERATING TRAJECTORIES FOR TESTING (BIASED HUMAN FEEDBACK DIRECT AGGRESSIVE)

First Biased Human feedback direct Aggressive data frame

In [8]:
# Collect data for 100 episodes
trajectory_data_1 = collect_trajectory_data(env, model, num_episodes=100,seed=2)

In [9]:
# Preprocess the trajectory data
trajectory_df_seed_1 = preprocess_trajectory_data(trajectory_data_1)

In [10]:
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_seed_1)

Unnamed: 0,episode,time_step,state,action,reward,next_state,collision_flag,lane_index
0,0,0,"[1.0, 0.888621, 0.75, 0.3125, 0.0, 1.0, 0.0987...",3,0.977215,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.07669...",0,3
1,0,1,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.07669...",3,0.996106,"[1.0, 1.0, 0.75, 0.37317482, 0.0, 1.0, 0.04164...",0,3
2,0,2,"[1.0, 1.0, 0.75, 0.37317482, 0.0, 1.0, 0.04164...",3,0.999335,"[1.0, 1.0, 0.75, 0.3746881, 0.0, 1.0, 0.003051...",0,3
3,0,3,"[1.0, 1.0, 0.75, 0.3746881, 0.0, 1.0, 0.003051...",3,0.999886,"[1.0, 1.0, 0.75, 0.3749467, 0.0, 1.0, 0.034169...",0,3
4,0,4,"[1.0, 1.0, 0.75, 0.3749467, 0.0, 1.0, 0.034169...",3,0.066667,"[1.0, 1.0, 0.75, 0.17554855, 0.0, 1.0, 0.025, ...",1,3
...,...,...,...,...,...,...,...,...
495,99,0,"[1.0, 0.888621, 0.75, 0.3125, 0.0, 1.0, 0.0987...",3,0.977215,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.07669...",0,3
496,99,1,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.07669...",3,0.996106,"[1.0, 1.0, 0.75, 0.37317482, 0.0, 1.0, 0.04164...",0,3
497,99,2,"[1.0, 1.0, 0.75, 0.37317482, 0.0, 1.0, 0.04164...",3,0.999335,"[1.0, 1.0, 0.75, 0.3746881, 0.0, 1.0, 0.003051...",0,3
498,99,3,"[1.0, 1.0, 0.75, 0.3746881, 0.0, 1.0, 0.003051...",3,0.999886,"[1.0, 1.0, 0.75, 0.3749467, 0.0, 1.0, 0.034169...",0,3


In [11]:
# Save the dataframe as a pickle file
trajectory_df_seed_1.to_pickle('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/03_test_trajectories/3_biased_hf_d_aggressive/1_biased_hf_d_aggressive_df.pkl')         # Update directory location 2


Second Biased Human feedback direct Aggressive data frame


In [12]:
# Collect data for 100 episodes
trajectory_data_2 = collect_trajectory_data(env, model, num_episodes=100,seed=10)

In [13]:
# Preprocess the trajectory data
trajectory_df_seed_2 = preprocess_trajectory_data(trajectory_data_2)

In [14]:
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_seed_2)

Unnamed: 0,episode,time_step,state,action,reward,next_state,collision_flag,lane_index
0,0,0,"[1.0, 0.8845452, 0.75, 0.3125, 0.0, 1.0, 0.100...",3,0.977215,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.07899...",0,3
1,0,1,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.07899...",3,0.996106,"[1.0, 1.0, 0.75, 0.37317482, 0.0, 1.0, 0.04019...",0,3
2,0,2,"[1.0, 1.0, 0.75, 0.37317482, 0.0, 1.0, 0.04019...",3,0.999335,"[1.0, 1.0, 0.75, 0.3746881, 0.0, 1.0, -0.00426...",0,3
3,0,3,"[1.0, 1.0, 0.75, 0.3746881, 0.0, 1.0, -0.00426...",3,0.999886,"[1.0, 1.0, 0.75, 0.3749467, 0.0, 1.0, 0.021620...",0,3
4,0,4,"[1.0, 1.0, 0.75, 0.3749467, 0.0, 1.0, 0.021620...",3,0.999981,"[1.0, 1.0, 0.75, 0.37499088, 0.0, 1.0, -0.0256...",0,3
...,...,...,...,...,...,...,...,...
1095,99,6,"[1.0, 1.0, 0.53578675, 0.37441987, -0.02082289...",3,0.977771,"[1.0, 1.0, 0.50209767, 0.37499678, -0.00148757...",0,2
1096,99,7,"[1.0, 1.0, 0.50209767, 0.37499678, -0.00148757...",3,0.977778,"[1.0, 1.0, 0.500084, 0.37499994, -6.799132e-05...",0,2
1097,99,8,"[1.0, 1.0, 0.500084, 0.37499994, -6.799132e-05...",3,0.977778,"[1.0, 1.0, 0.50000215, 0.375, -2.1164294e-06, ...",0,2
1098,99,9,"[1.0, 1.0, 0.50000215, 0.375, -2.1164294e-06, ...",3,0.977778,"[1.0, 1.0, 0.5, 0.375, -2.5729069e-08, 1.0, -0...",0,2


In [15]:
# Save the dataframe as a pickle file
trajectory_df_seed_2.to_pickle('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/03_test_trajectories/3_biased_hf_d_aggressive/2_biased_hf_d_aggressive_df.pkl')          # Update directory location 3

Third Biased Human feedback direct Aggressive data frame

In [None]:
# Collect data for 100 episodes
trajectory_data_3 = collect_trajectory_data(env, model, num_episodes=100,seed=6)

In [None]:
# Preprocess the trajectory data
trajectory_df_seed_3 = preprocess_trajectory_data(trajectory_data_3)

In [None]:
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_seed_3)

Unnamed: 0,episode,time_step,state,action,reward,next_state,collision_flag,lane_index
0,0,0,"[1.0, 0.89063084, 0.25, 0.3125, 0.0, 1.0, 0.10...",3,0.932770,"[1.0, 1.0, 0.25, 0.36431947, 0.0, 1.0, 0.06666...",0,1
1,0,1,"[1.0, 1.0, 0.25, 0.36431947, 0.0, 1.0, 0.06666...",3,0.951662,"[1.0, 1.0, 0.25, 0.37317482, 0.0, 1.0, 0.01380...",0,1
2,0,2,"[1.0, 1.0, 0.25, 0.37317482, 0.0, 1.0, 0.01380...",3,0.954890,"[1.0, 1.0, 0.25, 0.3746881, 0.0, 1.0, -0.03676...",0,1
3,0,3,"[1.0, 1.0, 0.25, 0.3746881, 0.0, 1.0, -0.03676...",3,0.955442,"[1.0, 1.0, 0.25, 0.3749467, 0.0, 1.0, 0.035849...",0,1
4,0,4,"[1.0, 1.0, 0.25, 0.3749467, 0.0, 1.0, 0.035849...",3,0.955536,"[1.0, 1.0, 0.25, 0.37499088, 0.0, 1.0, -0.0166...",0,1
...,...,...,...,...,...,...,...,...
1395,99,9,"[1.0, 1.0, 0.25, 0.375, 0.0, 1.0, 0.021099757,...",3,0.955556,"[1.0, 1.0, 0.25, 0.375, 0.0, 1.0, -0.03581893,...",0,1
1396,99,10,"[1.0, 1.0, 0.25, 0.375, 0.0, 1.0, -0.03581893,...",3,0.955556,"[1.0, 1.0, 0.25, 0.375, 0.0, 1.0, 0.030074343,...",0,1
1397,99,11,"[1.0, 1.0, 0.25, 0.375, 0.0, 1.0, 0.030074343,...",2,0.976543,"[1.0, 1.0, 0.46421292, 0.37442142, 0.020822959...",0,2
1398,99,12,"[1.0, 1.0, 0.46421292, 0.37442142, 0.020822959...",3,0.977771,"[1.0, 1.0, 0.4979023, 0.37499705, 0.0014875978...",0,2


In [None]:
# Save the dataframe as a pickle file
trajectory_df_seed_3.to_pickle('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/03_test_trajectories/3_biased_hf_d_aggressive/3_biased_hf_d_aggressive_df.pkl')        # Update directory location 4

Fourth Biased Human feedback direct Aggressive data frame

In [None]:
# Collect data for 100 episodes
trajectory_data_4 = collect_trajectory_data(env, model, num_episodes=100,seed=20)

In [None]:
# Preprocess the trajectory data
trajectory_df_seed_4 = preprocess_trajectory_data(trajectory_data_4)

In [None]:
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_seed_4)

Unnamed: 0,episode,time_step,state,action,reward,next_state,collision_flag,lane_index
0,0,0,"[1.0, 0.8959215, 0.75, 0.3125, 0.0, 1.0, 0.101...",3,0.977215,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.05706...",0,3
1,0,1,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.05706...",0,0.069317,"[1.0, 1.0, 0.5826099, 0.26165888, -0.029014844...",1,2
2,1,0,"[1.0, 0.8959215, 0.75, 0.3125, 0.0, 1.0, 0.101...",3,0.977215,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.05706...",0,3
3,1,1,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.05706...",0,0.069317,"[1.0, 1.0, 0.5826099, 0.26165888, -0.029014844...",1,2
4,2,0,"[1.0, 0.8959215, 0.75, 0.3125, 0.0, 1.0, 0.101...",3,0.977215,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.05706...",0,3
...,...,...,...,...,...,...,...,...
195,97,1,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.05706...",0,0.069317,"[1.0, 1.0, 0.5826099, 0.26165888, -0.029014844...",1,2
196,98,0,"[1.0, 0.8959215, 0.75, 0.3125, 0.0, 1.0, 0.101...",3,0.977215,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.05706...",0,3
197,98,1,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.05706...",0,0.069317,"[1.0, 1.0, 0.5826099, 0.26165888, -0.029014844...",1,2
198,99,0,"[1.0, 0.8959215, 0.75, 0.3125, 0.0, 1.0, 0.101...",3,0.977215,"[1.0, 1.0, 0.75, 0.36431947, 0.0, 1.0, 0.05706...",0,3


In [None]:
# Save the dataframe as a pickle file
trajectory_df_seed_4.to_pickle('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/03_test_trajectories/3_biased_hf_d_aggressive/4_biased_hf_d_aggressive_df.pkl')          # Update directory location 5

Fifth Biased Human feedback direct Aggressive data frame

In [None]:
# Collect data for 100 episodes
trajectory_data_5 = collect_trajectory_data(env, model, num_episodes=100,seed=34)

In [None]:
# Preprocess the trajectory data
trajectory_df_seed_5 = preprocess_trajectory_data(trajectory_data_5)

In [None]:
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_seed_5)

Unnamed: 0,episode,time_step,state,action,reward,next_state,collision_flag,lane_index
0,0,0,"[1.0, 0.9143699, 0.0, 0.3125, 0.0, 1.0, 0.1053...",3,0.910548,"[1.0, 1.0, 0.0, 0.36431947, 0.0, 1.0, 0.076132...",0,0
1,0,1,"[1.0, 1.0, 0.0, 0.36431947, 0.0, 1.0, 0.076132...",3,0.929440,"[1.0, 1.0, 0.0, 0.37317482, 0.0, 1.0, 0.036051...",0,0
2,0,2,"[1.0, 1.0, 0.0, 0.37317482, 0.0, 1.0, 0.036051...",3,0.000000,"[1.0, 1.0, 0.0, 0.17503068, 0.0, 1.0, 0.025, 0...",1,0
3,1,0,"[1.0, 0.9143699, 0.0, 0.3125, 0.0, 1.0, 0.1053...",3,0.910548,"[1.0, 1.0, 0.0, 0.36431947, 0.0, 1.0, 0.076132...",0,0
4,1,1,"[1.0, 1.0, 0.0, 0.36431947, 0.0, 1.0, 0.076132...",3,0.929440,"[1.0, 1.0, 0.0, 0.37317482, 0.0, 1.0, 0.036051...",0,0
...,...,...,...,...,...,...,...,...
295,98,1,"[1.0, 1.0, 0.0, 0.36431947, 0.0, 1.0, 0.076132...",3,0.929440,"[1.0, 1.0, 0.0, 0.37317482, 0.0, 1.0, 0.036051...",0,0
296,98,2,"[1.0, 1.0, 0.0, 0.37317482, 0.0, 1.0, 0.036051...",3,0.000000,"[1.0, 1.0, 0.0, 0.17503068, 0.0, 1.0, 0.025, 0...",1,0
297,99,0,"[1.0, 0.9143699, 0.0, 0.3125, 0.0, 1.0, 0.1053...",3,0.910548,"[1.0, 1.0, 0.0, 0.36431947, 0.0, 1.0, 0.076132...",0,0
298,99,1,"[1.0, 1.0, 0.0, 0.36431947, 0.0, 1.0, 0.076132...",3,0.929440,"[1.0, 1.0, 0.0, 0.37317482, 0.0, 1.0, 0.036051...",0,0


In [None]:
# Save the dataframe as a pickle file
trajectory_df_seed_5.to_pickle('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/03_test_trajectories/3_biased_hf_d_aggressive/5_biased_hf_d_aggressive_df.pkl')              # Update directory location 6