[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RizanSM/zero_shot_llms_in_HIL_RL/blob/main/01_Code/01_Highway_Env/02_Default_Environment/07_LLM_DIRECT/02_Generate_trajectories_LLM_D_default_highway_env.ipynb)

In [None]:
# Install the required libraries in your Google Colab environment
!pip install gymnasium stable-baselines3 highway-env -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/184.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.5/184.5 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m84.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m69.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m42.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Import the necessary libraries
import gymnasium as gym
import highway_env
import numpy as np
import pandas as pd
import pickle
import os
import matplotlib.pyplot as plt

In [None]:
# THE ENVIRONMENT
# Step 1.1: Choose and initialize the environment.
# The config override sets "vehicles_count" to 50; all other settings keep
# the environment's defaults.
env = gym.make(
    "highway-v0",
    config={"vehicles_count": 50},
)

In [None]:
from stable_baselines3 import PPO
from google.colab import drive
from google.colab import data_table

# Mount Google Drive at /content/drive so that the model and trajectory
# paths used later in this notebook resolve.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Step A.6.2: Load the saved PPO model from Google Drive.
# NOTE: update this path (directory location 1) to match your own Drive layout.
model = PPO.load('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/01_trained_models/7_ppo_highway_llmf_direct_ideal_1')

Trajectory collection with additional information (collision flag and lane index)

In [None]:
# TRAJECTORY COLLECTION WITH ADDITIONAL INFORMATION
# Initialize a list to store trajectory data.
# NOTE(review): `trajectories` is not referenced anywhere else in the visible
# notebook — confirm whether it is still needed.
trajectories = []

# FUNCTION TO COLLECT TRAJECTORY DATA (state-action-reward transitions).

def collect_trajectory_data(env, model, num_episodes, seed):
    """
    Roll out a policy in an environment and record every transition.

    Args:
        env: Gymnasium-style environment. Must provide ``reset(seed=...)``
            returning ``(observation, info)``, ``step(action)`` returning
            ``(observation, reward, terminated, truncated, info)``, and
            ``unwrapped.vehicle.lane_index`` (indexable at position 2).
        model: Policy object exposing ``predict(observation, deterministic=True)``
            that returns ``(action, state)``.
        num_episodes: Number of episodes to run.
        seed: Seed passed to the FIRST ``env.reset`` call only (may be None).

    Returns:
        A list with one entry per episode. Each entry is a list of dicts, one
        per step, with keys ``state``, ``action``, ``reward``, ``next_state``,
        ``lane_indices`` (int lane index after the step) and
        ``collision_flags`` (int of ``info['crashed']``, 0 if the key is absent).
    """
    trajectory_data = []

    for episode in range(num_episodes):
        # Seed only the first reset. Re-seeding every reset with the same value
        # restarts the environment's RNG identically each time, and because the
        # policy is queried deterministically every episode would then replay
        # the same trajectory. Later resets continue the seeded RNG stream, so
        # the whole run stays reproducible while episodes differ.
        reset_seed = seed if episode == 0 else None
        state, _ = env.reset(seed=reset_seed)
        done = False
        episode_data = []

        while not done:
            # Query the trained policy for its greedy (deterministic) action.
            action, _ = model.predict(state, deterministic=True)

            # Advance the environment by one step.
            next_state, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated

            # Read the lane index and collision flag after the step.
            lane_index = int(env.unwrapped.vehicle.lane_index[2])
            collision_flag = int(info.get('crashed', 0))

            # Store the transition together with the extra diagnostics.
            episode_data.append({
                "state": state,
                "action": action,
                "reward": reward,
                "next_state": next_state,
                "lane_indices": lane_index,
                "collision_flags": collision_flag
            })

            # The next observation becomes the current one.
            state = next_state

        trajectory_data.append(episode_data)

    return trajectory_data

In [None]:
# FUNCTION TO PREPROCESS TRAJECTORY DATA
def preprocess_trajectory_data(trajectory_data):
    """
    Turn nested per-episode trajectory records into one row-per-step DataFrame.

    Args:
        trajectory_data: List of episodes. Each episode is a list of step dicts
            with keys 'state', 'action', 'reward', 'next_state',
            'lane_indices' and 'collision_flags'.

    Returns:
        pandas.DataFrame with columns: episode, time_step, state, action,
        reward, next_state, collision_flag, lane_index. ``state`` and
        ``next_state`` are flattened to 1-D NumPy arrays; ``episode`` and
        ``time_step`` are the positions within the input lists.
    """
    def _to_row(ep_idx, t, record):
        # Flatten observations so each cell holds a 1-D vector.
        flat_state = np.array(record['state']).flatten()
        flat_next_state = np.array(record['next_state']).flatten()
        crashed = record['collision_flags']
        lane = record['lane_indices']
        return {
            "episode": ep_idx,
            "time_step": t,
            "state": flat_state,
            "action": record['action'],
            "reward": record['reward'],
            "next_state": flat_next_state,
            "collision_flag": crashed,
            "lane_index": lane,
        }

    rows = [
        _to_row(ep_idx, t, record)
        for ep_idx, steps in enumerate(trajectory_data)
        for t, record in enumerate(steps)
    ]

    # Build the frame once from the full list of row dicts.
    return pd.DataFrame(rows)

0. GENERATING TRAJECTORIES FOR TESTING (LLM DIRECT)

First LLM feedback direct data frame

In [None]:
# Roll out the loaded policy for 100 episodes, passing seed=2 (first test set).
trajectory_data_1 = collect_trajectory_data(
    env,
    model,
    num_episodes=100,
    seed=2,
)

In [None]:
# Preprocess the trajectory data into a one-row-per-step DataFrame.
# Note: the "_seed_1" suffix is the dataset index; this data was collected with seed=2.
trajectory_df_seed_1 = preprocess_trajectory_data(trajectory_data_1)

In [None]:
# Enable Colab's DataFrame formatter and display the first trajectory
# DataFrame as a Colab data table.
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_seed_1)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Save the first trajectory DataFrame as a pickle file on the mounted Drive.
# NOTE: update this path (directory location 2) to match your own Drive layout.
trajectory_df_seed_1.to_pickle('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/03_test_trajectories/7_llm_d_ideal/1_llmf_d_ideal_df_1.pkl')


Second LLM feedback direct data frame


In [None]:
# Roll out the loaded policy for 100 episodes, passing seed=10 (second test set).
trajectory_data_2 = collect_trajectory_data(
    env,
    model,
    num_episodes=100,
    seed=10,
)

In [None]:
# Preprocess the trajectory data into a one-row-per-step DataFrame.
# Note: the "_seed_2" suffix is the dataset index; this data was collected with seed=10.
trajectory_df_seed_2 = preprocess_trajectory_data(trajectory_data_2)

In [None]:
# Enable Colab's DataFrame formatter and display the second trajectory
# DataFrame as a Colab data table.
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_seed_2)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Save the second trajectory DataFrame as a pickle file on the mounted Drive.
# NOTE: update this path (directory location 3) to match your own Drive layout.
trajectory_df_seed_2.to_pickle('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/03_test_trajectories/7_llm_d_ideal/2_llmf_d_ideal_df_1.pkl')

Third LLM feedback direct data frame

In [None]:
# Roll out the loaded policy for 100 episodes, passing seed=6 (third test set).
trajectory_data_3 = collect_trajectory_data(
    env,
    model,
    num_episodes=100,
    seed=6,
)

In [None]:
# Preprocess the trajectory data into a one-row-per-step DataFrame.
# Note: the "_seed_3" suffix is the dataset index; this data was collected with seed=6.
trajectory_df_seed_3 = preprocess_trajectory_data(trajectory_data_3)

In [None]:
# Enable Colab's DataFrame formatter and display the third trajectory
# DataFrame as a Colab data table.
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_seed_3)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Save the third trajectory DataFrame as a pickle file on the mounted Drive.
# NOTE: update this path (directory location 4) to match your own Drive layout.
trajectory_df_seed_3.to_pickle('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/03_test_trajectories/7_llm_d_ideal/3_llmf_d_ideal_df_1.pkl')

Fourth LLM feedback direct data frame

In [None]:
# Roll out the loaded policy for 100 episodes, passing seed=20 (fourth test set).
trajectory_data_4 = collect_trajectory_data(
    env,
    model,
    num_episodes=100,
    seed=20,
)

In [None]:
# Preprocess the trajectory data into a one-row-per-step DataFrame.
# Note: the "_seed_4" suffix is the dataset index; this data was collected with seed=20.
trajectory_df_seed_4 = preprocess_trajectory_data(trajectory_data_4)

In [None]:
# Enable Colab's DataFrame formatter and display the fourth trajectory
# DataFrame as a Colab data table.
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_seed_4)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Save the fourth trajectory DataFrame as a pickle file on the mounted Drive.
# NOTE: update this path (directory location 5) to match your own Drive layout.
trajectory_df_seed_4.to_pickle('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/03_test_trajectories/7_llm_d_ideal/4_llmf_d_ideal_df_1.pkl')

Fifth LLM feedback direct data frame

In [None]:
# Roll out the loaded policy for 100 episodes, passing seed=34 (fifth test set).
trajectory_data_5 = collect_trajectory_data(
    env,
    model,
    num_episodes=100,
    seed=34,
)

In [None]:
# Preprocess the trajectory data into a one-row-per-step DataFrame.
# Note: the "_seed_5" suffix is the dataset index; this data was collected with seed=34.
trajectory_df_seed_5 = preprocess_trajectory_data(trajectory_data_5)

In [None]:
# Enable Colab's DataFrame formatter and display the fifth trajectory
# DataFrame as a Colab data table.
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_seed_5)

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Save the fifth trajectory DataFrame as a pickle file on the mounted Drive.
# NOTE: update this path (directory location 6) to match your own Drive layout.
trajectory_df_seed_5.to_pickle('/content/drive/MyDrive/05_zero_shot_llm_3/02_data/03_test_trajectories/7_llm_d_ideal/5_llmf_d_ideal_df_1.pkl')