[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RizanSM/zero_shot_llms_in_HIL_RL/blob/main/01_Code/01_Highway_Env/03_Edge_Case_Scenario_1/04_LLM_DIRECT/02_Model_testing_LLM_D_Edge_Case_1.ipynb)

In [None]:
# Install the required libraries in your Google Colab environment
!pip install gymnasium stable-baselines3 highway-env -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/184.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.5/184.5 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m69.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m56.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m41.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Import the necessary libraries
import gymnasium as gym
import highway_env
import numpy as np
import pandas as pd
import pickle
import os
import matplotlib.pyplot as plt

In [None]:
from stable_baselines3 import PPO
from google.colab import drive
from google.colab import data_table

# Mount Google Drive at /content/drive; the trajectory pickles loaded later in
# this notebook are read from paths under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


CALCULATE MISALIGNMENT FUNCTION

In [None]:
def calculate_misalignment(df, reward_threshold=0, lane_change_penalty=1, collision_penalty=5, reward_penalty=2):
    """
    Calculate a misalignment score for each episode and the mean over episodes.

    For every episode the score is

        lane_change_penalty * (sum of absolute lane-index changes between
                               consecutive time steps)
        + collision_penalty * (sum of 'collision_flag')
        + reward_penalty    * (number of steps with reward < reward_threshold)

    Parameters:
        df (pd.DataFrame): Trajectories with the columns 'episode', 'time_step',
            'lane_index', 'collision_flag' and 'reward'. The input is not modified.
        reward_threshold (float): Rewards strictly below this value count as suboptimal.
        lane_change_penalty (float): Weight applied to the lane-change total.
        collision_penalty (float): Weight applied to the collision total.
        reward_penalty (float): Weight applied to the suboptimal-reward count.

    Returns:
        tuple: (scores_df, average_misalignment) where scores_df has the columns
        'episode' and 'Misalignment Score' (one row per episode, in order of first
        appearance) and average_misalignment is the mean of that score column.
        For an input without any rows the frame is empty and the average is NaN.
    """
    score_records = []

    # A single groupby pass replaces re-filtering the whole frame once per episode.
    # sort=False keeps episodes in order of first appearance. Rows whose episode
    # id is missing (NaN) are skipped instead of yielding a spurious zero score.
    for episode, episode_rows in df.groupby('episode', sort=False):
        # Consecutive differences are only meaningful in time order.
        episode_rows = episode_rows.sort_values(by='time_step')

        # Total absolute change of the lane index (a two-lane jump counts as 2).
        lane_changes = episode_rows['lane_index'].diff().abs().sum()
        collisions = episode_rows['collision_flag'].sum()
        suboptimal_rewards = (episode_rows['reward'] < reward_threshold).sum()

        score_records.append({
            'episode': episode,
            'Misalignment Score': (
                lane_change_penalty * lane_changes +
                collision_penalty * collisions +
                reward_penalty * suboptimal_rewards
            ),
        })

    # Passing the column names explicitly keeps the schema stable even when no
    # episode was found (the bare constructor would yield a column-less frame
    # and the lookup below would raise KeyError).
    scores_df = pd.DataFrame(score_records, columns=['episode', 'Misalignment Score'])

    if scores_df.empty:
        return scores_df, float('nan')

    return scores_df, scores_df['Misalignment Score'].mean()

AVERAGE TERMINATE TIME (ATT) FUNCTION

In [None]:
def calculate_collision_rate(test_trajectory_df):
    """
    Compute the number of recorded steps per episode and the average over episodes.

    Despite its historical name, this function does not compute a collision
    rate: it reports how many rows (time steps) each episode contains and the
    mean of those counts, i.e. the average terminate time.

    Parameters:
        test_trajectory_df (pd.DataFrame): Trajectories with an 'episode' column,
            one row per time step.

    Returns:
        tuple: (results_df, average_terminate_time) where results_df has the
        columns 'Episode' and 'Total Steps' (one row per episode, sorted by
        episode) and average_terminate_time is the mean of 'Total Steps'.
        For an input without any rows the frame is empty and the average is NaN.
    """
    # Only the row count of each group is needed, so no other column is read.
    step_records = [
        {'Episode': episode, 'Total Steps': len(episode_rows)}
        for episode, episode_rows in test_trajectory_df.groupby('episode')
    ]

    # Explicit column names keep the schema stable when there are no episodes.
    results_df = pd.DataFrame(step_records, columns=['Episode', 'Total Steps'])

    if results_df.empty:
        return results_df, float('nan')

    return results_df, results_df['Total Steps'].mean()

STABILITY OF THE LEARNING CURVES OF THE MODEL FUNCTION

In [None]:
def analyze_learning_curve(test_trajectory_df, window_size=10):
    """
    Summarise the per-episode cumulative reward of a set of trajectories.

    Parameters:
        test_trajectory_df (pd.DataFrame): Trajectories with the columns
            'episode' and 'reward', one row per time step.
        window_size (int): Number of episodes in the trailing moving average
            used for the 'smoothed_reward' column. Defaults to 10.

    Returns:
        tuple: (cumulative_rewards, variance, std_dev, final_reward) where
        cumulative_rewards has the columns 'episode', 'cumulative_reward'
        (sum of rewards in the episode) and 'smoothed_reward' (moving average
        of 'cumulative_reward'); variance and std_dev are the sample variance
        and sample standard deviation of 'cumulative_reward' across episodes;
        final_reward is the mean of 'cumulative_reward' across episodes.
    """
    # Step 1: total reward collected in each episode.
    cumulative_rewards = (
        test_trajectory_df.groupby('episode')['reward']
        .sum()
        .rename('cumulative_reward')
        .reset_index()
    )

    # Step 2: trailing moving average; min_periods=1 means the first episodes
    # are averaged over however many episodes are available so far.
    episode_totals = cumulative_rewards['cumulative_reward']
    cumulative_rewards['smoothed_reward'] = episode_totals.rolling(
        window=window_size, min_periods=1
    ).mean()

    # Step 3: spread and average of the episode totals (pandas defaults, so
    # var/std are NaN when there is only a single episode).
    variance = episode_totals.var()
    std_dev = episode_totals.std()
    final_reward = episode_totals.mean()

    return cumulative_rewards, variance, std_dev, final_reward

COMBINING ALL THE DATA FRAMES

In [None]:
def create_cummulative_reward(df1, df2, df3, df4, df5):
    """
    Combine the per-episode cumulative rewards of five runs into one data frame.

    The result contains:
      - 'episode': episodes present in all five inputs (inner join on 'episode')
      - 'cumulative_reward_1' ... 'cumulative_reward_5': the 'cumulative_reward'
        column of df1 ... df5 respectively
      - 'mean_cumulative_reward': row-wise mean of the five reward columns

    Parameters:
      df1, df2, df3, df4, df5 (pd.DataFrame): Data frames with the columns
        'episode' and 'cumulative_reward'. The inputs are not modified.

    Returns:
      pd.DataFrame: The merged and aggregated data frame.
    """
    reward_columns = []
    merged_df = None

    for run_number, run_df in enumerate((df1, df2, df3, df4, df5), start=1):
        reward_column = f'cumulative_reward_{run_number}'
        reward_columns.append(reward_column)

        # Give this run's reward column a unique name and drop every other column.
        run_rewards = run_df.rename(columns={'cumulative_reward': reward_column})[
            ['episode', reward_column]
        ]

        if merged_df is None:
            merged_df = run_rewards.copy()
        else:
            merged_df = merged_df.merge(run_rewards, on='episode')

    # Episode-wise mean across the five runs.
    merged_df['mean_cumulative_reward'] = merged_df[reward_columns].mean(axis=1)

    return merged_df

CALCULATE FINAL MISALIGNMENT, AVERAGE TERMINATE TIME & AVERAGE EPISODIC REWARD

In [None]:
def calculate_performance(value_1, value_2, value_3):
    """
    Return the arithmetic mean of each of the three given sequences.

    Parameters:
        value_1, value_2, value_3: Non-empty sequences of numbers.

    Returns:
        tuple: (mean of value_1, mean of value_2, mean of value_3).
    """
    def _mean(values):
        # Plain sum/len average; an empty sequence raises ZeroDivisionError.
        return sum(values) / len(values)

    return _mean(value_1), _mean(value_2), _mean(value_3)


0. MODEL TESTING (LLM_D_EDGE_CASE_1)

In [None]:
# Directory on the mounted Drive that holds the five LLM-D edge-case-1 test runs.
# Update this single location when the data lives somewhere else.
TRAJECTORY_DIR = '/content/drive/MyDrive/05_zero_shot_llm_3/02_data/04_test_trajectories_edge_1/4_llm_d_edge_1'

# Files are named '<run>_trajectory_llm_d_ideal_edge_1_df.pkl' for run = 1..5.
# NOTE(review): unpickling can execute arbitrary code; only load files you trust.
trajectory_df_1, trajectory_df_2, trajectory_df_3, trajectory_df_4, trajectory_df_5 = (
    pd.read_pickle(f'{TRAJECTORY_DIR}/{run}_trajectory_llm_d_ideal_edge_1_df.pkl')
    for run in range(1, 6)
)

TESTING THE MODEL BY ANALYZING THE DATA FRAME <br>



1. LLM feedback Direct Edge 1 data frame 1<br>
trajectory_df_1 ---> 1_trajectory_llm_d_ideal_edge_1_df

In [None]:
# Render the raw run-1 trajectories as a Colab DataTable. The DataTable call
# stays last so the notebook displays it as the cell's output.
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_1)

Unnamed: 0,episode,time_step,state,action,reward,next_state,collision_flag,lane_index
0,0,0,"[1.0, 0.88696575, 0.75, 0.3125, 0.0, 1.0, 0.09...",4,0.756118,"[1.0, 0.9994024, 0.75, 0.26068053, 0.0, 1.0, 0...",0,3
1,0,1,"[1.0, 0.9994024, 0.75, 0.26068053, 0.0, 1.0, 0...",4,0.737227,"[1.0, 1.0, 0.75, 0.25182518, 0.0, 1.0, 0.11317...",0,3
2,0,2,"[1.0, 1.0, 0.75, 0.25182518, 0.0, 1.0, 0.11317...",4,0.733999,"[1.0, 1.0, 0.75, 0.2503119, 0.0, 1.0, 0.113524...",0,3
3,0,3,"[1.0, 1.0, 0.75, 0.2503119, 0.0, 1.0, 0.113524...",4,0.733447,"[1.0, 1.0, 0.75, 0.2500533, 0.0, 1.0, 0.093136...",0,3
4,0,4,"[1.0, 1.0, 0.75, 0.2500533, 0.0, 1.0, 0.093136...",4,0.733353,"[1.0, 1.0, 0.75, 0.25000912, 0.0, 1.0, 0.07285...",0,3
...,...,...,...,...,...,...,...,...
1795,99,13,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.19691038, 1...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.17638753, 1...",0,3
1796,99,14,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.17638753, 1...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.15586469, 1...",0,3
1797,99,15,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.15586469, 1...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, -0.046883374,...",0,3
1798,99,16,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, -0.046883374,...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, -0.03411416, ...",0,3


A. MISALIGNMENT FOR DATA FRAME (trajectory_df_1)

In [None]:
# Run 1: per-episode misalignment scores and their mean, using the default
# threshold and penalty weights of calculate_misalignment.
misalignment_df_1, misalignment_1 = calculate_misalignment(trajectory_df_1)
print("misalignment:", misalignment_1)

misalignment: 5.0


In [None]:
# Show the per-episode misalignment scores of run 1 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(misalignment_df_1)

Unnamed: 0,episode,Misalignment Score
0,0,5.0
1,1,5.0
2,2,5.0
3,3,5.0
4,4,5.0
...,...,...
95,95,5.0
96,96,5.0
97,97,5.0
98,98,5.0


B. AVERAGE TERMINATE TIME FOR DATA FRAME (trajectory_df_1)

In [None]:
# Run 1: despite the "collision_rate" names, calculate_collision_rate returns
# the step count of each episode and the mean step count (average terminate time).
collision_rate_df_1, collision_rate_1 = calculate_collision_rate(trajectory_df_1)
print("average terminate time:", collision_rate_1)

average terminate time: 18.0


In [None]:
# Show the per-episode step counts of run 1 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(collision_rate_df_1)

Unnamed: 0,Episode,Total Steps
0,0,18
1,1,18
2,2,18
3,3,18
4,4,18
...,...,...
95,95,18
96,96,18
97,97,18
98,98,18


C. STABILITY OF LEARNING CURVES (trajectory_df_1)

In [None]:
# Run 1: per-episode cumulative reward plus its variance, standard deviation and
# mean across episodes (final_reward_1 is that mean, not the last episode's reward).
learning_curve_result_df_1, variance_1, std_dev_1, final_reward_1 = analyze_learning_curve(trajectory_df_1)
print("Variance:", variance_1)
print("Standard Deviation:", std_dev_1)
print("average episodic reward:", final_reward_1)

Variance: 7.968291971929412e-29
Standard Deviation: 8.926528984957934e-15
average episodic reward: 12.560814668060118


In [None]:
# Show the cumulative and smoothed per-episode rewards of run 1 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(learning_curve_result_df_1)

Unnamed: 0,episode,cumulative_reward,smoothed_reward
0,0,12.560815,12.560815
1,1,12.560815,12.560815
2,2,12.560815,12.560815
3,3,12.560815,12.560815
4,4,12.560815,12.560815
...,...,...,...
95,95,12.560815,12.560815
96,96,12.560815,12.560815
97,97,12.560815,12.560815
98,98,12.560815,12.560815


2. LLM feedback Direct Edge 1 data frame 2<br>
trajectory_df_2 ---> 2_trajectory_llm_d_ideal_edge_1_df

In [None]:
# Render the raw run-2 trajectories as a Colab DataTable. The DataTable call
# stays last so the notebook displays it as the cell's output.
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_2)

Output hidden; open in https://colab.research.google.com to view.

A. MISALIGNMENT FOR DATA FRAME (trajectory_df_2)

In [None]:
# Run 2: per-episode misalignment scores and their mean, using the default
# threshold and penalty weights of calculate_misalignment.
misalignment_df_2, misalignment_2 = calculate_misalignment(trajectory_df_2)
print("misalignment:", misalignment_2)

misalignment: 5.0


In [None]:
# Show the per-episode misalignment scores of run 2 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(misalignment_df_2)

Unnamed: 0,episode,Misalignment Score
0,0,5.0
1,1,5.0
2,2,5.0
3,3,5.0
4,4,5.0
...,...,...
95,95,5.0
96,96,5.0
97,97,5.0
98,98,5.0


B. AVERAGE TERMINATE TIME FOR DATA FRAME (trajectory_df_2)

In [None]:
# Run 2: despite the "collision_rate" names, calculate_collision_rate returns
# the step count of each episode and the mean step count (average terminate time).
collision_rate_df_2, collision_rate_2 = calculate_collision_rate(trajectory_df_2)
print("average terminate time:", collision_rate_2)

average terminate time: 39.0


In [None]:
# Show the per-episode step counts of run 2 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(collision_rate_df_2)

Unnamed: 0,Episode,Total Steps
0,0,39
1,1,39
2,2,39
3,3,39
4,4,39
...,...,...
95,95,39
96,96,39
97,97,39
98,98,39


C. STABILITY OF LEARNING CURVES (trajectory_df_2)

In [None]:
# Run 2: per-episode cumulative reward plus its variance, standard deviation and
# mean across episodes (final_reward_2 is that mean, not the last episode's reward).
learning_curve_result_df_2, variance_2, std_dev_2, final_reward_2 = analyze_learning_curve(trajectory_df_2)
print("Variance:", variance_2)
print("Standard Deviation:", std_dev_2)
print("average episodic reward:", final_reward_2)

Variance: 2.0398827448139296e-28
Standard Deviation: 1.4282446375932695e-14
average episodic reward: 27.96081466806013


In [None]:
# Show the cumulative and smoothed per-episode rewards of run 2 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(learning_curve_result_df_2)

Unnamed: 0,episode,cumulative_reward,smoothed_reward
0,0,27.960815,27.960815
1,1,27.960815,27.960815
2,2,27.960815,27.960815
3,3,27.960815,27.960815
4,4,27.960815,27.960815
...,...,...,...
95,95,27.960815,27.960815
96,96,27.960815,27.960815
97,97,27.960815,27.960815
98,98,27.960815,27.960815


3. LLM feedback Direct Edge 1 data frame 3<br>
trajectory_df_3 ---> 3_trajectory_llm_d_ideal_edge_1_df

In [None]:
# Render the raw run-3 trajectories as a Colab DataTable. The DataTable call
# stays last so the notebook displays it as the cell's output.
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_3)

Unnamed: 0,episode,time_step,state,action,reward,next_state,collision_flag,lane_index
0,0,0,"[1.0, 0.8993783, 0.75, 0.3125, 0.0, 1.0, 0.100...",4,0.756118,"[1.0, 1.0, 0.75, 0.26068053, 0.0, 1.0, 0.09892...",0,3
1,0,1,"[1.0, 1.0, 0.75, 0.26068053, 0.0, 1.0, 0.09892...",4,0.737227,"[1.0, 1.0, 0.75, 0.25182518, 0.0, 1.0, 0.07545...",0,3
2,0,2,"[1.0, 1.0, 0.75, 0.25182518, 0.0, 1.0, 0.07545...",4,0.733999,"[1.0, 1.0, 0.75, 0.2503119, 0.0, 1.0, 0.054520...",0,3
3,0,3,"[1.0, 1.0, 0.75, 0.2503119, 0.0, 1.0, 0.054520...",4,0.733447,"[1.0, 1.0, 0.75, 0.2500533, 0.0, 1.0, 0.034019...",0,3
4,0,4,"[1.0, 1.0, 0.75, 0.2500533, 0.0, 1.0, 0.034019...",4,0.733353,"[1.0, 1.0, 0.75, 0.25000912, 0.0, 1.0, 0.01359...",0,3
...,...,...,...,...,...,...,...,...
1095,99,6,"[1.0, 1.0, 0.75, 0.25000155, 0.0, 1.0, -0.0068...",4,0.733334,"[1.0, 1.0, 0.75, 0.25000027, 0.0, 1.0, 0.01259...",0,3
1096,99,7,"[1.0, 1.0, 0.75, 0.25000027, 0.0, 1.0, 0.01259...",4,0.733333,"[1.0, 1.0, 0.75, 0.25000006, 0.0, 1.0, -0.0110...",0,3
1097,99,8,"[1.0, 1.0, 0.75, 0.25000006, 0.0, 1.0, -0.0110...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, -0.034686707,...",0,3
1098,99,9,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, -0.034686707,...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.046965007, ...",0,3


A. MISALIGNMENT FOR DATA FRAME (trajectory_df_3)

In [None]:
# Run 3: per-episode misalignment scores and their mean, using the default
# threshold and penalty weights of calculate_misalignment.
misalignment_df_3, misalignment_3 = calculate_misalignment(trajectory_df_3)
print("misalignment:", misalignment_3)

misalignment: 5.0


In [None]:
# Show the per-episode misalignment scores of run 3 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(misalignment_df_3)

Unnamed: 0,episode,Misalignment Score
0,0,5.0
1,1,5.0
2,2,5.0
3,3,5.0
4,4,5.0
...,...,...
95,95,5.0
96,96,5.0
97,97,5.0
98,98,5.0


B. AVERAGE TERMINATE TIME FOR DATA FRAME (trajectory_df_3)

In [None]:
# Run 3: despite the "collision_rate" names, calculate_collision_rate returns
# the step count of each episode and the mean step count (average terminate time).
collision_rate_df_3, collision_rate_3 = calculate_collision_rate(trajectory_df_3)
print("average terminate time:", collision_rate_3)

average terminate time: 11.0


In [None]:
# Show the per-episode step counts of run 3 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(collision_rate_df_3)

Unnamed: 0,Episode,Total Steps
0,0,11
1,1,11
2,2,11
3,3,11
4,4,11
...,...,...
95,95,11
96,96,11
97,97,11
98,98,11


C. STABILITY OF LEARNING CURVES (trajectory_df_3)

In [None]:
# Run 3: per-episode cumulative reward plus its variance, standard deviation and
# mean across episodes (final_reward_3 is that mean, not the last episode's reward).
learning_curve_result_df_3, variance_3, std_dev_3, final_reward_3 = analyze_learning_curve(trajectory_df_3)
print("Variance:", variance_3)
print("Standard Deviation:", std_dev_3)
print("average episodic reward:", final_reward_3)

Variance: 2.5817265989051294e-28
Standard Deviation: 1.606775217292428e-14
average episodic reward: 7.4274813341431205


In [None]:
# Show the cumulative and smoothed per-episode rewards of run 3 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(learning_curve_result_df_3)

Unnamed: 0,episode,cumulative_reward,smoothed_reward
0,0,7.427481,7.427481
1,1,7.427481,7.427481
2,2,7.427481,7.427481
3,3,7.427481,7.427481
4,4,7.427481,7.427481
...,...,...,...
95,95,7.427481,7.427481
96,96,7.427481,7.427481
97,97,7.427481,7.427481
98,98,7.427481,7.427481


4. LLM feedback Direct Edge 1 data frame 4<br>
trajectory_df_4 ---> 4_trajectory_llm_d_ideal_edge_1_df

In [None]:
# Render the raw run-4 trajectories as a Colab DataTable. The DataTable call
# stays last so the notebook displays it as the cell's output.
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_4)

Unnamed: 0,episode,time_step,state,action,reward,next_state,collision_flag,lane_index
0,0,0,"[1.0, 0.8877945, 0.75, 0.3125, 0.0, 1.0, 0.101...",4,0.756118,"[1.0, 1.0, 0.75, 0.26068053, 0.0, 1.0, 0.11382...",0,3
1,0,1,"[1.0, 1.0, 0.75, 0.26068053, 0.0, 1.0, 0.11382...",4,0.737227,"[1.0, 1.0, 0.75, 0.25182518, 0.0, 1.0, 0.11157...",0,3
2,0,2,"[1.0, 1.0, 0.75, 0.25182518, 0.0, 1.0, 0.11157...",4,0.733999,"[1.0, 1.0, 0.75, 0.2503119, 0.0, 1.0, 0.106369...",0,3
3,0,3,"[1.0, 1.0, 0.75, 0.2503119, 0.0, 1.0, 0.106369...",4,0.733447,"[1.0, 1.0, 0.75, 0.2500533, 0.0, 1.0, 0.098722...",0,3
4,0,4,"[1.0, 1.0, 0.75, 0.2500533, 0.0, 1.0, 0.098722...",4,0.733353,"[1.0, 1.0, 0.75, 0.25000912, 0.0, 1.0, 0.08956...",0,3
...,...,...,...,...,...,...,...,...
1795,99,13,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.08188206, -...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.082728624, ...",0,3
1796,99,14,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.082728624, ...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.08354383, -...",0,3
1797,99,15,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.08354383, -...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.07735787, -...",0,3
1798,99,16,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.07735787, -...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.02415142, -...",0,3


A. MISALIGNMENT FOR DATA FRAME (trajectory_df_4)

In [None]:
# Run 4: per-episode misalignment scores and their mean, using the default
# threshold and penalty weights of calculate_misalignment.
misalignment_df_4, misalignment_4 = calculate_misalignment(trajectory_df_4)
print("misalignment:", misalignment_4)

misalignment: 5.0


In [None]:
# Show the per-episode misalignment scores of run 4 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(misalignment_df_4)

Unnamed: 0,episode,Misalignment Score
0,0,5.0
1,1,5.0
2,2,5.0
3,3,5.0
4,4,5.0
...,...,...
95,95,5.0
96,96,5.0
97,97,5.0
98,98,5.0


B. AVERAGE TERMINATE TIME FOR DATA FRAME (trajectory_df_4)

In [None]:
# Run 4: despite the "collision_rate" names, calculate_collision_rate returns
# the step count of each episode and the mean step count (average terminate time).
collision_rate_df_4, collision_rate_4 = calculate_collision_rate(trajectory_df_4)
print("average terminate time:", collision_rate_4)

average terminate time: 18.0


In [None]:
# Show the per-episode step counts of run 4 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(collision_rate_df_4)

Unnamed: 0,Episode,Total Steps
0,0,18
1,1,18
2,2,18
3,3,18
4,4,18
...,...,...
95,95,18
96,96,18
97,97,18
98,98,18


C. STABILITY OF LEARNING CURVES (trajectory_df_4)

In [None]:
# Run 4: per-episode cumulative reward plus its variance, standard deviation and
# mean across episodes (final_reward_4 is that mean, not the last episode's reward).
learning_curve_result_df_4, variance_4, std_dev_4, final_reward_4 = analyze_learning_curve(trajectory_df_4)
print("Variance:", variance_4)
print("Standard Deviation:", std_dev_4)
print("average episodic reward:", final_reward_4)

Variance: 7.968291971929412e-29
Standard Deviation: 8.926528984957934e-15
average episodic reward: 12.560814668060118


In [None]:
# Show the cumulative and smoothed per-episode rewards of run 4 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(learning_curve_result_df_4)

Unnamed: 0,episode,cumulative_reward,smoothed_reward
0,0,12.560815,12.560815
1,1,12.560815,12.560815
2,2,12.560815,12.560815
3,3,12.560815,12.560815
4,4,12.560815,12.560815
...,...,...,...
95,95,12.560815,12.560815
96,96,12.560815,12.560815
97,97,12.560815,12.560815
98,98,12.560815,12.560815


5. LLM feedback Direct Edge 1 data frame 5<br>
trajectory_df_5 ---> 5_trajectory_llm_d_ideal_edge_1_df

In [None]:
# Render the raw run-5 trajectories as a Colab DataTable. The DataTable call
# stays last so the notebook displays it as the cell's output.
data_table.enable_dataframe_formatter()
data_table.DataTable(trajectory_df_5)

Unnamed: 0,episode,time_step,state,action,reward,next_state,collision_flag,lane_index
0,0,0,"[1.0, 0.87540454, 0.75, 0.3125, 0.0, 1.0, 0.10...",4,0.756118,"[1.0, 0.98784125, 0.75, 0.26068053, 0.0, 1.0, ...",0,3
1,0,1,"[1.0, 0.98784125, 0.75, 0.26068053, 0.0, 1.0, ...",4,0.737227,"[1.0, 1.0, 0.75, 0.25182518, 0.0, 1.0, 0.09780...",0,3
2,0,2,"[1.0, 1.0, 0.75, 0.25182518, 0.0, 1.0, 0.09780...",4,0.733999,"[1.0, 1.0, 0.75, 0.2503119, 0.0, 1.0, 0.080069...",0,3
3,0,3,"[1.0, 1.0, 0.75, 0.2503119, 0.0, 1.0, 0.080069...",4,0.733447,"[1.0, 1.0, 0.75, 0.2500533, 0.0, 1.0, 0.062994...",0,3
4,0,4,"[1.0, 1.0, 0.75, 0.2500533, 0.0, 1.0, 0.062994...",4,0.733353,"[1.0, 1.0, 0.75, 0.25000912, 0.0, 1.0, 0.04603...",0,3
...,...,...,...,...,...,...,...,...
1495,99,10,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, -0.0386703, 1...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.09521675, 1...",0,3
1496,99,11,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.09521675, 1...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.078240134, ...",0,3
1497,99,12,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, 0.078240134, ...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, -0.04627808, ...",0,3
1498,99,13,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, -0.04627808, ...",4,0.733333,"[1.0, 1.0, 0.75, 0.25, 0.0, 1.0, -0.028820703,...",0,3


A. MISALIGNMENT FOR DATA FRAME (trajectory_df_5)

In [None]:
# Run 5: per-episode misalignment scores and their mean, using the default
# threshold and penalty weights of calculate_misalignment.
misalignment_df_5, misalignment_5 = calculate_misalignment(trajectory_df_5)
print("misalignment:", misalignment_5)

misalignment: 5.0


In [None]:
# Show the per-episode misalignment scores of run 5 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(misalignment_df_5)

Unnamed: 0,episode,Misalignment Score
0,0,5.0
1,1,5.0
2,2,5.0
3,3,5.0
4,4,5.0
...,...,...
95,95,5.0
96,96,5.0
97,97,5.0
98,98,5.0


B. AVERAGE TERMINATE TIME FOR DATA FRAME (trajectory_df_5)

In [None]:
# Run 5: despite the "collision_rate" names, calculate_collision_rate returns
# the step count of each episode and the mean step count (average terminate time).
collision_rate_df_5, collision_rate_5 = calculate_collision_rate(trajectory_df_5)
print("average terminate time:", collision_rate_5)

average terminate time: 15.0


In [None]:
# Show the per-episode step counts of run 5 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(collision_rate_df_5)

Unnamed: 0,Episode,Total Steps
0,0,15
1,1,15
2,2,15
3,3,15
4,4,15
...,...,...
95,95,15
96,96,15
97,97,15
98,98,15


C. STABILITY OF LEARNING CURVES (trajectory_df_5)

In [None]:
# Run 5: per-episode cumulative reward plus its variance, standard deviation and
# mean across episodes (final_reward_5 is that mean, not the last episode's reward).
learning_curve_result_df_5, variance_5, std_dev_5, final_reward_5 = analyze_learning_curve(trajectory_df_5)
print("Variance:", variance_5)
print("Standard Deviation:", std_dev_5)
print("average episodic reward:", final_reward_5)

Variance: 7.171462774736471e-28
Standard Deviation: 2.67795869548738e-14
average episodic reward: 10.36081466805962


In [None]:
# Show the cumulative and smoothed per-episode rewards of run 5 as a Colab DataTable.
data_table.enable_dataframe_formatter()
data_table.DataTable(learning_curve_result_df_5)

Unnamed: 0,episode,cumulative_reward,smoothed_reward
0,0,10.360815,10.360815
1,1,10.360815,10.360815
2,2,10.360815,10.360815
3,3,10.360815,10.360815
4,4,10.360815,10.360815
...,...,...,...
95,95,10.360815,10.360815
96,96,10.360815,10.360815
97,97,10.360815,10.360815
98,98,10.360815,10.360815


COMBINED DATA FRAMES - LLM-D-EDGE-1

In [None]:
# Join the per-episode cumulative rewards of the five runs on 'episode' and add
# their row-wise mean ('mean_cumulative_reward').
cummulative_reward = create_cummulative_reward(
        learning_curve_result_df_1,
        learning_curve_result_df_2,
        learning_curve_result_df_3,
        learning_curve_result_df_4,
        learning_curve_result_df_5
    )

# Display the combined table; the DataTable call stays last so it is rendered.
data_table.enable_dataframe_formatter()
data_table.DataTable(cummulative_reward)

Unnamed: 0,episode,cumulative_reward_1,cumulative_reward_2,cumulative_reward_3,cumulative_reward_4,cumulative_reward_5,mean_cumulative_reward
0,0,12.560815,27.960815,7.427481,12.560815,10.360815,14.174148
1,1,12.560815,27.960815,7.427481,12.560815,10.360815,14.174148
2,2,12.560815,27.960815,7.427481,12.560815,10.360815,14.174148
3,3,12.560815,27.960815,7.427481,12.560815,10.360815,14.174148
4,4,12.560815,27.960815,7.427481,12.560815,10.360815,14.174148
...,...,...,...,...,...,...,...
95,95,12.560815,27.960815,7.427481,12.560815,10.360815,14.174148
96,96,12.560815,27.960815,7.427481,12.560815,10.360815,14.174148
97,97,12.560815,27.960815,7.427481,12.560815,10.360815,14.174148
98,98,12.560815,27.960815,7.427481,12.560815,10.360815,14.174148


FINAL MISALIGNMENT, AVERAGE TERMINATE TIME & AVERAGE EPISODIC REWARD : LLM-D

In [None]:
 # Organize the values into lists
# One summary value per run (runs 1-5) for each of the three metrics.
misalignment_list_1 = [misalignment_1, misalignment_2, misalignment_3, misalignment_4, misalignment_5]
# The collision_rate_* variables hold each run's average terminate time (mean steps per episode).
average_terminate_time_list_1 = [collision_rate_1, collision_rate_2, collision_rate_3, collision_rate_4, collision_rate_5]
# The final_reward_* variables hold each run's mean cumulative reward per episode.
average_episodic_reward_list_1 = [final_reward_1, final_reward_2, final_reward_3, final_reward_4, final_reward_5]

# Average each metric over the five runs (unweighted mean of the per-run values)
mean_misalignment_1, mean_average_terminate_time_1 , mean_average_episodic_reward_1 = calculate_performance(misalignment_list_1, average_terminate_time_list_1, average_episodic_reward_list_1)

# Print the results
print("LLM_D EDGE CASE 1 FINAL MISALIGNMENT:", mean_misalignment_1)
print("LLM_D EDGE CASE 1 TERMINATE TIME:", mean_average_terminate_time_1)
print("LLM_D EDGE CASE 1 AVERAGE EPISODIC REWARD:", mean_average_episodic_reward_1)

LLM_D EDGE CASE 1 FINAL MISALIGNMENT: 5.0
LLM_D EDGE CASE 1 TERMINATE TIME: 20.2
LLM_D EDGE CASE 1 AVERAGE EPISODIC REWARD: 14.17414800127662
