# Transitioning to autonomous driving: <br>Mixed vehicle autonomy levels on freeways
### Notebook to analyse simulation warm-up time.

Jesse Poland<br>
TU Delft<br>
Date: 02-10-2024<br>

### 0. Python packages and loading data files

First, Python packages are imported to provide specific functionalities within the notebook. Then, the file paths are set to retrieve stored simulation data for all experiment runs and seeds. The simulation data is loaded into Pandas DataFrames, making the data ready for analysis.

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu
from IPython.display import display
import zipfile

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [3]:
# name of the experiment whose simulation output is analysed
experiment_name = 'full_level_runs_4'

# folder holding the Java simulation data of that experiment
experiment_folder = '\\'.join([
    r'C:\Users\jesse\Documents\EPA_TUDelft\MasterThesis\thesis_experiments\SIMLAB-BOTLEK',
    experiment_name,
])

# CSV files that are available for every run
input_values = 'inputValues.csv'
intermediate_output = 'intermediateOutputData.csv'
single_output = 'singleOutputData.csv'
sequence_output = 'sequenceOutputData.csv'
lane_change_output = 'laneChangeOutputData.csv'

In [4]:
# function to get a list of all file/folder names within a folder
def get_file_names(path):
    """Return the names of all entries in ``path``, sorted alphabetically.

    ``os.listdir`` yields entries in arbitrary order. Sorting makes the
    result reproducible, so code that only processes the first few entries
    always picks the same ones.
    """
    return sorted(os.listdir(path))

# names of the seed folders found in the experiment folder
seed_folders = get_file_names(path=experiment_folder)

In [5]:
# function to retrieve any data within the project folder as dataframe
def load_dataframe(columns_of_interest, folder, file, input_file=None, *, max_runs_per_seed=2):
    """Collect selected columns from the zipped CSV output of the seeds/runs.

    The folder layout read here is ``folder/<seed>/<run folder>/<archive>``.
    The run number is the integer after the first underscore in the run
    folder name, and each archive is named like the CSV it contains, with
    the last four characters ('.csv') replaced by '.zip'.

    Parameters
    ----------
    columns_of_interest : list
        Column labels to keep from each run's CSV.
    folder : str
        Experiment folder containing one sub-folder per seed.
    file : str
        Name of the CSV member to read, e.g. 'sequenceOutputData.csv'.
    input_file : str, optional
        Name of a second zipped CSV in the same run folder. When given, the
        first row of that CSV is broadcast as extra columns to every row
        of the run.
    max_runs_per_seed : int or None, optional
        Number of run folders (in sorted name order) that are loaded per
        seed. The default of 2 is the limit that used to be hard-coded;
        pass None to load every run.

    Returns
    -------
    pandas.DataFrame
        The rows of all loaded runs with a 'run' column added and a fresh
        0..n-1 index. An empty DataFrame is returned when nothing was loaded.
    """
    # collect the per-run frames and concatenate once at the end;
    # concatenating inside the loop copies all earlier rows on every run
    frames = []

    # loop through all seed folders from this experiment (sorted, because
    # os.listdir returns entries in arbitrary order)
    for seed_name in sorted(os.listdir(folder)):
        seed_path = os.path.join(folder, seed_name)

        # runs within this seed, limited to the requested number
        # (a slice bound of None keeps every run)
        run_folders = sorted(os.listdir(seed_path))[:max_runs_per_seed]

        for run_folder in run_folders:
            run_number = int(run_folder.split('_')[1])
            data_folder = os.path.join(seed_path, run_folder)

            # open zip file (remove .csv from file name) and read the csv
            with zipfile.ZipFile(os.path.join(data_folder, f'{file[:-4]}.zip'), 'r') as zip_ref:
                with zip_ref.open(file) as data_file:
                    df_run = pd.read_csv(data_file)
            df_interest = df_run[columns_of_interest].copy()
            df_interest['run'] = run_number

            # add input values to the dataframe if the input file is specified
            if input_file is not None:
                with zipfile.ZipFile(os.path.join(data_folder, f'{input_file[:-4]}.zip'), 'r') as zip_ref:
                    with zip_ref.open(input_file) as input_data_file:
                        df_input = pd.read_csv(input_data_file)
                # broadcast input data to all rows in df_interest
                for col in df_input.columns:
                    df_interest[col] = df_input[col].iloc[0]

            frames.append(df_interest)

    # pd.concat raises on an empty list, so return an empty frame instead
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
    

### 1. Travel times
The travel time is used to determine the sample time. A traffic simulation should sample for at least 3 times the maximum travel time.

In [None]:
# function to show boxplot stats
def show_boxplot_stats(df, column):
    """Print the boxplot summary statistics of ``df[column]``.

    Printed are the lower whisker, the three quartiles, the upper whisker
    and the mean, one ``label: value`` line each. The whiskers are the most
    extreme observations that still lie within 1.5 times the interquartile
    range below Q1 and above Q3. Nothing is returned.
    """
    values = df[column]

    # quartiles of the column
    q1, median, q3 = (values.quantile(q) for q in (0.25, 0.5, 0.75))

    # whiskers reach at most 1.5 times the interquartile range past Q1/Q3
    whisker_reach = 1.5 * (q3 - q1)
    above_lower_fence = values >= (q1 - whisker_reach)
    below_upper_fence = values <= (q3 + whisker_reach)

    # statistics in the order in which they are printed
    summary = [
        ('Lower Whisker', values[above_lower_fence].min()),
        ('Q1 (25th percentile)', q1),
        ('Median (50th percentile)', median),
        ('Q3 (75th percentile)', q3),
        ('Upper Whisker', values[below_upper_fence].max()),
        ('Mean', values.mean()),
    ]
    for label, stat in summary:
        print(f'{label}: {stat}')

# get travel times and remove NaN values which were created because of intermediate values for mean FD data
df_travel_time = load_dataframe(['travel_time'], experiment_folder, intermediate_output)
df_travel_time = df_travel_time.dropna()

# create figure
fig, ax = plt.subplots()
# create boxplot
ax.boxplot(df_travel_time['travel_time'])
# set title and labels
ax.set_title('Boxplot of travel_times')
ax.set_xlabel('Travel_time')
ax.set_ylabel('seconds')

# show plot
plt.show()

# show stats of the column that was loaded above; df_travel_time only holds
# 'travel_time' and 'run', so asking for 'mean_travel_time' raised a KeyError
show_boxplot_stats(df_travel_time, 'travel_time')