In [1]:
import pandas as pd
import numpy as np
import glob
import pickle

import os
from scipy.interpolate import interp1d
import re

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go

In [2]:
#ForKK Local
FileSource = "./GeneratedDataDouble/allData.pickle"
NYC_FileSource = "./GeneratedDataDouble/NYC_allData.pickle"
ISR_FileSource = "./GeneratedDataDouble/ISR_allData.pickle"
ImagePath = "./Visualizations/"

In [3]:
OutputColumns = ["ApproachRate", "ApproachRateOther", 
                 "Rel_Pos_Magnitude", 
                 "ScenarioTime", 
                 "Filtered_Accel1","Filtered_Accel2",
                 "Filtered_Steer1","Filtered_Steer2",
                 "1_Head_Center_Distance","2_Head_Center_Distance",
                 'Adjusted_1_Head_Center_Distance', 'Adjusted_2_Head_Center_Distance',
                 "Filtered_1_Head_Velocity_Total","Filtered_2_Head_Velocity_Total",
                 "1_Turn", "2_Turn",
                 "Centerline_Offset_1", "Centerline_Offset_2",
                 '1_Indicator', '2_Indicator',
                 "RelativeRotation"]

In [4]:
FileList = []
with open(FileSource, 'rb') as f:
    FileList = pickle.load(f)

In [5]:
def separate_letters_numbers(s):
    match = re.match(r"([a-zA-Z]+)([0-9]+)", s)
    if match:
        letters, numbers = match.groups()
        return letters.upper(), numbers
    else:
        return s, ""

In [6]:
def extract_scenario_location_run(file_name):
    base_name = os.path.basename(file_name)
    parts = base_name.split('_')
    scenario = parts[0]
    location = separate_letters_numbers(parts[1])[0]
    run = separate_letters_numbers(parts[1])[1] + '_' + parts[2].split('.')[0]  # Remove the file extension
    return scenario, location, run

In [7]:
def adjust_signs_based_on_min_distance(df, distance_col, adjusted_distance_col):
    # Initialize the adjusted distance column
    df[adjusted_distance_col] = df[distance_col]

    # Determine the point of minimum distance
    min_distance_index = df[distance_col].idxmin()
    
    # Adjust distances before the minimum distance (towards the center)
    df.loc[:min_distance_index, adjusted_distance_col] *= -1

    # Ensure only one direction change is applied
    direction_changed = False

    for i in range(1, len(df)):
        # Check the current direction based on the velocity and distance
        if i <= min_distance_index:
            # Should be negative, moving towards the center
            if df.loc[i, adjusted_distance_col] > 0:
                df.loc[i, adjusted_distance_col] *= -1
        else:
            # Should be positive, moving away from the center
            if df.loc[i, adjusted_distance_col] < 0:
                df.loc[i, adjusted_distance_col] *= -1

        # Allow only one change in direction after reaching the minimum distance
        if i > min_distance_index and not direction_changed:
            direction_changed = True

    return df


In [8]:
# Initialize dictionaries to hold data by scenario and location
data_by_scenario = {}
data_by_scenario_location = {}
data_by_run = {} 
data_by_participant = {}

for file_path in FileList:
    if not os.path.exists(file_path):
        print(f"Could not find {file_path}")
        continue
    
    # print(file_path)
    data = pd.read_feather(file_path)
    
    # Adjust distance with sign
    # data['1_Distance_Change'] = data['1_Head_Center_Distance'].diff()
    # data['Adjusted_1_Head_Center_Distance'] = data['1_Head_Center_Distance']
    # data.loc[data['1_Distance_Change'] < 0, 'Adjusted_1_Head_Center_Distance'] *= -1
    
    # data['2_Distance_Change'] = data['2_Head_Center_Distance'].diff()
    # data['Adjusted_2_Head_Center_Distance'] = data['2_Head_Center_Distance']
    # data.loc[data['2_Distance_Change'] < 0, 'Adjusted_2_Head_Center_Distance'] *= -1
    
    # Apply the adjustment to each run
    data = adjust_signs_based_on_min_distance(data, '1_Head_Center_Distance', 'Adjusted_1_Head_Center_Distance')
    data = adjust_signs_based_on_min_distance(data, '2_Head_Center_Distance', 'Adjusted_2_Head_Center_Distance')

    df = data[OutputColumns]
    
    # Extract scenario, location, and run from the file name
    scenario, location, run = extract_scenario_location_run(file_path)
    
    # Store features in dictionaries by run
    data_by_run[scenario + '_' + location + '_' + run] = df
    
    # Store features in dictionaries by scenario
    if scenario not in data_by_scenario:
        data_by_scenario[scenario] = []
    data_by_scenario[scenario].append(df)
    
    # Store features in dictionaries by participant
    participant = location + '_' + run
    if participant not in data_by_participant:
        data_by_participant[participant] = {}
    if scenario not in data_by_participant[participant]:
        data_by_participant[participant][scenario] = []
    data_by_participant[participant][scenario].append(df)
    
    # # Append data to the corresponding scenario and location
    # if scenario not in data_by_scenario_location:
    #     data_by_scenario_location[scenario] = {}
    # if location not in data_by_scenario_location[scenario]:
    #     data_by_scenario_location[scenario][location] = []
    # data_by_scenario_location[scenario][location].append(df)

In [9]:
len(data_by_run), len(data_by_participant), len(data_by_participant['NYC_22_1A']), len(data_by_participant['NYC_24_1A']['CP5'])

(928, 172, 3, 1)

# Feature Engineering


In [11]:
feature_by_participant = pd.DataFrame(data=data_by_participant.keys(), 
                                      columns=['Participant'])
feature_by_participant[['Free_Road_Delta', # Free-road acceleration parameter
                        'Deceleration', # Deceleration parameter
                        'Max_Accel', # Maximum acceleration
                        'Max_Speed', # Maximum speed
                        'Max_Speed', 
                        'Dist_w_CarB'
                        ]] = np.nan
feature_by_participant

Unnamed: 0,Participant,Free_Road_Delta,Deceleration,Max_Accel,Max_Speed,Dist_w_CarB
0,NYC_22_1A,,,,,
1,NYC_22_1B,,,,,
2,NYC_1_1A,,,,,
3,NYC_1_1B,,,,,
4,NYC_25_1A,,,,,
...,...,...,...,...,...,...
167,ISR_32_1B,,,,,
168,ISR_04_1A,,,,,
169,ISR_04_1B,,,,,
170,ISR_03_1A,,,,,


## Preferred Speed

In [12]:
for participant, scenarios in data_by_participant.items():
    max_speeds = []
    for scenario, df_list in scenarios.items():
        for df in df_list:
            max_speeds.append(df['Filtered_1_Head_Velocity_Total'].max())
    feature_by_participant.loc[feature_by_participant['Participant'] == participant, ['Max_Speed']] = np.mean(max_speeds)
feature_by_participant

Unnamed: 0,Participant,Free_Road_Delta,Deceleration,Max_Accel,Max_Speed,Dist_w_CarB
0,NYC_22_1A,,,,6.872677,
1,NYC_22_1B,,,,7.144224,
2,NYC_1_1A,,,,5.418505,
3,NYC_1_1B,,,,9.589591,
4,NYC_25_1A,,,,5.804289,
...,...,...,...,...,...,...
167,ISR_32_1B,,,,4.310480,
168,ISR_04_1A,,,,5.469113,
169,ISR_04_1B,,,,2.763167,
170,ISR_03_1A,,,,5.034987,


In [13]:
import plotly.express as px
import numpy as np
from scipy.stats import gaussian_kde

fig = px.histogram(feature_by_participant, x='Max_Speed', title='Speed Distribution', 
                   marginal='violin', histnorm='density')

# Calculate KDE
speed_data = feature_by_participant['Max_Speed']
kde = gaussian_kde(speed_data)
x_range = np.linspace(min(speed_data), max(speed_data), 1000)
fig.add_trace(go.Scatter(x=x_range, y=kde.evaluate(x_range)*100, mode='lines', name='KDE'))

fig.show()


## Maximum Acceleration

In [14]:
for participant, scenarios in data_by_participant.items():
    max_accel = []
    for scenario, df_list in scenarios.items():
        for df in df_list:
            max_accel.append(df['Filtered_Accel1'].max())
    feature_by_participant.loc[feature_by_participant['Participant'] == participant, ['Max_Accel']] = np.max(max_accel)
    # Max or mean?
feature_by_participant

Unnamed: 0,Participant,Free_Road_Delta,Deceleration,Max_Accel,Max_Speed,Dist_w_CarB
0,NYC_22_1A,,,0.99994,6.872677,
1,NYC_22_1B,,,1.00000,7.144224,
2,NYC_1_1A,,,0.57930,5.418505,
3,NYC_1_1B,,,1.00000,9.589591,
4,NYC_25_1A,,,0.80026,5.804289,
...,...,...,...,...,...,...
167,ISR_32_1B,,,0.87056,4.310480,
168,ISR_04_1A,,,0.63830,5.469113,
169,ISR_04_1B,,,0.56434,2.763167,
170,ISR_03_1A,,,0.69770,5.034987,


In [15]:
fig = px.histogram(feature_by_participant, x='Max_Accel', title='Accel Distribution', 
                   marginal='violin', histnorm='density')

# Calculate KDE
accel_data = feature_by_participant['Max_Accel']
kde = gaussian_kde(accel_data)
x_range = np.linspace(min(accel_data), max(accel_data), 1000)
fig.add_trace(go.Scatter(x=x_range, y=kde.evaluate(x_range)*100, mode='lines', name='KDE'))

fig.show()

## Free-Road Acceleration
- after turning at 30m: AV will accelerate (0.5m^2/s) to speed limit (5m/s)

https://www.sciencedirect.com/topics/computer-science/intelligent-driver-model
https://www.researchgate.net/publication/46158245_Enhanced_Intelligent_Driver_Model_to_Access_the_Impact_of_Driving_Strategies_on_Traffic_Capacity 

Suppose a vehicle has a preferred speed of $v_{0}$. Suppose the vehicle is moving at a speed v. If there is no traffic on road, the vehicle may show acceleration to attain the best speed proportional to the speed difference with the preferred speed, given by Eq. (21.2)

free-road acceleration strategy $\dot{v}_{free}(v) = a[1−(\frac{v}{v_{0}})^{\delta}]$

The free acceleration is characterized by the desired speed v0, the maximum acceleration a, and the exponent δ characterizing how the acceleration decreases with velocity (δ = 1 corresponds to a linear increase while δ → ∞ denotes a constant acceleration).

In [16]:
from scipy.optimize import curve_fit

# Define the free-road acceleration function
def free_road_acceleration(a, v_ratio, delta):
    return a * (1 - v_ratio ** delta)

# Fit the free-road acceleration curve for each participant
free_road_dist_threshold = -12

for participant, scenarios in data_by_participant.items():
    # Prepare data for fitting
    acceleration_data = []
    vel_ratio_data = []
    max_accel_data = []
    
    v0_values = feature_by_participant.loc[feature_by_participant['Participant'] == participant, 
                               ['Max_Speed']].values.flatten()
    a_values = feature_by_participant.loc[feature_by_participant['Participant'] == participant, 
                               ['Max_Accel']].values.flatten()
    
    for scenario, df_list in scenarios.items():
        for df in df_list:
            free_road_idx = df[(df['Adjusted_1_Head_Center_Distance'] < free_road_dist_threshold) # The car is moving towards the center
                               & (df['Filtered_Accel1'] > 0)].index # The car is accelerating
            for i in free_road_idx:
                acceleration_data.extend(df.loc[free_road_idx,'Filtered_Accel1'].values)
                vel_ratio_data.extend(df.loc[free_road_idx, 'Filtered_1_Head_Velocity_Total'].values / v0_values[0])
                max_accel_data.extend([a_values[0]] * len(free_road_idx))

    # Flatten the list of data
    acceleration_data = np.array(acceleration_data).flatten()
    vel_ratio_data = np.array(vel_ratio_data).flatten()
    max_accel_data = np.array(max_accel_data).flatten()
    # print(len(acceleration_data), len(vel_ratio_data), len(max_accel_data))

    if len(acceleration_data) > 0 and len(vel_ratio_data) > 0:
        try:
            # Fit the curve
            popt, pcov = curve_fit(lambda vel_ratio, delta: free_road_acceleration(max_accel_data, vel_ratio, delta), 
                                vel_ratio_data, acceleration_data, bounds=(1, np.inf))

            # Extract the fitted parameters
            delta_fitted = popt[0]

            # print(f"Fitted parameters: delta = {delta_fitted}")
            feature_by_participant.loc[feature_by_participant['Participant'] == participant, 
                                    ['Free_Road_Delta']] = delta_fitted
        except RuntimeError:
            print(f"Could not fit the curve for participant {participant}")
    else:
        print(f"No valid data for participant {participant}")

feature_by_participant

No valid data for participant NYC_5_1A


Unnamed: 0,Participant,Free_Road_Delta,Deceleration,Max_Accel,Max_Speed,Dist_w_CarB
0,NYC_22_1A,1.000000,,0.99994,6.872677,
1,NYC_22_1B,1.000000,,1.00000,7.144224,
2,NYC_1_1A,5.256831,,0.57930,5.418505,
3,NYC_1_1B,5.866160,,1.00000,9.589591,
4,NYC_25_1A,1.000000,,0.80026,5.804289,
...,...,...,...,...,...,...
167,ISR_32_1B,1.000000,,0.87056,4.310480,
168,ISR_04_1A,1.000000,,0.63830,5.469113,
169,ISR_04_1B,1.000000,,0.56434,2.763167,
170,ISR_03_1A,1.000000,,0.69770,5.034987,


In [22]:
fig = px.histogram(feature_by_participant, x='Free_Road_Delta', title='Free_Road_Delta Distribution', 
                   marginal='violin', histnorm='density')

# Calculate KDE
delta_data = feature_by_participant['Free_Road_Delta'].dropna()
kde = gaussian_kde(delta_data)
x_range = np.linspace(min(delta_data), max(delta_data), 1000)
fig.add_trace(go.Scatter(x=x_range, y=kde.evaluate(x_range)*100, mode='lines', name='KDE'))

fig.show()

## Deceleration Strategy

- within a distance with car B (10 m): AV will decelerate (1m^2/s) to stop
    - who has the right of way will yield earlier: stop at a point no matter how far the distance is with car B

https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4290188 

10.1016/j.trpro.2017.05.486 

Deceleration model: 

## Intersection Acceleration
- after the distance with car B reach (10 m) again: AV will accelerate to pass the intersection

In [63]:
run = 'CP5_NYC_24_1A'

feature_by_participant.loc[feature_by_participant['Participant'] == run, ['Acceleration']]
# feature_by_participant

KeyError: "None of [Index(['Acceleration'], dtype='object')] are in the [columns]"

In [54]:
for participant, scenario in data_by_participant.items():
    for scenario, df in scenario.items():
        
        
        feature_by_participant.loc[feature_by_participant['Participant'] == participant, 
                               ['Acceleration']] = df['Filtered_Accel1'].mean()

    break