In [1]:
import ast
import os
import random
import warnings

import autogluon.tabular
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import pandas as pd
import sklearn.linear_model
import sklearn.metrics
import sklearn.model_selection

pd.options.display.max_columns = 100
pd.options.display.max_colwidth = 200

warnings.filterwarnings("ignore", message="Can't initialize NVML")

In [2]:
DIRECTORY_DATA = 'data/model-autogluon'
os.makedirs(DIRECTORY_DATA, exist_ok=True)

# Preparing data

In [3]:
# FILENAME_DF_ALL_CLEAN = None
#FILENAME_DF_ALL_CLEAN = 'data/20241230_173555/df_all_clean.csv'
FILENAME_DF_ALL_CLEAN = 'data/20250128_094430/df_all_clean.csv'

In [4]:
SEPARATOR_COL = ': '


def col2parts(col: str) -> tuple[str, str]:
    parts = col.split(SEPARATOR_COL)
    if len(parts) == 1:
        return '', parts[0]
    assert len(parts) == 2
    return parts[0], parts[1]


if FILENAME_DF_ALL_CLEAN is not None:
    df_all = pd.read_csv(FILENAME_DF_ALL_CLEAN, header=list(range(2)), index_col=0)
    df_all.columns = [SEPARATOR_COL.join(col) for col in df_all.columns]
else:
    runnames = [
        '20241203_170129_all600',
        '20241213_104400_racing',
        '20241214_122216_racing_passhum',
    ]
    df_all = pd.concat(
        [pd.read_csv(f'data/{runname}/df_all.csv') 
         for runname in runnames],
        keys=runnames,
        names=['runname', 'row']
    )
    assert all(SEPARATOR_COL not in col for col in df_all.columns), df_all.columns

print(FILENAME_DF_ALL_CLEAN)
df_all.info()

data/20250128_094430/df_all_clean.csv
<class 'pandas.core.frame.DataFrame'>
Index: 1900 entries, map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness no, forcing change of priorities to map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-9.json, passhum 0, slowness without rerouting, forcing stops
Data columns (total 38 columns):
 #   Column                                                                             Non-Null Count  Dtype  
---  ------                                                                             --------------  -----  
 0   Violation type: Priority violation                                                 1900 non-null   bool   
 1   Violation type: Speed violation                                                    1900 non-null   bool   
 2   Violation type: Priority violation and Speed violation                             1900 non-null   bool   
 3   Coordination strategy: Change of 

In [5]:
def series2values(series):
    dtype = series.dtype
    if dtype == 'bool':
        return series.astype('int').values
    if dtype in ('int64', 'float64'):
        return series.values
    if dtype == 'object':
        return series.astype('category').cat.codes
    raise TypeError(f'{dtype} is not supported')
    

if FILENAME_DF_ALL_CLEAN is not None:
    COLUMNS_NONSPLIT = {}
    COLUMNS_INPUT = {col: series2values 
                     for col in df_all.columns 
                     if col2parts(col)[0] != 'Output of simulation (execution)'
                     and not col2parts(col)[1].startswith('~ ')} 
    COLUMNS_OUTPUT = {col: series2values
                      for col in df_all.columns 
                      if col2parts(col)[0] == 'Output of simulation (execution)'
                      and not col2parts(col)[1].startswith('~ ')}
else:
    COLUMNS_NONSPLIT = {
        'Scenario ID': lambda series: series.values,
    }
    COLUMNS_INPUT = {
        'Coordination strategy': lambda series: series.astype('category').cat.codes.values,
        'isCanPassFirstHum': lambda series: series.astype('int').values,
        'isRacingThroughCrossroadAllowed': lambda series: series.astype('int').values,
        
        'Vehicle ID': lambda series: series.values,
        'Linearization C': lambda series: [
            series.apply(lambda x: -1 if x is None else x[i])
            for i in range(len(series.dropna().iloc[0]))
        ],   
    }
    COLUMNS_OUTPUT = {
        'traveled total, m': lambda series: series.values,
        'No. of completed missions': lambda series: series.values,
        'No. of collisions': lambda series: series.values,
        'No. of near-misses': lambda series: series.values,
    }

COLUMNS_ALL = {**COLUMNS_NONSPLIT, **COLUMNS_INPUT, **COLUMNS_OUTPUT}
COLUMNS_ALL

{'Violation type: Priority violation': <function __main__.series2values(series)>,
 'Violation type: Speed violation': <function __main__.series2values(series)>,
 'Violation type: Priority violation and Speed violation': <function __main__.series2values(series)>,
 'Coordination strategy: Change of priorities': <function __main__.series2values(series)>,
 'Coordination strategy: Stops': <function __main__.series2values(series)>,
 'Coordination strategy: Rerouting': <function __main__.series2values(series)>,
 'Static map features: No. of OPs': <function __main__.series2values(series)>,
 'Static map features: Connectivity': <function __main__.series2values(series)>,
 'Output of simulation (planning): POD score for MV (V0)': <function __main__.series2values(series)>,
 'Output of simulation (planning): POD score for AV (V1)': <function __main__.series2values(series)>,
 'Output of simulation (planning): POD score for AV (V2)': <function __main__.series2values(series)>,
 'Output of simulation (

In [6]:
df_inout = df_all[list(COLUMNS_ALL)]
df_inout

Unnamed: 0,Violation type: Priority violation,Violation type: Speed violation,Violation type: Priority violation and Speed violation,Coordination strategy: Change of priorities,Coordination strategy: Stops,Coordination strategy: Rerouting,Static map features: No. of OPs,Static map features: Connectivity,Output of simulation (planning): POD score for MV (V0),Output of simulation (planning): POD score for AV (V1),Output of simulation (planning): POD score for AV (V2),Output of simulation (planning): POD score for AV (V3),Output of simulation (planning): Mean POD score for AVs (V1-V3),Output of simulation (planning): Mission length for MV (V0),Output of simulation (planning): Mission length for AV (V1),Output of simulation (planning): Mission length for AV (V2),Output of simulation (planning): Mission length for AV (V3),Output of simulation (planning): Total Mission length for AVs (V1-V3),Output of simulation (planning): Mean Mission length for AVs (V1-V3),Output of simulation (execution): No. of completed missions for AV (V1),Output of simulation (execution): No. of completed missions for AV (V2),Output of simulation (execution): No. of completed missions for AV (V3),Output of simulation (execution): Total No. of completed missions for AVs (V1-V3),Output of simulation (execution): No. of collisions for AV (V1),Output of simulation (execution): No. of collisions for AV (V2),Output of simulation (execution): No. of collisions for AV (V3),Output of simulation (execution): Total No. of collisions for AVs (V1-V3),Output of simulation (execution): Collision rate,Output of simulation (execution): No. of near-misses for AV (V1),Output of simulation (execution): No. of near-misses for AV (V2),Output of simulation (execution): No. of near-misses for AV (V3),Output of simulation (execution): Total No. of near-misses for AVs (V1-V3),Output of simulation (execution): Near-miss rate
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness no, forcing change of priorities",True,False,False,True,False,False,2,high,0.180,0.092,0.033,0.099,0.076,778,255,943,1517,2715,905.0,21,8,5,34,2,1,0,3,0.143,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness no, forcing ignoring human",True,False,False,False,False,False,2,high,0.180,0.092,0.033,0.099,0.076,778,255,943,1517,2715,905.0,23,8,5,36,3,5,0,8,0.364,1,0,0,1,0.045
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness no, forcing no",False,False,False,False,False,False,2,high,0.180,0.092,0.033,0.099,0.076,778,255,943,1517,2715,905.0,22,8,5,35,0,0,0,0,0.000,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness no, forcing stops",True,False,False,False,True,False,2,high,0.180,0.092,0.033,0.099,0.076,778,255,943,1517,2715,905.0,18,8,5,31,4,2,0,6,0.375,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness with rerouting, forcing change of priorities",False,False,True,True,False,True,2,high,0.180,0.092,0.033,0.099,0.076,778,255,943,1517,2715,905.0,19,7,4,30,0,1,0,1,0.200,0,0,0,0,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-9.json, passhum 0, slowness no, forcing stops",True,False,False,False,True,False,1,low,0.534,0.610,0.244,0.019,0.301,915,1115,1171,987,3273,1091.0,4,5,8,17,6,2,1,9,0.600,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-9.json, passhum 0, slowness without rerouting, forcing change of priorities",False,False,True,True,False,False,1,low,0.534,0.610,0.244,0.019,0.301,915,1115,1171,987,3273,1091.0,3,4,6,13,0,0,1,1,0.333,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-9.json, passhum 0, slowness without rerouting, forcing ignoring human",False,False,True,False,False,False,1,low,0.534,0.610,0.244,0.019,0.301,915,1115,1171,987,3273,1091.0,7,6,8,21,7,3,1,11,2.200,0,1,0,1,0.200
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-9.json, passhum 0, slowness without rerouting, forcing no",False,True,False,False,False,False,1,low,0.534,0.610,0.244,0.019,0.301,915,1115,1171,987,3273,1091.0,3,6,8,17,0,0,0,0,0.000,0,0,0,0,0.000


In [7]:
def parse_tuple_string(tuple_string):
    if pd.isna(tuple_string):
        return None
    return ast.literal_eval(tuple_string)


def preprocess_inout(df_inout):
    df_inout = df_inout.copy()
    
    linearization_columns = [col for col in df_inout.columns if isinstance(col, str) and col.startswith('Linearization')]
    for col in linearization_columns:
        df_inout[col] = df_inout[col].apply(parse_tuple_string)
        
    dict_preprocessed = {}
    for col, series2data in COLUMNS_ALL.items():
        data = series2data(df_inout[col])
        if not isinstance(data, list):
            dict_preprocessed[col] = data
        else:
            for i, series in enumerate(data):
                dict_preprocessed[f'{col}#{i}'] = series
    
    return pd.DataFrame(dict_preprocessed)
    

df_preprocessed = preprocess_inout(df_inout)
df_preprocessed

Unnamed: 0,Violation type: Priority violation,Violation type: Speed violation,Violation type: Priority violation and Speed violation,Coordination strategy: Change of priorities,Coordination strategy: Stops,Coordination strategy: Rerouting,Static map features: No. of OPs,Static map features: Connectivity,Output of simulation (planning): POD score for MV (V0),Output of simulation (planning): POD score for AV (V1),Output of simulation (planning): POD score for AV (V2),Output of simulation (planning): POD score for AV (V3),Output of simulation (planning): Mean POD score for AVs (V1-V3),Output of simulation (planning): Mission length for MV (V0),Output of simulation (planning): Mission length for AV (V1),Output of simulation (planning): Mission length for AV (V2),Output of simulation (planning): Mission length for AV (V3),Output of simulation (planning): Total Mission length for AVs (V1-V3),Output of simulation (planning): Mean Mission length for AVs (V1-V3),Output of simulation (execution): No. of completed missions for AV (V1),Output of simulation (execution): No. of completed missions for AV (V2),Output of simulation (execution): No. of completed missions for AV (V3),Output of simulation (execution): Total No. of completed missions for AVs (V1-V3),Output of simulation (execution): No. of collisions for AV (V1),Output of simulation (execution): No. of collisions for AV (V2),Output of simulation (execution): No. of collisions for AV (V3),Output of simulation (execution): Total No. of collisions for AVs (V1-V3),Output of simulation (execution): Collision rate,Output of simulation (execution): No. of near-misses for AV (V1),Output of simulation (execution): No. of near-misses for AV (V2),Output of simulation (execution): No. of near-misses for AV (V3),Output of simulation (execution): Total No. of near-misses for AVs (V1-V3),Output of simulation (execution): Near-miss rate
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness no, forcing change of priorities",1,0,0,1,0,0,2,0,0.180,0.092,0.033,0.099,0.076,778,255,943,1517,2715,905.0,21,8,5,34,2,1,0,3,0.143,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness no, forcing ignoring human",1,0,0,0,0,0,2,0,0.180,0.092,0.033,0.099,0.076,778,255,943,1517,2715,905.0,23,8,5,36,3,5,0,8,0.364,1,0,0,1,0.045
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness no, forcing no",0,0,0,0,0,0,2,0,0.180,0.092,0.033,0.099,0.076,778,255,943,1517,2715,905.0,22,8,5,35,0,0,0,0,0.000,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness no, forcing stops",1,0,0,0,1,0,2,0,0.180,0.092,0.033,0.099,0.076,778,255,943,1517,2715,905.0,18,8,5,31,4,2,0,6,0.375,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario1-1.json, passhum 0, slowness with rerouting, forcing change of priorities",0,0,1,1,0,1,2,0,0.180,0.092,0.033,0.099,0.076,778,255,943,1517,2715,905.0,19,7,4,30,0,1,0,1,0.200,0,0,0,0,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-9.json, passhum 0, slowness no, forcing stops",1,0,0,0,1,0,1,1,0.534,0.610,0.244,0.019,0.301,915,1115,1171,987,3273,1091.0,4,5,8,17,6,2,1,9,0.600,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-9.json, passhum 0, slowness without rerouting, forcing change of priorities",0,0,1,1,0,0,1,1,0.534,0.610,0.244,0.019,0.301,915,1115,1171,987,3273,1091.0,3,4,6,13,0,0,1,1,0.333,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-9.json, passhum 0, slowness without rerouting, forcing ignoring human",0,0,1,0,0,0,1,1,0.534,0.610,0.244,0.019,0.301,915,1115,1171,987,3273,1091.0,7,6,8,21,7,3,1,11,2.200,0,1,0,1,0.200
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-9.json, passhum 0, slowness without rerouting, forcing no",0,1,0,0,0,0,1,1,0.534,0.610,0.244,0.019,0.301,915,1115,1171,987,3273,1091.0,3,6,8,17,0,0,0,0,0.000,0,0,0,0,0.000


In [8]:
def show(obj, title=None):
    if title is not None:
        display(HTML(f"<h3>{title}</h3>"))
    display(obj)


def shuffle_df(df):
    # Shuffle by Scenario ID
    unique_scenarios = df['Scenario ID'].unique()  # Get unique Scenario IDs
    shuffled_scenarios = pd.Series(unique_scenarios).sample(frac=1, random_state=1).tolist()  # Shuffle Scenario IDs
    
    # Reorder the dataframe by the shuffled Scenario IDs
    df_shuffled = pd.concat([df[df['Scenario ID'] == scenario] for scenario in shuffled_scenarios])
    
    # Reset index (optional)
    df_shuffled = df_shuffled.reset_index(drop=True)
    
    return df_shuffled


def split_train_test(df):
    test_size = 0.2
    random_state = 1
    
    if not COLUMNS_NONSPLIT:
        return sklearn.model_selection.train_test_split(df, test_size=test_size, random_state=random_state)
    
    gss = sklearn.model_selection.GroupShuffleSplit(n_splits=1, test_size=test_size, random_state=random_state)
    column, = list(COLUMNS_NONSPLIT)
    groups = df[column]
    
    # Split the data
    for train_idx, test_idx in gss.split(df, groups=groups):
        return shuffle_df(df.iloc[train_idx]), shuffle_df(df.iloc[test_idx])


df_train, df_test = split_train_test(df_preprocessed)
show(df_train, 'df_train')
show(df_test, 'df_test')

Unnamed: 0,Violation type: Priority violation,Violation type: Speed violation,Violation type: Priority violation and Speed violation,Coordination strategy: Change of priorities,Coordination strategy: Stops,Coordination strategy: Rerouting,Static map features: No. of OPs,Static map features: Connectivity,Output of simulation (planning): POD score for MV (V0),Output of simulation (planning): POD score for AV (V1),Output of simulation (planning): POD score for AV (V2),Output of simulation (planning): POD score for AV (V3),Output of simulation (planning): Mean POD score for AVs (V1-V3),Output of simulation (planning): Mission length for MV (V0),Output of simulation (planning): Mission length for AV (V1),Output of simulation (planning): Mission length for AV (V2),Output of simulation (planning): Mission length for AV (V3),Output of simulation (planning): Total Mission length for AVs (V1-V3),Output of simulation (planning): Mean Mission length for AVs (V1-V3),Output of simulation (execution): No. of completed missions for AV (V1),Output of simulation (execution): No. of completed missions for AV (V2),Output of simulation (execution): No. of completed missions for AV (V3),Output of simulation (execution): Total No. of completed missions for AVs (V1-V3),Output of simulation (execution): No. of collisions for AV (V1),Output of simulation (execution): No. of collisions for AV (V2),Output of simulation (execution): No. of collisions for AV (V3),Output of simulation (execution): Total No. of collisions for AVs (V1-V3),Output of simulation (execution): Collision rate,Output of simulation (execution): No. of near-misses for AV (V1),Output of simulation (execution): No. of near-misses for AV (V2),Output of simulation (execution): No. of near-misses for AV (V3),Output of simulation (execution): Total No. of near-misses for AVs (V1-V3),Output of simulation (execution): Near-miss rate
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-8.json, passhum 0, slowness no, forcing stops",1,0,0,0,1,0,1,1,0.675,0.723,0.169,0.079,0.387,915,1421,1075,783,3279,1093.000,3,5,9,17,5,2,1,8,0.533,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario9-4.json, passhum 0, slowness no, forcing stops",1,0,0,0,1,0,1,0,0.655,0.812,0.468,0.021,0.456,915,1097,1201,933,3231,1077.000,4,4,8,16,5,1,1,7,0.538,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario7-8.json, passhum 0, slowness no, forcing change of priorities",1,0,0,1,0,0,1,1,0.948,0.576,0.672,0.742,0.679,453,1049,1193,1847,4089,1363.000,5,2,2,9,5,7,10,22,1.100,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario7-8.json, passhum 0, slowness with rerouting, forcing no",0,1,0,0,0,1,1,0,0.797,0.764,0.742,0.633,0.699,455,1047,1135,1791,3973,1324.333,3,3,2,8,0,0,1,1,0.000,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario3-9.json, passhum 0, slowness no, forcing stops",1,0,0,0,1,0,1,1,0.186,0.055,0.021,0.156,0.093,731,859,905,1563,3327,1109.000,8,8,4,20,1,0,4,5,0.263,0,0,0,0,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario8-2.json, passhum 0, slowness no, forcing stops",1,0,0,0,1,0,1,0,0.229,0.043,0.413,0.410,0.332,868,897,1549,1731,4177,1392.333,8,4,3,15,1,2,4,7,0.500,2,1,0,3,0.214
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario8-6.json, passhum 0, slowness no, forcing stops",1,0,0,0,1,0,1,1,0.539,0.479,0.445,0.205,0.335,868,1073,675,1769,3517,1172.333,5,7,3,15,5,2,6,13,0.520,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario9-9.json, passhum 0, slowness without rerouting, forcing change of priorities",0,0,1,1,0,0,1,0,0.503,0.554,0.153,0.019,0.250,915,1115,1131,987,3233,1077.667,3,4,6,13,0,0,0,0,0.000,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario2-10.json, passhum 0, slowness with rerouting, forcing change of priorities",0,0,1,1,0,1,1,0,0.770,0.844,0.640,0.152,0.419,587,703,469,1507,2679,893.000,1,6,4,11,1,0,0,1,0.333,2,2,0,2,0.667


Unnamed: 0,Violation type: Priority violation,Violation type: Speed violation,Violation type: Priority violation and Speed violation,Coordination strategy: Change of priorities,Coordination strategy: Stops,Coordination strategy: Rerouting,Static map features: No. of OPs,Static map features: Connectivity,Output of simulation (planning): POD score for MV (V0),Output of simulation (planning): POD score for AV (V1),Output of simulation (planning): POD score for AV (V2),Output of simulation (planning): POD score for AV (V3),Output of simulation (planning): Mean POD score for AVs (V1-V3),Output of simulation (planning): Mission length for MV (V0),Output of simulation (planning): Mission length for AV (V1),Output of simulation (planning): Mission length for AV (V2),Output of simulation (planning): Mission length for AV (V3),Output of simulation (planning): Total Mission length for AVs (V1-V3),Output of simulation (planning): Mean Mission length for AVs (V1-V3),Output of simulation (execution): No. of completed missions for AV (V1),Output of simulation (execution): No. of completed missions for AV (V2),Output of simulation (execution): No. of completed missions for AV (V3),Output of simulation (execution): Total No. of completed missions for AVs (V1-V3),Output of simulation (execution): No. of collisions for AV (V1),Output of simulation (execution): No. of collisions for AV (V2),Output of simulation (execution): No. of collisions for AV (V3),Output of simulation (execution): Total No. of collisions for AVs (V1-V3),Output of simulation (execution): Collision rate,Output of simulation (execution): No. of near-misses for AV (V1),Output of simulation (execution): No. of near-misses for AV (V2),Output of simulation (execution): No. of near-misses for AV (V3),Output of simulation (execution): Total No. of near-misses for AVs (V1-V3),Output of simulation (execution): Near-miss rate
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-4.json, passhum 0, slowness without rerouting, forcing ignoring human",0,0,1,0,0,0,1,1,0.831,1.064,0.772,0.020,0.657,915,1097,1275,933,3305,1101.667,4,4,9,17,5,6,0,11,2.750,0,0,1,1,0.250
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario3-7.json, passhum 0, slowness without rerouting, forcing change of priorities",0,0,1,1,0,0,1,0,0.686,0.890,0.552,0.487,0.652,731,995,1343,619,2957,985.667,3,2,6,11,1,1,0,2,0.667,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario1-7.json, passhum 0, slowness without rerouting, forcing no",0,1,0,0,0,0,2,1,0.161,0.019,0.048,0.124,0.075,780,605,993,1207,2805,935.000,12,8,7,27,0,0,0,0,0.000,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario3-8.json, passhum 0, slowness with rerouting, forcing stops",0,0,1,0,1,1,1,0,0.657,0.660,0.352,0.115,0.340,731,995,601,1451,3047,1015.667,4,9,4,17,1,0,0,1,0.250,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario9-7.json, passhum 0, slowness with rerouting, forcing stops",0,0,1,0,1,1,1,0,1.069,1.370,1.206,0.019,0.911,915,1201,1115,987,3303,1101.000,1,2,8,11,2,1,0,3,1.000,0,0,0,0,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario2-10.json, passhum 0, slowness no, forcing no",0,0,0,0,0,0,1,0,0.770,0.844,0.640,0.152,0.419,587,703,469,1507,2679,893.000,8,10,5,23,0,0,0,0,0.000,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario7-4.json, passhum 0, slowness no, forcing no",0,0,0,0,0,0,1,0,0.371,0.015,0.035,0.377,0.203,455,821,929,1791,3541,1180.333,9,9,4,22,0,0,1,1,0.000,0,0,0,0,0.000
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario1-3.json, passhum 0, slowness without rerouting, forcing ignoring human",0,0,1,0,0,0,2,1,0.065,0.033,0.011,0.037,0.027,780,993,957,1107,3057,1019.000,8,8,7,23,2,1,2,5,0.833,1,0,1,2,0.333
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario3-1.json, passhum 0, slowness with rerouting, forcing no",0,1,0,0,0,1,1,0,0.552,0.650,0.334,0.025,0.352,731,995,619,873,2487,829.000,4,10,9,23,0,0,0,0,0.000,0,0,0,0,0.000


# Logistic regression (as a baseline)

In [9]:
def split_df_to_X_y(df):
    columns_input_df = [col for col in df.columns
                        if (col if not isinstance(col, str) else col.split('#')[0]) in COLUMNS_INPUT]
    columns_output_df = list(COLUMNS_OUTPUT)
    assert set(COLUMNS_NONSPLIT) | set(columns_input_df) | set(columns_output_df) == set(df.columns)
    
    X = df[columns_input_df]
    y = df[columns_output_df]    
    return X, y


def run_regression(df_train, df_test):
    X_train, y_train = split_df_to_X_y(df_train)
    X_test, y_test = split_df_to_X_y(df_test)
    
    model = sklearn.linear_model.LinearRegression()
    model.fit(X_train, y_train)
    ndarray_predictions = model.predict(X_test)
    df_predictions = pd.DataFrame(ndarray_predictions, columns=y_test.columns)
    return df_predictions


df_predictions_regression = run_regression(df_train, df_test)
df_predictions_regression

Unnamed: 0,Output of simulation (execution): No. of completed missions for AV (V1),Output of simulation (execution): No. of completed missions for AV (V2),Output of simulation (execution): No. of completed missions for AV (V3),Output of simulation (execution): Total No. of completed missions for AVs (V1-V3),Output of simulation (execution): No. of collisions for AV (V1),Output of simulation (execution): No. of collisions for AV (V2),Output of simulation (execution): No. of collisions for AV (V3),Output of simulation (execution): Total No. of collisions for AVs (V1-V3),Output of simulation (execution): Collision rate,Output of simulation (execution): No. of near-misses for AV (V1),Output of simulation (execution): No. of near-misses for AV (V2),Output of simulation (execution): No. of near-misses for AV (V3),Output of simulation (execution): Total No. of near-misses for AVs (V1-V3),Output of simulation (execution): Near-miss rate
0,4.268447,3.064138,7.993787,15.272254,5.173127,3.068021,1.664345,9.807510,1.849822,0.518772,0.312087,0.387347,0.968320,0.193326
1,4.126667,2.216703,7.495648,13.827523,1.847042,0.368770,-0.027742,2.012991,0.607083,-0.022882,0.172095,0.175896,0.331285,0.077045
2,12.746320,8.179543,6.641693,27.492869,-0.903304,-0.139891,-0.097345,-1.190791,-0.162646,0.037549,0.022679,-0.042133,0.066136,-0.007099
3,3.910101,7.695835,3.530458,15.132195,2.189456,0.612784,1.330125,3.952306,0.578131,0.116931,0.230256,0.056417,0.277285,0.058694
4,-0.368676,0.734385,5.817639,6.237054,3.451806,1.812405,-0.100479,5.044254,0.969340,0.213267,0.184084,0.067842,-0.006620,0.083074
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,7.987011,9.344696,5.246085,22.553829,1.873566,0.586458,1.607891,3.981668,0.191950,0.255092,0.069379,-0.030330,0.171423,0.005361
376,9.208360,7.942734,2.510357,19.680980,-0.429328,-0.059433,2.734843,2.098339,0.011651,0.177734,0.069250,-0.039025,0.270275,0.003311
377,10.077436,8.880487,7.886034,26.808451,2.167956,2.073427,1.804683,6.026792,1.229726,0.438402,0.222247,0.274106,0.949312,0.166713
378,4.865684,7.930621,7.316616,20.108028,1.645165,0.107756,-0.628476,1.015295,0.026371,0.016458,0.204157,0.058153,0.171518,0.012267


In [10]:
def save_and_show(fig, basename):  # to avoid inlining large image data into the notebook file
    filename = f'{DIRECTORY_DATA}/{basename}-{random.random()}.png'
    fig.savefig(filename)
    
    # The `random` is because of https://stackoverflow.com/a/43640705.
    display(HTML(f'<img src="{filename}?{random.random()}" alt="{basename}" />'))
    
    plt.close(fig)
    
    return filename


def evaluate_and_plot_column(df_test, df_predictions, column):
    y_test_column = df_test[column]
    predictions_column = df_predictions[column]
    
    r2 = sklearn.metrics.r2_score(y_test_column, predictions_column)
    name = col2parts(column)[1]
    print(f"{name}:")
    print(f"- R^2 Score: {r2}")

    # Plot results for each output column
    fig = plt.figure(figsize=(10, 6))
    plt.scatter(y_test_column, predictions_column, color='blue', alpha=0.5)
    plt.plot([y_test_column.min(), y_test_column.max()], [y_test_column.min(), y_test_column.max()], 'k--', lw=2)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title(f'Actual vs Predicted Values for {name}')
    plt.grid(True)
    save_and_show(fig, f'Actual_vs_Predicted_Values_{name}')


def evaluate_and_plot_all_columns(df_test, df_predictions):
    for column in COLUMNS_OUTPUT:
        evaluate_and_plot_column(df_test, df_predictions, column)
        
        
evaluate_and_plot_all_columns(df_test, df_predictions_regression)

No. of completed missions for AV (V1):
- R^2 Score: 0.8612154596611219


No. of completed missions for AV (V2):
- R^2 Score: 0.7981056239998034


No. of completed missions for AV (V3):
- R^2 Score: 0.8505626163963709


Total No. of completed missions for AVs (V1-V3):
- R^2 Score: 0.8651920750921314


No. of collisions for AV (V1):
- R^2 Score: 0.5109130896368467


No. of collisions for AV (V2):
- R^2 Score: 0.5252916195131528


No. of collisions for AV (V3):
- R^2 Score: 0.5243018854031933


Total No. of collisions for AVs (V1-V3):
- R^2 Score: 0.6990914495253582


Collision rate:
- R^2 Score: 0.5725598046870195


No. of near-misses for AV (V1):
- R^2 Score: 0.20575834525305237


No. of near-misses for AV (V2):
- R^2 Score: 0.08970771222862584


No. of near-misses for AV (V3):
- R^2 Score: 0.13802763476019209


Total No. of near-misses for AVs (V1-V3):
- R^2 Score: 0.2664758741640749


Near-miss rate:
- R^2 Score: 0.18269626555912055


# AutoGluon

In [11]:
def run_autogluon(df_train, df_test):
    X_train, y_train = split_df_to_X_y(df_train)
    X_test, y_test = split_df_to_X_y(df_test)
    
    # Train AutoGluon models
    predictors = []
    df_predictions = pd.DataFrame()
    for column in COLUMNS_OUTPUT:
        print(f'{column=}:')
        df_train_predictor = pd.concat([X_train, y_train[[column]]], axis=1)
        
        predictor = autogluon.tabular.TabularPredictor(
            label=column, 
            eval_metric='r2', 
            problem_type='regression',
        ).fit(
            df_train_predictor,
            presets='medium',
            hyperparameters={
                'GBM': {},       # LightGBM (TODO: something like `GBMLarge`)
                'XGB': {},       # XGBoost
                'RF': {},        # Random Forest
                'XT': {},        # Extra Trees
                # 'CAT': {},      # CatBoost, omitted if slow
                # 'NN': {},       # Neural net, if you want it
                # 'LR': {},       # Linear model
                # 'KNN': {},      # K-Nearest Neighbors
            },
        )
        predictors.append(predictor)
        
        df_predictions[column] = predictor.predict(X_test)

        # Leaderboard - Display a table of different models and their performance
        df_test_predictor = pd.concat([X_test, y_test[[column]]], axis=1)
        leaderboard = predictor.leaderboard(df_test_predictor, silent=True)
        show(leaderboard, f'Leaderboard for {column}')
        
        # Feature importance on training data
        # show(
        #     predictor.feature_importance(df_train_predictor),
        #     'feature_importance(df_train_predictor)'
        # )
        # 
        # # Feature importance on test data
        # show(
        #     predictor.feature_importance(df_test_predictor),
        #     'feature_importance(df_test_predictor)'
        # )
        # Example: SHAP values for a specific model
        # shap_values = predictor.get_model_shap_values(df_test_predictor, model='LightGBM')
        # show(shap_values, 'shap_values')  # SHAP values for each feature and each prediction
        
    return predictors, df_predictions


predictors, df_predictions_autogluon = run_autogluon(df_train, df_test)
df_predictions_autogluon

No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161850"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.42 GB / 31.09 GB (7.8%)
Disk Space Avail:   286.22 GB / 693.60 GB (41.3%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161850"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): No. of completed missions for AV (V1)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2473.78 MB
	Train Data (Origin

column='Output of simulation (execution): No. of completed missions for AV (V1)':


  r = torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
	0.9785	 = Validation score   (r2)
	4.03s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.9742	 = Validation score   (r2)
	0.8s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.9716	 = Validation score   (r2)
	0.45s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	0.9813	 = Validation score   (r2)
	1.1s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'XGBoost': 0.643, 'LightGBM': 0.214, 'RandomForest': 0.143}
	0.9818	 = Validation score   (r2)
	0.04s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 6.68s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 6998.6 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenari

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,0.977101,0.981812,r2,0.077319,0.043437,5.972307,0.002331,0.000628,0.044208,2,True,5
1,XGBoost,0.976501,0.981279,r2,0.015499,0.003503,1.097715,0.015499,0.003503,1.097715,1,True,4
2,LightGBM,0.974868,0.978484,r2,0.009597,0.003868,4.032969,0.009597,0.003868,4.032969,1,True,1
3,ExtraTrees,0.972349,0.971632,r2,0.057431,0.0362,0.454296,0.057431,0.0362,0.454296,1,True,3
4,RandomForest,0.970985,0.974172,r2,0.049892,0.035438,0.797415,0.049892,0.035438,0.797415,1,True,2


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161857"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.39 GB / 31.09 GB (7.7%)
Disk Space Avail:   286.18 GB / 693.60 GB (41.3%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161857"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): No. of completed missions for AV (V2)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2445.65 MB
	Train Data (Origin

column='Output of simulation (execution): No. of completed missions for AV (V2)':


	0.9585	 = Validation score   (r2)
	0.86s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.9456	 = Validation score   (r2)
	0.43s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.9521	 = Validation score   (r2)
	0.36s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	0.9584	 = Validation score   (r2)
	0.61s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'LightGBM': 0.467, 'XGBoost': 0.4, 'ExtraTrees': 0.133}
	0.9602	 = Validation score   (r2)
	0.04s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.51s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 7311.7 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161857")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,ExtraTrees,0.940712,0.952065,r2,0.053188,0.035186,0.359707,0.053188,0.035186,0.359707,1,True,3
1,WeightedEnsemble_L2,0.939293,0.960172,r2,0.075317,0.041577,1.870838,0.002105,0.000633,0.043045,2,True,5
2,XGBoost,0.936718,0.958382,r2,0.011419,0.002541,0.612521,0.011419,0.002541,0.612521,1,True,4
3,LightGBM,0.934696,0.958502,r2,0.008605,0.003217,0.855564,0.008605,0.003217,0.855564,1,True,1
4,RandomForest,0.929461,0.945572,r2,0.049015,0.034263,0.430322,0.049015,0.034263,0.430322,1,True,2


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161900"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.35 GB / 31.09 GB (7.6%)
Disk Space Avail:   286.15 GB / 693.60 GB (41.3%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161900"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): No. of completed missions for AV (V3)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2406.78 MB
	Train Data (Origin

column='Output of simulation (execution): No. of completed missions for AV (V3)':


	0.9589	 = Validation score   (r2)
	0.71s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.9664	 = Validation score   (r2)
	0.41s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.9674	 = Validation score   (r2)
	0.34s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	0.9645	 = Validation score   (r2)
	0.53s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'ExtraTrees': 0.533, 'RandomForest': 0.2, 'LightGBM': 0.133, 'XGBoost': 0.133}
	0.9683	 = Validation score   (r2)
	0.05s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.24s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 3990.4 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161900")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,ExtraTrees,0.948384,0.967426,r2,0.069616,0.034134,0.338833,0.069616,0.034134,0.338833,1,True,3
1,WeightedEnsemble_L2,0.945462,0.968323,r2,0.135163,0.076183,2.035195,0.001979,0.000742,0.052054,2,True,5
2,RandomForest,0.939237,0.966355,r2,0.049595,0.036301,0.406853,0.049595,0.036301,0.406853,1,True,2
3,XGBoost,0.934716,0.964521,r2,0.007184,0.002641,0.531134,0.007184,0.002641,0.531134,1,True,4
4,LightGBM,0.92812,0.958936,r2,0.006789,0.002364,0.706322,0.006789,0.002364,0.706322,1,True,1


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161902"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.35 GB / 31.09 GB (7.6%)
Disk Space Avail:   286.13 GB / 693.60 GB (41.3%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161902"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): Total No. of completed missions for AVs (V1-V3)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2407.90 MB
	Train Da

column='Output of simulation (execution): Total No. of completed missions for AVs (V1-V3)':


	0.9791	 = Validation score   (r2)
	1.03s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.964	 = Validation score   (r2)
	0.44s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.9608	 = Validation score   (r2)
	0.38s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	0.9748	 = Validation score   (r2)
	0.94s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'LightGBM': 0.783, 'RandomForest': 0.13, 'XGBoost': 0.087}
	0.9795	 = Validation score   (r2)
	0.04s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 3.06s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 7196.9 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161902")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,0.966641,0.979515,r2,0.074989,0.04224,2.448771,0.002173,0.000643,0.043585,2,True,5
1,ExtraTrees,0.964581,0.960837,r2,0.060302,0.035129,0.3767,0.060302,0.035129,0.3767,1,True,3
2,RandomForest,0.963814,0.963997,r2,0.05009,0.035296,0.444047,0.05009,0.035296,0.444047,1,True,2
3,LightGBM,0.963464,0.979079,r2,0.006921,0.003267,1.026028,0.006921,0.003267,1.026028,1,True,1
4,XGBoost,0.961086,0.97483,r2,0.015805,0.003035,0.93511,0.015805,0.003035,0.93511,1,True,4


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161905"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.36 GB / 31.09 GB (7.6%)
Disk Space Avail:   286.08 GB / 693.60 GB (41.2%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161905"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): No. of collisions for AV (V1)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2420.35 MB
	Train Data (Original)  Mem

column='Output of simulation (execution): No. of collisions for AV (V1)':


	0.8807	 = Validation score   (r2)
	0.53s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.8849	 = Validation score   (r2)
	0.43s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.8879	 = Validation score   (r2)
	0.35s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	0.8735	 = Validation score   (r2)
	0.6s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'ExtraTrees': 0.444, 'LightGBM': 0.278, 'RandomForest': 0.222, 'XGBoost': 0.056}
	0.8918	 = Validation score   (r2)
	0.05s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.16s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 4102.7 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161905")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,ExtraTrees,0.836287,0.887926,r2,0.050042,0.035176,0.345396,0.050042,0.035176,0.345396,1,True,3
1,WeightedEnsemble_L2,0.833753,0.891843,r2,0.110559,0.074097,1.949834,0.002,0.000746,0.051298,2,True,5
2,LightGBM,0.818774,0.880697,r2,0.002368,0.001137,0.525854,0.002368,0.001137,0.525854,1,True,1
3,RandomForest,0.817091,0.88492,r2,0.047966,0.034507,0.431287,0.047966,0.034507,0.431287,1,True,2
4,XGBoost,0.80783,0.873471,r2,0.008183,0.002531,0.595999,0.008183,0.002531,0.595999,1,True,4


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161908"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.41 GB / 31.09 GB (7.7%)
Disk Space Avail:   286.06 GB / 693.60 GB (41.2%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161908"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): No. of collisions for AV (V2)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2467.34 MB
	Train Data (Original)  Mem

column='Output of simulation (execution): No. of collisions for AV (V2)':


	0.7114	 = Validation score   (r2)
	0.52s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.6622	 = Validation score   (r2)
	0.47s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.7167	 = Validation score   (r2)
	0.36s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	0.6805	 = Validation score   (r2)
	0.47s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'ExtraTrees': 0.556, 'LightGBM': 0.444}
	0.7265	 = Validation score   (r2)
	0.04s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.05s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 8348.7 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161908")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,ExtraTrees,0.743218,0.716739,r2,0.049575,0.034625,0.35572,0.049575,0.034625,0.35572,1,True,3
1,WeightedEnsemble_L2,0.741006,0.726474,r2,0.053913,0.036413,0.91772,0.00178,0.000618,0.044057,2,True,5
2,XGBoost,0.712761,0.680525,r2,0.009142,0.002012,0.46673,0.009142,0.002012,0.46673,1,True,4
3,LightGBM,0.706688,0.711386,r2,0.002558,0.00117,0.517943,0.002558,0.00117,0.517943,1,True,1
4,RandomForest,0.703975,0.662242,r2,0.045506,0.035582,0.470413,0.045506,0.035582,0.470413,1,True,2


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161910"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.41 GB / 31.09 GB (7.7%)
Disk Space Avail:   286.03 GB / 693.60 GB (41.2%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161910"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): No. of collisions for AV (V3)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2466.32 MB
	Train Data (Original)  Mem

column='Output of simulation (execution): No. of collisions for AV (V3)':


	0.8565	 = Validation score   (r2)
	1.17s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...


[1000]	valid_set's l2: 1.37623	valid_set's r2: 0.855045


	0.8807	 = Validation score   (r2)
	0.46s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.8848	 = Validation score   (r2)
	0.36s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	0.8882	 = Validation score   (r2)
	0.7s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'XGBoost': 0.556, 'ExtraTrees': 0.444}
	0.8938	 = Validation score   (r2)
	0.07s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.98s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 7584.6 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161910")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,0.809041,0.893812,r2,0.065422,0.040081,1.127154,0.001849,0.00091,0.068769,2,True,5
1,XGBoost,0.80297,0.88818,r2,0.009894,0.003369,0.700016,0.009894,0.003369,0.700016,1,True,4
2,RandomForest,0.786864,0.880681,r2,0.058146,0.034437,0.455839,0.058146,0.034437,0.455839,1,True,2
3,ExtraTrees,0.786745,0.88483,r2,0.053679,0.035802,0.358369,0.053679,0.035802,0.358369,1,True,3
4,LightGBM,0.783932,0.856467,r2,0.012167,0.003937,1.173865,0.012167,0.003937,1.173865,1,True,1


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161913"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.45 GB / 31.09 GB (7.9%)
Disk Space Avail:   286.00 GB / 693.60 GB (41.2%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161913"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): Total No. of collisions for AVs (V1-V3)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2508.64 MB
	Train Data (Orig

column='Output of simulation (execution): Total No. of collisions for AVs (V1-V3)':


	0.9141	 = Validation score   (r2)
	0.61s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.9115	 = Validation score   (r2)
	0.54s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.9151	 = Validation score   (r2)
	0.39s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	0.9179	 = Validation score   (r2)
	0.8s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'XGBoost': 0.458, 'ExtraTrees': 0.333, 'LightGBM': 0.208}
	0.9231	 = Validation score   (r2)
	0.07s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.63s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 7435.9 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161913")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,XGBoost,0.888838,0.917889,r2,0.012412,0.003799,0.799975,0.012412,0.003799,0.799975,1,True,4
1,WeightedEnsemble_L2,0.887246,0.923115,r2,0.085406,0.040883,1.868536,0.002266,0.000908,0.065713,2,True,5
2,RandomForest,0.869403,0.911524,r2,0.06243,0.034878,0.541467,0.06243,0.034878,0.541467,1,True,2
3,LightGBM,0.866019,0.914126,r2,0.00304,0.001406,0.608252,0.00304,0.001406,0.608252,1,True,1
4,ExtraTrees,0.858655,0.915133,r2,0.067688,0.034769,0.394595,0.067688,0.034769,0.394595,1,True,3


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161916"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.48 GB / 31.09 GB (8.0%)
Disk Space Avail:   285.97 GB / 693.60 GB (41.2%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161916"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): Collision rate
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2537.45 MB
	Train Data (Original)  Memory Usage: 0.21

column='Output of simulation (execution): Collision rate':


	0.8579	 = Validation score   (r2)
	0.54s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.8843	 = Validation score   (r2)
	0.53s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.8863	 = Validation score   (r2)
	0.35s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	0.883	 = Validation score   (r2)
	0.58s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'ExtraTrees': 0.48, 'XGBoost': 0.44, 'RandomForest': 0.08}
	0.8953	 = Validation score   (r2)
	0.05s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.27s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 4048.6 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161916")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,0.863112,0.895306,r2,0.137882,0.075088,1.51511,0.002278,0.000752,0.05301,2,True,5
1,RandomForest,0.853024,0.884258,r2,0.062438,0.035379,0.534945,0.062438,0.035379,0.534945,1,True,2
2,ExtraTrees,0.846839,0.886333,r2,0.062686,0.0361,0.350928,0.062686,0.0361,0.350928,1,True,3
3,XGBoost,0.845981,0.883016,r2,0.01048,0.002856,0.576227,0.01048,0.002856,0.576227,1,True,4
4,LightGBM,0.837357,0.857909,r2,0.002154,0.001045,0.539774,0.002154,0.001045,0.539774,1,True,1


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161919"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.50 GB / 31.09 GB (8.1%)
Disk Space Avail:   285.93 GB / 693.60 GB (41.2%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161919"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): No. of near-misses for AV (V1)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2565.27 MB
	Train Data (Original)  Me

column='Output of simulation (execution): No. of near-misses for AV (V1)':
[1000]	valid_set's l2: 0.546636	valid_set's r2: 0.37047


	0.3709	 = Validation score   (r2)
	1.43s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: RandomForest ...
	0.4536	 = Validation score   (r2)
	0.48s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.4758	 = Validation score   (r2)
	0.38s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	0.3896	 = Validation score   (r2)
	0.65s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'ExtraTrees': 0.81, 'LightGBM': 0.19}
	0.4823	 = Validation score   (r2)
	0.05s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 3.23s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 7214.7 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161919")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,ExtraTrees,0.447373,0.475751,r2,0.051682,0.035636,0.382868,0.051682,0.035636,0.382868,1,True,3
1,WeightedEnsemble_L2,0.411268,0.482297,r2,0.07028,0.042136,1.856648,0.002087,0.000667,0.048021,2,True,5
2,RandomForest,0.33683,0.453616,r2,0.059708,0.035079,0.475436,0.059708,0.035079,0.475436,1,True,2
3,XGBoost,0.219938,0.389645,r2,0.01133,0.002688,0.652184,0.01133,0.002688,0.652184,1,True,4
4,LightGBM,0.028748,0.370856,r2,0.016511,0.005834,1.42576,0.016511,0.005834,1.42576,1,True,1


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161922"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.54 GB / 31.09 GB (8.2%)
Disk Space Avail:   285.91 GB / 693.60 GB (41.2%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161922"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): No. of near-misses for AV (V2)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2600.10 MB
	Train Data (Original)  Me

column='Output of simulation (execution): No. of near-misses for AV (V2)':


	0.2916	 = Validation score   (r2)
	0.57s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.1817	 = Validation score   (r2)
	0.48s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.3196	 = Validation score   (r2)
	0.37s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: XGBoost ...
	0.3466	 = Validation score   (r2)
	0.62s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'XGBoost': 0.5, 'ExtraTrees': 0.3, 'LightGBM': 0.2}
	0.378	 = Validation score   (r2)
	0.05s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.31s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 7798.6 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161922")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBM,0.248871,0.291574,r2,0.002851,0.001516,0.572171,0.002851,0.001516,0.572171,1,True,1
1,WeightedEnsemble_L2,0.246655,0.378047,r2,0.063045,0.038981,1.604759,0.002193,0.000866,0.047773,2,True,5
2,ExtraTrees,0.184296,0.319643,r2,0.048637,0.034111,0.365102,0.048637,0.034111,0.365102,1,True,3
3,RandomForest,0.14046,0.181706,r2,0.049181,0.035715,0.479374,0.049181,0.035715,0.479374,1,True,2
4,XGBoost,0.128567,0.346554,r2,0.009365,0.002487,0.619713,0.009365,0.002487,0.619713,1,True,4


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161925"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.52 GB / 31.09 GB (8.1%)
Disk Space Avail:   285.89 GB / 693.60 GB (41.2%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161925"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): No. of near-misses for AV (V3)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2576.49 MB
	Train Data (Original)  Me

column='Output of simulation (execution): No. of near-misses for AV (V3)':


	0.083	 = Validation score   (r2)
	0.48s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.1263	 = Validation score   (r2)
	0.48s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.1361	 = Validation score   (r2)
	0.35s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	0.1344	 = Validation score   (r2)
	0.57s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'ExtraTrees': 0.476, 'XGBoost': 0.429, 'RandomForest': 0.095}
	0.1621	 = Validation score   (r2)
	0.06s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.13s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 4169.4 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161925")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,WeightedEnsemble_L2,0.148879,0.162063,r2,0.10678,0.072912,1.448131,0.002091,0.000691,0.056601,2,True,5
1,ExtraTrees,0.131544,0.136074,r2,0.048467,0.035174,0.348963,0.048467,0.035174,0.348963,1,True,3
2,LightGBM,0.117465,0.083036,r2,0.001458,0.000894,0.483085,0.001458,0.000894,0.483085,1,True,1
3,RandomForest,0.101084,0.126347,r2,0.046977,0.034719,0.475702,0.046977,0.034719,0.475702,1,True,2
4,XGBoost,0.07509,0.134386,r2,0.009245,0.002328,0.566865,0.009245,0.002328,0.566865,1,True,4


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161927"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.52 GB / 31.09 GB (8.1%)
Disk Space Avail:   285.88 GB / 693.60 GB (41.2%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161927"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): Total No. of near-misses for AVs (V1-V3)
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2578.25 MB
	Train Data (Ori

column='Output of simulation (execution): Total No. of near-misses for AVs (V1-V3)':


	0.4835	 = Validation score   (r2)
	0.95s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.4938	 = Validation score   (r2)
	0.45s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.5595	 = Validation score   (r2)
	0.36s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	0.5328	 = Validation score   (r2)
	0.52s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'ExtraTrees': 0.619, 'XGBoost': 0.381}
	0.5756	 = Validation score   (r2)
	0.05s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.55s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 7938.9 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161927")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,ExtraTrees,0.460668,0.559522,r2,0.048997,0.035164,0.355289,0.048997,0.035164,0.355289,1,True,3
1,WeightedEnsemble_L2,0.425864,0.575619,r2,0.060456,0.038292,0.922537,0.00182,0.00073,0.052145,2,True,5
2,RandomForest,0.339602,0.493761,r2,0.048973,0.03472,0.452673,0.048973,0.03472,0.452673,1,True,2
3,XGBoost,0.274844,0.532804,r2,0.009638,0.002398,0.515102,0.009638,0.002398,0.515102,1,True,4
4,LightGBM,0.167474,0.483473,r2,0.008737,0.003198,0.953344,0.008737,0.003198,0.953344,1,True,1


No path specified. Models will be saved in: "AutogluonModels/ag-20250201_161930"
Preset alias specified: 'medium' maps to 'medium_quality'.
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.12.2
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #140~20.04.1-Ubuntu SMP Wed Dec 18 21:35:34 UTC 2024
CPU Count:          16
Memory Avail:       2.54 GB / 31.09 GB (8.2%)
Disk Space Avail:   285.86 GB / 693.60 GB (41.2%)
Presets specified: ['medium']
Beginning AutoGluon training ...
AutoGluon will save models to "/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161930"
Train Data Rows:    1520
Train Data Columns: 19
Label Column:       Output of simulation (execution): Near-miss rate
Problem Type:       regression
Preprocessing data ...
Using Feature Generators to preprocess the data ...
Fitting AutoMLPipelineFeatureGenerator...
	Available Memory:                    2599.60 MB
	Train Data (Original)  Memory Usage: 0.21

column='Output of simulation (execution): Near-miss rate':


	0.3403	 = Validation score   (r2)
	0.51s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: RandomForest ...
	0.3337	 = Validation score   (r2)
	0.45s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: ExtraTrees ...
	0.3732	 = Validation score   (r2)
	0.36s	 = Training   runtime
	0.04s	 = Validation runtime
Fitting model: XGBoost ...
	0.3628	 = Validation score   (r2)
	0.56s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'ExtraTrees': 0.533, 'XGBoost': 0.467}
	0.406	 = Validation score   (r2)
	0.05s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.14s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 7855.5 rows/s (304 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/olga/coordination_oru/scenario-analysis/AutogluonModels/ag-20250201_161930")


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBM,0.365662,0.3403,r2,0.002632,0.001062,0.509687,0.002632,0.001062,0.509687,1,True,1
1,ExtraTrees,0.347325,0.373209,r2,0.04621,0.035575,0.355206,0.04621,0.035575,0.355206,1,True,3
2,WeightedEnsemble_L2,0.34456,0.406035,r2,0.055427,0.038699,0.965572,0.001386,0.000605,0.04859,2,True,5
3,RandomForest,0.341045,0.333677,r2,0.045678,0.035034,0.454745,0.045678,0.035034,0.454745,1,True,2
4,XGBoost,0.240465,0.362759,r2,0.00783,0.002519,0.561776,0.00783,0.002519,0.561776,1,True,4


Unnamed: 0,Output of simulation (execution): No. of completed missions for AV (V1),Output of simulation (execution): No. of completed missions for AV (V2),Output of simulation (execution): No. of completed missions for AV (V3),Output of simulation (execution): Total No. of completed missions for AVs (V1-V3),Output of simulation (execution): No. of collisions for AV (V1),Output of simulation (execution): No. of collisions for AV (V2),Output of simulation (execution): No. of collisions for AV (V3),Output of simulation (execution): Total No. of collisions for AVs (V1-V3),Output of simulation (execution): Collision rate,Output of simulation (execution): No. of near-misses for AV (V1),Output of simulation (execution): No. of near-misses for AV (V2),Output of simulation (execution): No. of near-misses for AV (V3),Output of simulation (execution): Total No. of near-misses for AVs (V1-V3),Output of simulation (execution): Near-miss rate
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario9-4.json, passhum 0, slowness without rerouting, forcing ignoring human",5.589739,4.149848,8.160124,18.491026,6.212653,3.740118,0.995106,10.647227,3.697789,0.139650,0.238680,0.218712,0.694233,0.206233
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario3-7.json, passhum 0, slowness without rerouting, forcing change of priorities",3.274708,2.500108,7.644654,11.986833,0.073645,0.917783,0.138039,1.401638,0.321029,0.211980,0.066935,0.024422,0.217045,0.074977
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario1-7.json, passhum 0, slowness without rerouting, forcing no",11.841349,7.437706,5.961347,25.504454,0.017724,0.061246,0.028763,0.060351,0.000050,0.022100,0.039443,0.000984,0.011852,-0.001734
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario3-8.json, passhum 0, slowness with rerouting, forcing stops",3.929858,6.995964,3.312943,15.533357,1.095394,0.541181,0.075321,1.795174,0.412915,-0.039017,-0.025677,0.021962,0.013024,-0.008539
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario9-7.json, passhum 0, slowness with rerouting, forcing stops",1.062115,2.082721,7.728716,11.301843,1.777022,0.901281,0.014739,2.506600,0.882983,0.184882,-0.035320,0.004393,0.116308,0.055339
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario2-10.json, passhum 0, slowness no, forcing no",8.439201,8.976551,5.037211,22.123932,0.174881,0.159996,0.362591,1.157085,-0.003275,0.069254,0.163727,-0.007836,0.055389,-0.006749
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario7-4.json, passhum 0, slowness no, forcing no",9.542936,8.136394,4.104001,22.695917,-0.019212,0.025280,1.719576,1.764237,-0.000865,0.111439,0.122100,0.000338,0.161786,0.019705
"map-generator/generated-maps/2024-11-28_13:19:18_without_bridges/scenario1-3.json, passhum 0, slowness without rerouting, forcing ignoring human",8.455648,8.194592,7.124763,24.004978,0.940974,1.022020,1.636447,3.969981,0.683569,0.208878,0.563580,0.233214,0.932520,0.135652
"map-generator/generated-maps/2024-11-28_13:17:39_with_bridges/scenario3-1.json, passhum 0, slowness with rerouting, forcing no",3.988708,9.877445,8.773148,22.786978,0.054587,0.014463,0.041989,0.095518,0.004087,0.012137,0.021923,-0.002123,0.024085,-0.001237


## evaluate_and_plot_all_columns

In [12]:
evaluate_and_plot_all_columns(df_test, df_predictions_autogluon)

No. of completed missions for AV (V1):
- R^2 Score: 0.9771006107330322


No. of completed missions for AV (V2):
- R^2 Score: 0.9392932653427124


No. of completed missions for AV (V3):
- R^2 Score: 0.9454619288444519


Total No. of completed missions for AVs (V1-V3):
- R^2 Score: 0.9666406512260437


No. of collisions for AV (V1):
- R^2 Score: 0.8337531089782715


No. of collisions for AV (V2):
- R^2 Score: 0.7410061955451965


No. of collisions for AV (V3):
- R^2 Score: 0.8090411424636841


Total No. of collisions for AVs (V1-V3):
- R^2 Score: 0.8872459530830383


Collision rate:
- R^2 Score: 0.8631116610907708


No. of near-misses for AV (V1):
- R^2 Score: 0.41126757860183716


No. of near-misses for AV (V2):
- R^2 Score: 0.24665457010269165


No. of near-misses for AV (V3):
- R^2 Score: 0.14887893199920654


Total No. of near-misses for AVs (V1-V3):
- R^2 Score: 0.4258642792701721


Near-miss rate:
- R^2 Score: 0.3445602233785867


## explain_predictions

In [13]:
def explain_predictions(predictors, df_test):
    # X_test, y_test = split_df_to_X_y(df_test)
    
    for column, predictor in zip(COLUMNS_OUTPUT, predictors):
        # df_test_predictor = pd.concat([X_test, y_test[[column]]], axis=1)
        
        tree_model = predictor._trainer.load_model('LightGBM')
        tree_importance = tree_model.model.feature_importance(importance_type='gain')
        # show(tree_importance, column)
        
        feature_names = predictor.feature_metadata.get_features()
        df = pd.DataFrame.from_dict(
            {name: {'importance': value} for name, value in zip(feature_names, tree_importance)},
            orient='index'
        )
        # print(df.index)
        # Group indexes by their base name before `#`
        df['group'] = df.index.str.extract(r'^(.+?)(?:#\d+)?$', expand=False)
        df = df.groupby('group')['importance'].sum().to_frame()
        
        df.sort_values(by='importance', ascending=False, inplace=True)
        show(df, column)
        
        
explain_predictions(predictors, df_test)

Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Output of simulation (planning): Mission length for AV (V1),181734.40133
Output of simulation (planning): POD score for AV (V1),27165.209342
Output of simulation (planning): POD score for MV (V0),4892.511099
Output of simulation (planning): Mean POD score for AVs (V1-V3),4469.951069
Static map features: No. of OPs,4237.13755
Coordination strategy: Change of priorities,4169.873595
Coordination strategy: Stops,4042.401453
Output of simulation (planning): POD score for AV (V3),3349.85259
Violation type: Speed violation,2804.743432
Output of simulation (planning): Mission length for MV (V0),2584.846799


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Output of simulation (planning): Mission length for AV (V2),69526.57151
Output of simulation (planning): Mean POD score for AVs (V1-V3),7625.195556
Output of simulation (planning): Total Mission length for AVs (V1-V3),6163.816622
Output of simulation (planning): POD score for AV (V2),3533.626081
Coordination strategy: Stops,2852.134086
Output of simulation (planning): Mission length for AV (V1),2368.591103
Output of simulation (planning): Mission length for AV (V3),1835.280613
Coordination strategy: Change of priorities,1747.547193
Output of simulation (planning): POD score for AV (V3),1747.314079
Output of simulation (planning): POD score for AV (V1),1639.63602


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Output of simulation (planning): Mission length for AV (V3),57701.211801
Output of simulation (planning): POD score for AV (V3),10438.771513
Coordination strategy: Stops,2870.845508
Coordination strategy: Change of priorities,2238.207131
Output of simulation (planning): POD score for MV (V0),1657.234086
Violation type: Priority violation,1499.986478
Output of simulation (planning): POD score for AV (V2),1408.196098
Output of simulation (planning): Mean POD score for AVs (V1-V3),1108.492473
Violation type: Speed violation,985.844375
Output of simulation (planning): Mission length for MV (V0),905.258141


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Static map features: No. of OPs,262399.754674
Output of simulation (planning): Mission length for AV (V1),206289.040317
Output of simulation (planning): Mission length for AV (V2),77778.135768
Output of simulation (planning): Mean POD score for AVs (V1-V3),45853.096627
Output of simulation (planning): Total Mission length for AVs (V1-V3),36532.453456
Coordination strategy: Stops,27149.775849
Coordination strategy: Change of priorities,21147.424346
Output of simulation (planning): POD score for MV (V0),16995.050604
Output of simulation (planning): POD score for AV (V2),16112.924688
Output of simulation (planning): POD score for AV (V1),11424.53785


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Violation type: Priority violation,27837.635014
Output of simulation (planning): POD score for AV (V1),27653.659334
Coordination strategy: Stops,11448.431178
Output of simulation (planning): POD score for MV (V0),8617.606841
Coordination strategy: Change of priorities,5124.70526
Violation type: Priority violation and Speed violation,2946.469482
Output of simulation (planning): Mission length for AV (V1),2013.243008
Output of simulation (planning): POD score for AV (V2),1805.38948
Output of simulation (planning): Mission length for AV (V3),1517.814349
Output of simulation (planning): Mean POD score for AVs (V1-V3),1283.596182


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Violation type: Priority violation,9297.685122
Coordination strategy: Stops,5422.391733
Output of simulation (planning): Mission length for AV (V2),3681.581553
Output of simulation (planning): POD score for AV (V2),3526.894646
Coordination strategy: Change of priorities,2980.515676
Output of simulation (planning): POD score for MV (V0),2243.662881
Output of simulation (planning): Mission length for MV (V0),1972.861205
Violation type: Priority violation and Speed violation,1738.930032
Output of simulation (planning): POD score for AV (V3),1484.470587
Output of simulation (planning): Mean POD score for AVs (V1-V3),1230.405244


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Violation type: Priority violation,22474.295086
Output of simulation (planning): Mission length for AV (V3),18008.881566
Output of simulation (planning): Mission length for MV (V0),14955.199381
Coordination strategy: Stops,8901.382119
Coordination strategy: Change of priorities,5527.685506
Output of simulation (planning): POD score for AV (V3),3920.978292
Violation type: Priority violation and Speed violation,3350.124344
Output of simulation (planning): Total Mission length for AVs (V1-V3),2785.087962
Output of simulation (planning): POD score for MV (V0),2291.542937
Output of simulation (planning): POD score for AV (V2),1886.329997


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Violation type: Priority violation,144953.369799
Output of simulation (planning): POD score for MV (V0),72576.64945
Coordination strategy: Stops,63383.283417
Coordination strategy: Change of priorities,34173.633176
Violation type: Priority violation and Speed violation,18573.525286
Output of simulation (planning): Mission length for MV (V0),16534.48677
Output of simulation (planning): POD score for AV (V2),5132.476504
Output of simulation (planning): POD score for AV (V1),4903.126682
Output of simulation (planning): Mission length for AV (V3),4084.73951
Output of simulation (planning): Mission length for AV (V1),2997.000629


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Coordination strategy: Stops,2085.591831
Coordination strategy: Change of priorities,813.990141
Output of simulation (planning): POD score for AV (V2),681.477327
Violation type: Priority violation and Speed violation,639.468699
Output of simulation (planning): POD score for MV (V0),561.459931
Violation type: Priority violation,544.483366
Output of simulation (planning): Mission length for AV (V2),171.65148
Output of simulation (planning): Mean POD score for AVs (V1-V3),125.37227
Output of simulation (planning): Mission length for AV (V3),96.683484
Output of simulation (planning): Mission length for AV (V1),83.073528


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Violation type: Priority violation,1310.695066
Output of simulation (planning): Mission length for AV (V1),725.061763
Output of simulation (planning): POD score for MV (V0),667.620882
Output of simulation (planning): POD score for AV (V2),534.086637
Output of simulation (planning): Mission length for MV (V0),528.502114
Output of simulation (planning): Mean POD score for AVs (V1-V3),517.12262
Output of simulation (planning): POD score for AV (V1),495.141926
Output of simulation (planning): Mission length for AV (V2),490.13451
Output of simulation (planning): POD score for AV (V3),445.828572
Output of simulation (planning): Mission length for AV (V3),442.824362


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Output of simulation (planning): POD score for MV (V0),1326.499757
Violation type: Priority violation,653.401216
Output of simulation (planning): Total Mission length for AVs (V1-V3),563.027603
Output of simulation (planning): POD score for AV (V3),504.260217
Output of simulation (planning): POD score for AV (V2),290.056396
Output of simulation (planning): Mission length for MV (V0),275.349073
Output of simulation (planning): Mission length for AV (V2),268.53573
Output of simulation (planning): Mean POD score for AVs (V1-V3),265.274002
Output of simulation (planning): Mission length for AV (V3),241.122154
Coordination strategy: Change of priorities,227.726696


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Violation type: Priority violation,135.895064
Output of simulation (planning): POD score for AV (V3),80.223063
Output of simulation (planning): Mission length for AV (V3),80.205244
Coordination strategy: Stops,75.708376
Output of simulation (planning): POD score for AV (V1),64.412282
Output of simulation (planning): Total Mission length for AVs (V1-V3),62.208081
Coordination strategy: Change of priorities,53.10834
Output of simulation (planning): Mean POD score for AVs (V1-V3),47.371155
Output of simulation (planning): POD score for AV (V2),46.25473
Output of simulation (planning): POD score for MV (V0),41.854218


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Violation type: Priority violation,5148.591754
Output of simulation (planning): POD score for MV (V0),1569.495056
Coordination strategy: Change of priorities,1533.24361
Output of simulation (planning): Mission length for MV (V0),1309.603963
Output of simulation (planning): POD score for AV (V2),1220.928976
Output of simulation (planning): Total Mission length for AVs (V1-V3),1220.048666
Output of simulation (planning): Mission length for AV (V3),1193.287148
Output of simulation (planning): Mission length for AV (V1),1073.458407
Output of simulation (planning): POD score for AV (V3),1001.114326
Output of simulation (planning): Mean POD score for AVs (V1-V3),999.358022


Unnamed: 0_level_0,importance
group,Unnamed: 1_level_1
Violation type: Priority violation and Speed violation,19.701817
Output of simulation (planning): POD score for MV (V0),17.166275
Output of simulation (planning): POD score for AV (V2),16.68634
Coordination strategy: Change of priorities,14.465993
Output of simulation (planning): Mean POD score for AVs (V1-V3),12.909307
Output of simulation (planning): Mission length for AV (V1),12.416659
Violation type: Priority violation,11.754726
Output of simulation (planning): Mission length for AV (V3),11.223038
Output of simulation (planning): POD score for AV (V3),10.136323
Output of simulation (planning): Total Mission length for AVs (V1-V3),10.105499
