In [1]:
import pandas as pd
import os
from tqdm import tqdm
import numpy as np

In [2]:

def read_fixed_demand_output(output_directory, last_iter=-1):
    """Load the iteration-0 DRT output tables of one simulation run.

    All CSV files are read from ``<output_directory>/ITERS/it.0`` with ``;`` as
    separator; ``stopwatch.txt`` is read from ``output_directory`` itself as a
    tab-separated file.

    Parameters
    ----------
    output_directory : str
        Root output folder of the run.
    last_iter : int, optional
        Currently unused; kept so that existing calls keep working.

    Returns
    -------
    dict
        Maps ``'drt_legs'``, ``'global_stats'``, ``'drt_trips_stats'``,
        ``'drt_simulated_predictions'`` and ``'stopwatch'`` to DataFrames.
        When ``0.drt_zonalAndTimeBinWaitingTime.csv`` exists in the iteration
        folder, ``'binned_wait_time'`` and ``'binned_delay_factor'`` are added
        as well.
    """
    iter_0_dir = os.path.join(output_directory, 'ITERS', 'it.0')
    binned = '0.drt_zonalAndTimeBinWaitingTime.csv' in os.listdir(iter_0_dir)

    def _read_iter_0(file_name):
        # Every iteration-0 file carries the "0." prefix and uses ';' as separator.
        return pd.read_csv(os.path.join(iter_0_dir, '0.' + file_name), sep=';')

    # 'global_stats' used to be the return value of list.append(), i.e. None;
    # it is now the DataFrame itself, like the other entries.
    d = {'drt_legs': _read_iter_0('drt_legs_drt.csv'),
         'global_stats': _read_iter_0('drt_globalStats.csv'),
         'drt_trips_stats': _read_iter_0('drt_drtTripsStats.csv'),
         'drt_simulated_predictions': _read_iter_0('drt_simulatedTrips.csv'),
        }

    if binned:
        # The delay-factor file is read whenever the zonal wait-time file is
        # present; its own existence is not checked separately.
        d['binned_wait_time'] = _read_iter_0('drt_zonalAndTimeBinWaitingTime.csv')
        d['binned_delay_factor'] = _read_iter_0('drt_distanceAndTimeBinDelayFactor.csv')

    # Read stopwatch.txt
    d['stopwatch'] = pd.read_csv(os.path.join(output_directory, 'stopwatch.txt'), sep='\t')

    return d

In [3]:
# Base directory under which the `fixedDemand/fixedDemand_<demand>/<config>` output folders are looked up.
scratch_folder = '/cluster/scratch/mfrancesc/IVT/SA_scenario'

In [4]:
# Configuration names; each one is the last path component of an output folder
# and is later used as the 'Method' label in the error table.
configs = [
    'global_average',
    'zonal_hex',
    'zonal_square',
    'zonal_TAZ',
    'dynamic_CN_k500_expDecay',
    'dynamic_CN_k1000_expDecay',
    'dynamic_CN_k2000_expDecay',
    'dynamic_PN_km500_expDecay',
    'dynamic_PN_km1000_expDecay',
    'dynamic_PN_km2000_expDecay',
    'dynamic_FD_expDecay',
]

In [5]:
# Fixed-demand levels: 100000 to 500000 in steps of 100000. Each value names a
# `fixedDemand_<value>` folder and is later used as the 'Demand' label.
fd_values = list(range(100_000, 500_001, 100_000))

In [6]:
# Load every (demand level, configuration) run into a nested dict:
# outputs[fd_value][config] -> dict of tables from read_fixed_demand_output.
outputs = {fd_value: {} for fd_value in fd_values}
for fd_value in tqdm(fd_values):
    for c in configs:
        path = os.path.join(
            scratch_folder,
            f'fixedDemand/fixedDemand_{fd_value}/{c}',
        )
        outputs[fd_value][c] = read_fixed_demand_output(path)

100%|██████████| 5/5 [00:57<00:00, 11.40s/it]


In [7]:
def combine_simulated_predictions_and_stats(simulated_predictions, stats):
    """Join the prediction table and the trip-stats table on person and start time.

    Every column of `simulated_predictions` gets the suffix ``_pred`` and every
    column of `stats` gets ``_stats``, except the two join keys ``personId``
    and ``startTime``, which keep their names. The inputs are not modified.

    Returns the inner merge of both tables; pandas raises a ``MergeError`` if
    the key pairs are not unique on both sides (``validate='one_to_one'``).
    """
    join_keys = ['personId', 'startTime']

    def _suffix_all_but_keys(frame, suffix):
        # Work on a deep copy, suffix every column, then restore the key names.
        suffixed = frame.copy(deep=True).add_suffix(suffix)
        restore_keys = {key + suffix: key for key in join_keys}
        return suffixed.rename(index=str, columns=restore_keys)

    return pd.merge(
        _suffix_all_but_keys(simulated_predictions, '_pred'),
        _suffix_all_but_keys(stats, '_stats'),
        on=join_keys,
        validate='one_to_one',
    )

In [8]:
def compute_stats_table(outputs_dict):
    """Summarise the wait-time prediction errors for every demand level and method.

    Parameters
    ----------
    outputs_dict : dict
        Nested mapping ``{demand: {method: tables}}`` where ``tables`` holds a
        ``'drt_simulated_predictions'`` DataFrame with the columns
        ``'real_waiTime'`` and ``'predicted_waitTime'``. Both columns are
        divided by 60 before the error is taken (presumably seconds to
        minutes; confirm against the simulation output).

    Returns
    -------
    pandas.DataFrame
        One row per (demand, method), indexed by ``['Demand', 'Method']``.
        Columns are the ``describe()`` statistics of the error
        ``real - predicted`` (with the 25/50/75/95/99 percentiles) followed by
        ``'MSE'``, ``'RMSE'``, ``'MAE'`` and ``'% errors < 0'``.
        An empty table is returned when there is nothing to summarise.

    Notes
    -----
    The method names are taken from the first demand level and looked up in
    every other level, so all levels must contain those methods.
    """
    index_names = ['Demand', 'Method']
    fd_values = list(outputs_dict.keys())
    configs = list(outputs_dict[fd_values[0]].keys()) if fd_values else []

    row_keys = []
    rows = []
    for fd_value in fd_values:
        for c in configs:
            simulated = outputs_dict[fd_value][c]['drt_simulated_predictions']
            true_labels = simulated['real_waiTime'] / 60
            predicted_labels = simulated['predicted_waitTime'] / 60
            errors = true_labels - predicted_labels

            summary = pd.Series(errors).describe(percentiles=[0.25, 0.5, 0.75, 0.95, 0.99])
            # Add mse, rmse, mae
            summary['MSE'] = np.mean(errors**2)
            summary['RMSE'] = np.sqrt(summary['MSE'])
            summary['MAE'] = np.mean(np.abs(errors))
            # Add percentage of errors below 0
            summary['% errors < 0'] = np.sum(errors < 0)/len(errors) * 100

            row_keys.append((fd_value, c))
            rows.append(summary)

    if not rows:
        empty_index = pd.MultiIndex.from_arrays([[], []], names=index_names)
        return pd.DataFrame(index=empty_index)

    # Concatenate once instead of growing a DataFrame inside the loop; the
    # (demand, method) keys become the column MultiIndex, transposed into rows.
    return pd.concat(rows, axis=1, keys=row_keys, names=index_names).T

In [9]:
compute_stats_table(outputs)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,95%,99%,max,MSE,RMSE,MAE,% errors < 0
Demand,Method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
100000,global_average,98183.0,-9.193062e-16,11.597574,-15.294541,-9.919541,-2.311208,8.122125,21.805459,28.741792,124.488792,134.502353,11.597515,9.68533,56.370247
100000,zonal_hex,98183.0,-4.216231e-16,7.72069,-34.652905,-3.331331,0.188485,4.47215,11.58844,17.423049,122.476508,59.608446,7.720651,5.569037,48.482935
100000,zonal_square,98183.0,-3.193428e-16,7.422315,-35.072588,-2.912423,0.22,4.086622,10.937829,17.100037,122.29579,55.090206,7.422278,5.221515,47.723129
100000,zonal_TAZ,98183.0,-0.9311822,7.312045,-37.793939,-4.461111,-0.091667,2.853704,10.498262,17.556489,124.488792,54.332556,7.371062,5.243582,50.805129
100000,dynamic_CN_k500_expDecay,98183.0,0.1446051,7.499504,-32.423958,-3.008661,0.292536,4.537491,11.264782,16.980163,121.851736,56.262902,7.50086,5.40755,47.892201
100000,dynamic_CN_k1000_expDecay,98183.0,0.139231,7.522551,-32.162339,-3.051397,0.28108,4.56149,11.309316,17.068145,121.880009,56.607578,7.523801,5.436538,47.984885
100000,dynamic_CN_k2000_expDecay,98183.0,0.140129,7.527326,-32.12979,-3.06145,0.277132,4.570173,11.32465,17.081162,121.873151,56.679695,7.528592,5.443999,48.00933
100000,dynamic_PN_km500_expDecay,98183.0,0.1465162,7.481747,-32.63821,-2.978191,0.29583,4.496856,11.22978,16.997619,121.865655,55.997437,7.483144,5.380212,47.723129
100000,dynamic_PN_km1000_expDecay,98183.0,0.1464348,7.481762,-32.63821,-2.978232,0.296826,4.497003,11.230886,16.991818,121.865655,55.997643,7.483157,5.380252,47.726185
100000,dynamic_PN_km2000_expDecay,98183.0,0.1465858,7.48173,-32.63821,-2.978395,0.296199,4.496656,11.230367,16.991818,121.865655,55.997203,7.483128,5.380272,47.720074


In [16]:
def _duration_rows(label, seconds):
    """Return the six (index key, value) rows for one duration column, divided by 60."""
    return [
        ((label, "Mean"), float(seconds.mean() / 60)),
        ((label, "Median"), float(seconds.median() / 60)),
        ((label, "Std"), float(seconds.std() / 60)),
        ((label, "75-percentile"), float(seconds.quantile(0.75) / 60)),
        ((label, "99-percentile"), float(seconds.quantile(0.99) / 60)),
        ((label, "Max"), float(seconds.max() / 60)),
    ]


def get_stats_table(output_dict):
    """Build a one-column summary table of the DRT trips of a single run.

    Parameters
    ----------
    output_dict : dict
        Must contain ``'drt_trips_stats'`` (DataFrame with the columns
        ``waitTime``, ``totalTravelTime``, ``euclideanDistance`` and
        ``delayFactor``) and ``'stopwatch'`` (DataFrame whose ``'iteration'``
        column can be parsed by ``pd.to_timedelta``).

    Returns
    -------
    pandas.DataFrame
        A single object-dtype column ``'Value'`` on a two-level row index.
        Rows: number of rides, wait-time and travel-time statistics (both
        divided by 60), mean ``euclideanDistance`` divided by 1000, mean
        ``delayFactor``, and the summed ``'iteration'`` durations as a string.
    """
    trips = output_dict['drt_trips_stats']

    # Collect (index key, value) pairs in display order and build the frame in
    # one go. The column is created with object dtype because it mixes floats
    # with the execution-time string; assigning a string into a float column
    # afterwards relies on silent upcasting, which pandas has deprecated.
    rows = [(("Number of rides", ""), float(len(trips)))]
    rows += _duration_rows("Wait time (min)", trips["waitTime"])
    rows += _duration_rows("Travel time (min)", trips["totalTravelTime"])
    rows += [
        (("Mean direct distance (km)", ""), float(trips["euclideanDistance"].mean() / 1000)),
        (("Average detour factor (time wise)", ""), float(trips["delayFactor"].mean())),
        (("Total execution time", ""),
         str(pd.to_timedelta(output_dict['stopwatch']['iteration']).sum())),
    ]

    keys = [key for key, _ in rows]
    values = [value for _, value in rows]
    column = pd.Series(values, index=pd.MultiIndex.from_tuples(keys), dtype=object)
    return pd.DataFrame({"Value": column})

def get_multiple_stats_table(outputs):
    """Place the trip summary tables of all demand levels side by side.

    Only the first configuration of the first demand level is used; its name
    is looked up in every demand level. Each demand level contributes one
    column, named after the demand value converted to a string.
    """
    demand_levels = list(outputs.keys())
    reference_config = list(outputs[demand_levels[0]].keys())[0]

    per_demand_tables = [
        get_stats_table(outputs[level][reference_config]).rename(columns={'Value': str(level)})
        for level in demand_levels
    ]

    return pd.concat(per_demand_tables, axis=1)

In [17]:
get_multiple_stats_table(outputs)

Unnamed: 0,Unnamed: 1,100000,200000,300000,400000,500000
Number of rides,,98183,196199,294281,392280,490533
Wait time (min),Mean,15.3112,14.388,13.6914,13.6004,13.0946
Wait time (min),Median,13,11.8667,11.3333,11.1167,10.4667
Wait time (min),Std,11.5976,11.4976,11.1279,11.5494,11.6881
Wait time (min),75-percentile,23.4333,21.75,20.3,19.9833,19.2333
Wait time (min),99-percentile,44.053,43.85,43.4333,43.8667,43.8667
Wait time (min),Max,139.8,242.35,198.267,298.55,305.317
Travel time (min),Mean,15.1508,15.7669,16.0646,16.3695,16.4564
Travel time (min),Median,12,12.5333,12.8667,13.1833,13.3333
Travel time (min),Std,11.9112,12.3245,12.4732,12.6823,12.6975
