## Travel time analysis

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np

### Data reading

In [2]:
# Load the per-scenario travel time results and the source/sink table.
# Every CSV is read with its first column as the index. The "VehicleID" column
# is removed right after loading (it is not referenced in the visible analysis
# cells); drop() returns a new frame here instead of mutating in place.
scenarios = range(0,5)
results = {}
for s in scenarios:
    results[s] = (
        pd.read_csv(f"../experiments/results_scenario_{s}.csv", index_col=0)
        .drop(columns="VehicleID")
    )
# Plain string literal: this path has no placeholders, so no f-string is needed.
source_sinks = pd.read_csv("../experiments/source_data.csv", index_col=0)

In [3]:
# Preview the first five rows of the scenario-4 results and of the source/sink table
display(results[4].head(5), source_sinks.head(5))

Unnamed: 0,Travel_Time,Startpoint,Endpoint
0,6,SourceSink13034,13040
1,6,SourceSink10000,11303
2,12,SourceSink11303,12254
3,9,SourceSink13012,13022
4,9,SourceSink13022,13012


Unnamed: 0,Road,Coordinates
10000,N1,"(90.443333, 23.7060278)"
11302,N1,"(92.298083, 20.8629167)"
11303,N2,"(90.5214438, 23.7059167)"
12253,N2,"(92.0176382, 25.1570556)"
12254,N105,"(90.5466108, 23.6904163)"


### Global average travel time

In [4]:
# Mean travel time per scenario with a normal-approximation confidence interval.
# Note: the intervals describe travel times; the "economic interval" wording is
# kept only because it is the label printed below and the column name written
# to the CSV.
confidence = 0.95

average = {}
low_bound = {}
high_bound = {}
interval_range = {}
economic_interval = {}

for i in scenarios:
    results_list = results[i]["Travel_Time"].tolist()
    average[i] = np.mean(results_list)
    # The confidence level is passed positionally: SciPy renamed this keyword
    # from `alpha` to `confidence` (1.9) and later removed `alpha=`, so the
    # positional form is the one that works on both old and new versions.
    low_bound[i], high_bound[i] = st.norm.interval(
        confidence, loc=average[i], scale=st.sem(results_list)
    )
    # Width of the interval, absolute and relative to the mean (in percent)
    interval_range[i] = high_bound[i] - low_bound[i]
    economic_interval[i] = interval_range[i] / average[i] * 100

    # The confidence level in the message follows `confidence` (0.95 -> "95%")
    print(f'Average travel time ({confidence:.0%} confidence interval) for scenario {i}: {average[i]:.3f} ({low_bound[i]:.3f}, {high_bound[i]:.3f}), economic interval: {economic_interval[i]:.3f}%')

# One row per scenario; the dict keys (scenario numbers) become the index
df = pd.DataFrame({
    "Average (min)": average,
    "Low bound (min)": low_bound,
    "High bound (min)": high_bound,
    "Interval range (min)": interval_range,
    "Economic interval (%)": economic_interval})
df.to_csv("../results/travel_times.csv", index_label="Scenario")
df

Average travel time (95% confidence interval) for scenario 0: 378.247 (377.586, 378.909), economic interval: 0.350%
Average travel time (95% confidence interval) for scenario 1: 380.893 (380.225, 381.560), economic interval: 0.351%
Average travel time (95% confidence interval) for scenario 2: 388.969 (387.015, 390.923), economic interval: 1.005%
Average travel time (95% confidence interval) for scenario 3: 510.638 (507.942, 513.333), economic interval: 1.056%
Average travel time (95% confidence interval) for scenario 4: 992.122 (990.269, 993.975), economic interval: 0.373%


Unnamed: 0,Average (min),Low bound (min),High bound (min),Interval range (min),Economic interval (%)
0,378.24713,377.585546,378.908714,1.323168,0.349816
1,380.892805,380.225254,381.560357,1.335103,0.350519
2,388.968883,387.014987,390.922779,3.907793,1.004654
3,510.637547,507.942086,513.333008,5.390923,1.055724
4,992.122083,990.269402,993.974764,3.705362,0.373478


### Average travel time per route

In [5]:
# Spot check: mean travel time in scenario 3 for trips that start at
# SourceSink11303 and end at 12800
scenario_3 = results[3]
on_route = (scenario_3['Startpoint'] == 'SourceSink11303') & (scenario_3['Endpoint'] == 12800)
scenario_3.loc[on_route, "Travel_Time"].mean()

305.7307692307692

In [6]:
# Collect the distinct start and end points that occur in scenario 2.
# Start points are stored as integers with the 'SourceSink' prefix stripped;
# end points are taken as they appear in the "Endpoint" column.
start_points = {
    int(label.replace('SourceSink', ''))
    for label in results[2]["Startpoint"].tolist()
}
end_points = set(results[2]["Endpoint"].tolist())

In [7]:
# For all scenarios, for all startpoints (sources) and for all endpoints (sinks),
# report the average travel time.
# The trips are grouped once per scenario and the per-route means are looked up
# afterwards, instead of re-filtering the whole frame for every start/end pair.
# Pairs without any trip in a scenario get NaN, which is what the mean of an
# empty selection yields.
tt_roads = {}
for s, df in results.items():
    # {(startpoint label, endpoint): mean travel time} for the routes that occur
    route_means = df.groupby(["Startpoint", "Endpoint"])["Travel_Time"].mean().to_dict()
    tt_roads[s] = {
        (sp, ep): route_means.get((f'SourceSink{sp}', ep), float("nan"))
        for sp in start_points
        for ep in end_points
    }

In [8]:
# Assemble the per-route means into one table (rows: (start, end) pairs,
# columns: scenarios), write it to CSV and show it
df_tt_roads = pd.DataFrame(tt_roads)
df_tt_roads.to_csv("../results/travel_time_per_route.csv")
df_tt_roads

Unnamed: 0,Unnamed: 1,0,1,2,3,4
12800,12800,,,,,
12800,10000,223.0,223.000000,242.222222,315.363636,643.069519
12800,11302,781.0,798.721893,952.769231,1347.785714,2462.522124
12800,11303,221.0,221.000000,233.368421,314.461538,640.137255
12800,12354,261.0,261.000000,276.966667,342.800000,705.956790
...,...,...,...,...,...,...
12799,12657,158.0,158.000000,163.000000,212.859155,411.198992
12799,12656,191.0,191.000000,205.600000,285.160714,529.359712
12799,12541,460.0,463.440181,502.750000,746.740000,1332.827982
12799,12542,313.0,313.000000,334.924528,485.911765,850.087282


**To-do:**
 - Make start and end points human readable (for example, road_name + north or south for each source)
 - Check for which routes the largest increase in travel time takes place between scenarios
 - Create some nice plots and/or graphs