## Travel time analysis

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np

### Data reading

In [2]:
# Read the travel time results of every scenario and the SourceSink metadata
scenarios = range(0,5)
results = {}
for s in scenarios:
    # Drop the VehicleID column on the freshly read frame instead of mutating
    # it in place; none of the cells below read that column.
    results[s] = pd.read_csv(f"../experiments/results_scenario_{s}.csv", index_col=0).drop(columns="VehicleID")
# Plain string literal: this path has no placeholders, so it needs no f-prefix
source_sinks = pd.read_csv("../experiments/source_data.csv", index_col=0)

In [3]:
# Preview the first rows of the scenario 2 results to check the columns that were read
results[2].head()

Unnamed: 0,Travel_Time,Startpoint,Endpoint
0,31,SourceSink13012,13040
1,9,SourceSink13012,13022
2,31,SourceSink13040,13012
3,24,SourceSink13023,13034
4,13,SourceSink13012,13033


#### Create human-readable labels for SourceSinks

In [4]:
# Split the coordinates into latitude and longitude.
# The strings look like "(lon, lat)": the first component is the longitude,
# the second one the latitude.
source_sinks["lat"] = source_sinks["Coordinates"].apply(lambda x: float(x.split(',')[1].replace(')', '')))
source_sinks["lon"] = source_sinks["Coordinates"].apply(lambda x: float(x.split(',')[0].replace('(', '')))

# Create the list of roads and the id -> direction dictionaries
# (north/south and west/east)
road_list = source_sinks["Road"].unique().tolist()
sources_NS = {}
sources_WE = {}

# Check which SourceSink is the northernmost, southernmost, easternmost and
# westernmost one for each road
for road in road_list:
    road_rows = source_sinks.loc[source_sinks["Road"] == road]
    # idxmax()/idxmin() already return the index label of the extreme row,
    # so no additional row lookup is needed to obtain the SourceSink id.
    # Note: when one row is both the maximum and the minimum, the later
    # assignment ("South" / "West") overwrites the earlier one.
    sources_NS[road_rows["lat"].idxmax()] = "North"
    sources_NS[road_rows["lat"].idxmin()] = "South"
    sources_WE[road_rows["lon"].idxmax()] = "East"
    sources_WE[road_rows["lon"].idxmin()] = "West"

# Add the directions to the dataframe (aligned on the index) and create labels
# of the form "<Road>_<NS><WE>"; rows without a direction get a missing label
source_sinks["NS"] = pd.Series(sources_NS)
source_sinks["WE"] = pd.Series(sources_WE)
source_sinks["Label"] = source_sinks["Road"] + '_' + source_sinks["NS"] + source_sinks["WE"]

# Create the id -> label dictionary and show the head of the dataframe
s_name_dict = source_sinks["Label"].to_dict()
source_sinks.head(5)

Unnamed: 0,Road,Coordinates,lat,lon,NS,WE,Label
10000,N1,"(90.443333, 23.7060278)",23.706028,90.443333,North,West,N1_NorthWest
11302,N1,"(92.298083, 20.8629167)",20.862917,92.298083,South,East,N1_SouthEast
11303,N2,"(90.5214438, 23.7059167)",23.705917,90.521444,South,West,N2_SouthWest
12253,N2,"(92.0176382, 25.1570556)",25.157056,92.017638,North,East,N2_NorthEast
12254,N105,"(90.5466108, 23.6904163)",23.690416,90.546611,South,East,N105_SouthEast


### Global average travel time

In [5]:
# Calculate the confidence interval of the mean travel time for every scenario
confidence = 0.95

average = {}
low_bound = {}
high_bound = {}
interval_range = {}
economic_interval = {}

for i in scenarios:
    results_list = results[i]["Travel_Time"].tolist()
    average[i] = np.mean(results_list)
    # The confidence level is passed positionally: SciPy renamed this keyword
    # from `alpha` to `confidence` and later removed `alpha`, so the keyword
    # form `alpha=` raises a TypeError on current SciPy releases, while the
    # positional form works on both old and new versions.
    low_bound[i], high_bound[i] = st.norm.interval(confidence, loc=average[i], scale=st.sem(results_list))
    interval_range[i] = high_bound[i] - low_bound[i]
    # Width of the interval relative to the mean, expressed in percent
    economic_interval[i] = interval_range[i] / average[i] * 100

    # The confidence level in the message is derived from `confidence` so the
    # text stays correct when the level is changed (prints "95%" for 0.95)
    print(f'Average travel time ({confidence * 100:g}% confidence interval) for scenario {i}: {average[i]:.3f} ({low_bound[i]:.3f}, {high_bound[i]:.3f}), economic interval: {economic_interval[i]:.3f}%')

# One row per scenario; save the summary as CSV and display it
df = pd.DataFrame({
    "Average (min)": average,
    "Low bound (min)": low_bound,
    "High bound (min)": high_bound,
    "Interval range (min)": interval_range,
    "Economic interval (%)": economic_interval})
df.to_csv("../results/travel_times.csv", index_label="Scenario")
df

Average travel time (95% confidence interval) for scenario 0: 378.247 (377.586, 378.909), economic interval: 0.350%
Average travel time (95% confidence interval) for scenario 1: 380.893 (380.225, 381.560), economic interval: 0.351%
Average travel time (95% confidence interval) for scenario 2: 426.062 (425.300, 426.823), economic interval: 0.357%
Average travel time (95% confidence interval) for scenario 3: 558.409 (557.393, 559.425), economic interval: 0.364%
Average travel time (95% confidence interval) for scenario 4: 992.122 (990.269, 993.975), economic interval: 0.373%


Unnamed: 0,Average (min),Low bound (min),High bound (min),Interval range (min),Economic interval (%)
0,378.24713,377.585546,378.908714,1.323168,0.349816
1,380.892805,380.225254,381.560357,1.335103,0.350519
2,426.061629,425.300354,426.822903,1.522549,0.357354
3,558.408991,557.392545,559.425438,2.032893,0.364051
4,992.122083,990.269402,993.974764,3.705362,0.373478


### Average travel time per route

In [6]:
# Spot check: mean travel time in scenario 3 for the trips that start at
# SourceSink 11303 and end at 12800
spot_check = results[3]
leaves_origin = spot_check['Startpoint'] == f'SourceSink{11303}'
reaches_destination = spot_check['Endpoint'] == 12800
spot_check.loc[leaves_origin & reaches_destination, "Travel_Time"].mean()

325.59864864864863

In [7]:
# Collect the distinct start points (as integer ids, without the "SourceSink"
# prefix) and the distinct end points that occur in the scenario 2 results
reference_results = results[2]
start_points = {
    int(name.replace('SourceSink', ''))
    for name in reference_results["Startpoint"].tolist()
}
end_points = set(reference_results["Endpoint"].tolist())

In [8]:
# For all scenarios, for all startpoints (sources) and for all endpoints (sinks), report the average travel time.
tt_roads = {}
for s, df in results.items():
    # Compute the mean of every (start, end) pair in a single grouped pass
    # instead of re-filtering the whole frame once per pair
    mean_by_pair = df.groupby(["Startpoint", "Endpoint"])["Travel_Time"].mean().to_dict()
    # Keep one entry per start/end combination, in the same iteration order as
    # before; combinations without any trip are reported as NaN
    tt_roads[s] = {
        (s_name_dict[sp], s_name_dict[ep]): mean_by_pair.get((f'SourceSink{sp}', ep), np.nan)
        for sp in start_points
        for ep in end_points
    }

In [9]:
# Turn the nested {scenario: {(start, end): mean travel time}} dictionary into
# a frame with one column per scenario, write it to CSV and display it
df_tt_roads = pd.DataFrame(tt_roads)
df_tt_roads.to_csv("../results/travel_time_per_route.csv")
df_tt_roads

Unnamed: 0,Unnamed: 1,0,1,2,3,4
N204_SouthWest,N204_SouthWest,,,,,
N204_SouthWest,N1_NorthWest,223.0,223.000000,253.281609,325.339744,643.069519
N204_SouthWest,N1_SouthEast,781.0,798.721893,949.264706,1277.264706,2462.522124
N204_SouthWest,N2_SouthWest,221.0,221.000000,250.584211,326.767956,640.137255
N204_SouthWest,N105_NorthWest,261.0,261.000000,287.258065,362.274725,705.956790
...,...,...,...,...,...,...
N110_NorthWest,N108_SouthWest,158.0,158.000000,176.204663,196.366667,411.198992
N110_NorthWest,N107_SouthEast,191.0,191.000000,218.421308,256.979695,529.359712
N110_NorthWest,N102_NorthWest,460.0,463.440181,520.698598,689.305556,1332.827982
N110_NorthWest,N104_NorthEast,313.0,313.000000,352.556391,438.481108,850.087282


In [10]:
# Express every scenario's travel time as a multiple of the scenario 0 value of
# the same route, then show the routes ordered by their scenario 4 ratio
baseline_tt = df_tt_roads[0].values
tt_delays = df_tt_roads.div(baseline_tt, axis="index")
tt_delays.sort_values(by=4, ascending=False)

Unnamed: 0,Unnamed: 1,0,1,2,3,4
N107_SouthEast,N107_NorthWest,1.0,1.247841,1.689016,2.942283,4.634306
N107_NorthWest,N107_SouthEast,1.0,1.290537,1.689282,2.812285,4.141019
N107_NorthWest,N108_SouthWest,1.0,1.061428,1.312734,2.054614,4.094919
N108_SouthWest,N107_NorthWest,1.0,1.071847,1.314370,2.050868,4.057692
N1_SouthEast,N110_SouthEast,1.0,1.049956,1.416983,1.969317,3.724178
...,...,...,...,...,...,...
N108_SouthWest,N108_SouthWest,,,,,
N107_SouthEast,N107_SouthEast,,,,,
N102_NorthWest,N102_NorthWest,,,,,
N104_NorthEast,N104_NorthEast,,,,,


In [11]:
# Show the scenario 4 delay ratios as a table: start points as rows, end points as columns
tt_delays.loc[:, 4].unstack()

Unnamed: 0,N102_NorthWest,N102_SouthEast,N104_NorthEast,N104_SouthWest,N105_NorthWest,N105_SouthEast,N107_NorthWest,N107_SouthEast,N108_NorthEast,N108_SouthWest,...,N206_NorthEast,N206_SouthWest,N207_NorthEast,N207_SouthWest,N209_NorthWest,N209_SouthEast,N210_NorthEast,N210_SouthWest,N2_NorthEast,N2_SouthWest
N102_NorthWest,,3.494685,3.259997,3.21439,2.738664,3.053182,3.064475,2.934428,2.950718,3.051276,...,2.479684,2.464778,2.634708,2.410055,2.535805,2.527185,2.49657,2.469015,2.507974,3.263621
N102_SouthEast,3.526453,,2.647338,2.861637,2.739888,3.357569,2.814891,2.618617,2.772728,2.81141,...,2.75209,2.730305,2.865556,2.730642,2.763958,2.768557,2.731413,2.716643,2.716069,3.350637
N104_NorthEast,3.249455,2.641814,,2.870999,2.777264,3.109597,2.759199,2.521369,2.803038,2.794768,...,2.754007,2.730678,2.832111,2.718654,2.744247,2.733733,2.715607,2.689393,2.733504,3.095552
N104_SouthWest,3.202593,2.792597,2.877006,,2.828275,3.151327,2.836157,2.663646,2.860455,2.856783,...,2.759269,2.739628,2.836285,2.735393,2.761436,2.803493,2.750556,2.745863,2.760362,3.103168
N105_NorthWest,2.79297,2.82902,2.802554,2.860374,,1.789081,2.810607,2.694622,2.813933,2.806104,...,2.569178,2.600277,2.667669,2.562602,2.58475,2.615603,2.58241,2.582025,2.595167,1.726322
N105_SouthEast,3.110737,3.443986,3.15003,3.142566,1.805807,,2.995506,2.894375,2.957601,2.976988,...,2.67217,2.688653,2.743959,2.642622,2.687246,2.651487,2.667833,2.619903,2.704565,1.782865
N107_NorthWest,3.099423,2.813721,2.729828,2.842763,2.788829,3.016474,,4.141019,3.450892,4.094919,...,2.735918,2.741504,2.765192,2.763627,2.745488,2.748655,2.751058,2.744015,2.749297,2.963695
N107_SouthEast,2.975105,2.623393,2.61537,2.698749,2.688873,2.871622,4.634306,,3.121138,3.550298,...,2.704267,2.683942,2.784239,2.683445,2.705689,2.682757,2.71138,2.69397,2.672181,2.85543
N108_NorthEast,3.01376,2.799473,2.827784,2.827507,2.836859,2.978428,3.419458,3.122484,,2.561762,...,2.710036,2.74112,2.816742,2.747285,2.729496,2.740852,2.739319,2.710057,2.714062,2.938916
N108_SouthWest,2.997092,2.836411,2.780951,2.845604,2.779548,3.009065,4.057692,3.370425,2.733784,,...,2.747727,2.752129,2.821542,2.735767,2.741301,2.744624,2.715458,2.716765,2.738141,2.958088


**To-do:**
 - DONE: Make start and end points human readable (for example, road_name + north or south for each source)
 - DONE: Check which routes show the largest increase in travel time between scenarios
 - Create some nice tables, plots and/or graphs