### A notebook for analysing the results of the sensitivity analysis

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as st

In [None]:
# Select a single experiment: the input that was varied, the value it was set
# to, and the number of replications encoded in the results file name.
value_to_vary_name = "earthquake-magnitude"
v = 0.32
replications = 25

# Load the pickled time-series results for that experiment and preview them.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
series_path = f'../results/sensitivity/sens_series_{value_to_vary_name}_{v}_{replications}r_df.pickle'
series_df = pd.read_pickle(series_path)
series_df.head()

In [None]:
# Plot everything stored under the "recovered-with-help" column label against
# the dataframe index.
# NOTE(review): the columns look like a (KPI, run) MultiIndex (the next cell
# groups on column level 0), so this presumably draws one line per run — confirm.
series_df["recovered-with-help"].plot()

In [None]:
# Average over all runs: collapse the columns onto their level-0 label by
# taking the mean of every column that shares that label, then plot the result.
# Grouping is done on the transposed frame because `groupby(axis="columns")`
# is deprecated in recent pandas releases; transposing back restores the
# original orientation (rows unchanged, one column per level-0 label).
series_df_agg = series_df.T.groupby(level=[0]).mean().T
series_df_agg.plot()

## Full sensitivity analysis

### Load data

In [None]:
# Baseline settings of the model inputs covered by the sensitivity analysis.
# Maps input name -> (default value, cast-to-int flag); the flag marks inputs
# whose varied values are converted with int() when the input grid is built.
default_values = {
    name: (default, as_int)
    for name, default, as_int in (
        # (variable-name, number, rounded to int or not)
        ("call-limit", True, False),
        ("earthquake-magnitude", 0.4, False),
        ("amount-ambulances", 40, True),
        ("probability-call-112", 1, False),
        ("amount-hospitals", 10, True),
        ("hospital-capacity", 100, True),
        ("hospital-filling-percentage-t0", 60, False),
        ("initial-ambulance-search-radius", 5, True),
        ("percentage-concrete-buildings", 70, False),
        ("high-damage-road-blocked-chance", 10, False),
        ("collapsed-road-blocked-chance", 25, False),
        ("max-concurrent-calls", 50, True),
        ("average-call-time", 2.5, False),
        ("amount-drones", 10, True),
        ("drone-speed", 0.5, False),
        ("drone-range", 45, False),
        ("ambulance-reroute-frequency", 5, True),
        # ("drone-view-radius", 25, True),
    )
}

In [None]:
# Build the grid of input values used in the sensitivity runs: every default
# is scaled by each factor in `amount_to_vary` and rounded to 5 decimals;
# inputs flagged as integer are then truncated with int().
amount_to_vary = [0.8, 1.25]
input_values = {}

for name, (default, as_int) in default_values.items():
    scaled = [round(default * factor, 5) for factor in amount_to_vary]
    input_values[name] = [int(x) for x in scaled] if as_int else scaled

# "call-limit" is a boolean switch, so its scaled values are replaced by the
# two possible settings.
input_values["call-limit"] = [True, False]
input_values

In [None]:
# Path of the results pickle for call-limit=True. The loading loop below reads
# this file as the reference (default-value) dataframe for every input variable.
reference = f'../results/sensitivity/sens_series_call-limit_True_{replications}r_df.pickle'

In [None]:
# Read all the pickles into a dictionary of dataframes keyed by
# (variable name, value).
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.

# The reference run is identical for every variable, so it is read from disk
# once and copied per key instead of being unpickled in every iteration.
reference_df = pd.read_pickle(reference)

dfs = {}
for k, vs in input_values.items():
    for i, v in enumerate(vs):
        dfs[(k, v)] = pd.read_pickle(f'../results/sensitivity/sens_series_{k}_{v}_{replications}r_df.pickle')
        if i == 0:
            # Insert the reference right after the first varied value so each
            # variable's entries keep the order: first value, default, second
            # value (the normalisation step picks the default by position).
            dfs[(k, default_values[k][0])] = reference_df.copy()

In [None]:
# Names of the KPIs: the distinct labels left on the columns of the reference
# dataframe once column level 1 is removed.
# `unique()` keeps first-appearance order, so the list is identical in every
# session. The previous `list(set(...))` ordering depended on string hash
# randomisation, which made the positional slices of this list used for the
# figures select different KPIs from run to run.
KPIs = list(dfs[('call-limit', True)].columns.droplevel(1).unique())
print(KPIs)

### Drop first experiment run (corrupted)
The first run of each experiment is corrupted because the input values were not set correctly, so it is dropped here. 24 runs remain.

In [None]:
# Remove the column labelled 0 on column level 1 from every experiment
# dataframe; each entry is replaced by the reduced copy.
dfs = {
    key: frame.drop(labels=0, axis="columns", level=1)
    for key, frame in dfs.items()
}

### Calculate means

In [None]:
# For every (variable, value) experiment keep the single row at position 720
# of its dataframe. Despite the name, no averaging happens at this step.
# NOTE(review): 720 is presumably the final time step of a run — confirm.
mean_dict = {key: df_t.iloc[720] for key, df_t in dfs.items()}

In [None]:
# Assemble the per-experiment rows into one table: one row per
# (variable, value) key, with the columns sorted on their level-0 label.
mean_df = (
    pd.DataFrame.from_dict(mean_dict)
    .T
    .sort_index(axis="columns", level=0)
)
mean_df

In [None]:
# Export the table to an Excel workbook named "test.xlsx" (path is relative to
# the current working directory).
mean_df.to_excel("test.xlsx")

### Normalize sensitivity

In [None]:
# Transpose so the (variable, value) keys become the columns, then remove the
# "call-limit" columns (matched on column level 0) from the table.
mean_df_s = mean_df.T.drop(columns="call-limit", level=0)
mean_df_s

In [None]:
# Relative deviation of every varied experiment from its reference:
# (value - reference) / reference, computed row by row of `mean_df_s`.
dev_df = pd.DataFrame(index=mean_df_s.index)

# "call-limit" was dropped from `mean_df_s` by name, so it is skipped by name
# here as well instead of relying on it being the first key of the dict.
numeric_inputs = [name for name in input_values if name != "call-limit"]

for key in numeric_inputs:
    per_input = mean_df_s[key]
    # Position 1 holds the default value: the experiments were loaded in the
    # order first varied value, default, second varied value.
    ref = per_input.columns[1]
    baseline = per_input[ref]
    for i, column in enumerate(per_input.columns):
        if i != 1:
            # Label each column with the factor applied to the default value.
            ratio = column / ref
            dev_df[key, ratio] = (per_input[column] - baseline) / baseline

# The tuple column labels become a (variable, ratio) MultiIndex.
dev_df.columns = pd.MultiIndex.from_tuples(dev_df.columns)
dev_df.T

In [None]:
# KPIs excluded from the plots, and the list of KPIs that remain (original
# order of `KPIs` preserved).
KPIs_to_drop = [
    'fraction-destroyed-streets-spotted',
    'fraction-called-in',
    'number-destroyed-streets-spotted',
]
KPIs2 = [kpi for kpi in KPIs if kpi not in KPIs_to_drop]

In [None]:
# Build one long-format table per KPI for plotting: the deviations of that KPI
# are stacked into a single value column (named 0) and the index levels are
# turned into ordinary columns (level_0, level_1, ...).
deviations_by_input = dev_df.T
plot_df = {}
for kpi in KPIs2:
    long_form = deviations_by_input[kpi].stack(level=0).reset_index()
    # Round the ratio labels to two decimals.
    long_form["level_1"] = long_form["level_1"].round(2)
    plot_df[kpi] = long_form
plot_df[KPIs2[0]].head()

In [None]:
# First figure: point plots for the first two KPIs in KPIs2, side by side.
# Each panel shows the value column (0) per level_0 category, coloured by the
# rounded ratio in level_1, with 95% confidence intervals.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
g = {}
for ax, KPI in zip(axes, KPIs2[:2]):
    g[KPI] = sns.pointplot(
        plot_df[KPI],
        y=0,
        x="level_0",
        hue="level_1",
        errorbar=('ci', 95),
        join=False,
        dodge=True,
        ax=ax,
    )
    panel = g[KPI]
    panel.set_title(f"Influence on {KPI}")
    # Rotate the category labels so the long names do not overlap.
    panel.set_xticklabels(panel.get_xticklabels(), rotation=90)
    panel.set_xlabel("Variation in input value")
    panel.set_ylabel(f"Effect on KPI {KPI}")
fig.suptitle("Effect of variation of input values on KPIs (with 95% confidence interval)")
fig.savefig("../images/sensitivity_pointplots_1.svg")

In [None]:
# Second figure: point plots for every KPI in KPIs2 after the first two.
# The grid is sized from the number of KPIs that are left, so a longer KPI
# list no longer indexes past a hard-coded two-panel grid.
remaining_KPIs = KPIs2[2:]
n_panels = max(len(remaining_KPIs), 1)  # plt.subplots needs at least one column
fig, axes = plt.subplots(1, n_panels, squeeze=False)  # squeeze=False: axes is always 2-D
fig.set_size_inches(6 * n_panels, 5)  # 12 x 5 for the usual two panels
g = {}
for ax, KPI in zip(axes[0], remaining_KPIs):
    g[KPI] = sns.pointplot(plot_df[KPI], y=0, x="level_0", hue="level_1", errorbar=('ci', 95), join=False, dodge=True, ax=ax)
    g[KPI].set_title(f"Influence on {KPI}")
    # Rotate the category labels so the long names do not overlap.
    g[KPI].set_xticklabels(g[KPI].get_xticklabels(), rotation=90)
    g[KPI].set_xlabel("Variation in input value")
    g[KPI].set_ylabel(f"Effect on KPI {KPI}")
fig.suptitle("Effect of variation of input values on KPIs (with 95% confidence interval)")
fig.savefig("../images/sensitivity_pointplots_2.svg")

### Extreme values

In [None]:
# Inputs included in the extreme-value experiments; only these names are
# picked from `default_values` when the extreme input grid is built, and each
# gets its own panel in the extreme-value figure.
ev_inputs = [
    "earthquake-magnitude",
    "amount-ambulances",
    "amount-hospitals",
    # "drone-view-radius",
]

In [None]:
# Build the grid of extreme input values: for the inputs listed in
# `ev_inputs`, scale the default by each factor, round to 5 decimals and
# truncate with int() where the input is flagged as integer.
# Note that this rebinds `amount_to_vary`, replacing the factors used for the
# regular sensitivity grid.
amount_to_vary = [0.2, 1.8]
ev_input_values = {}

for name in default_values:
    if name not in ev_inputs:
        continue
    default, as_int = default_values[name]
    scaled = [round(default * factor, 5) for factor in amount_to_vary]
    ev_input_values[name] = [int(x) for x in scaled] if as_int else scaled
ev_input_values

In [None]:
# Load the extreme-value results into a dict keyed by (variable name, value).
# The file names encode 10 replications for these experiments.
# NOTE: unpickling can execute arbitrary code; only load files you produced yourself.
# NOTE(review): unlike the regular experiments, run 0 is not dropped from these
# dataframes — confirm whether that run is valid here.
ev_dfs = {
    (k, v): pd.read_pickle(f'../results/sensitivity/sens_series_{k}_{v}_{10}r_df.pickle')
    for k, vs in ev_input_values.items()
    for v in vs
}

In [None]:
# For every extreme-value experiment keep the single row at position 720 of
# its dataframe (as a plain dict). Despite the name, no averaging happens here.
# NOTE(review): 720 is presumably the final time step of a run — confirm.
ev_mean_dict = {key: df_t.iloc[720].to_dict() for key, df_t in ev_dfs.items()}

# One row per (variable, value) experiment.
ev_mean_df = pd.DataFrame.from_dict(ev_mean_dict).T
# Keep the sorted frame, as is done for `mean_df`; previously the sorted
# result was only displayed and then discarded.
ev_mean_df = ev_mean_df.sort_index(axis="columns", level=0)
ev_mean_df

In [None]:
# KPIs removed from the extreme-value tables in the cells below.
KPIs_to_drop = ['fraction-destroyed-streets-spotted', 'fraction-called-in', 'number-destroyed-streets-spotted']

In [None]:
# Remove the unwanted KPIs (matched on column level 0), transpose so the
# experiments become the columns, and sort the rows on index level 0.
ev_mean_df_s = (
    ev_mean_df
    .drop(columns=KPIs_to_drop, level=0)
    .T
    .sort_index(axis="index", level=0)
)
ev_mean_df_s

In [None]:
# Same clean-up for the regular sensitivity table: remove the unwanted KPIs
# from the rows (matched on index level 0) and sort the rows on index level 0.
mean_df_s2 = (
    mean_df_s
    .drop(index=KPIs_to_drop, level=0)
    .sort_index(axis="index", level=0)
)
mean_df_s2

In [None]:
# Put the extreme-value and regular experiments side by side (aligned on the
# row index) and order the columns on their level-1 label (the input value).
comb_mean_df = (
    pd.concat([ev_mean_df_s, mean_df_s2], axis=1)
    .sort_index(level=[1], axis="columns")
)
comb_mean_df

In [None]:
# Relative deviation from the reference for the extreme-value inputs:
# (value - reference) / reference, computed row by row of `comb_mean_df`.
# The reference column itself is included, giving a zero-deviation point at
# ratio 1.
ev_dev_df = pd.DataFrame(index=comb_mean_df.index)
for key in ev_input_values:
    per_input = comb_mean_df[key]
    # The columns were sorted by input value, so position 2 of the five values
    # per input (two extreme, two regular, one default) is the default.
    ref = per_input.columns[2]
    baseline = per_input[ref]
    for column in per_input.columns:
        # Label each column with the factor applied to the default value.
        ratio = column / ref
        ev_dev_df[key, ratio] = (per_input[column] - baseline) / baseline

# The tuple column labels become a (variable, ratio) MultiIndex.
ev_dev_df.columns = pd.MultiIndex.from_tuples(ev_dev_df.columns)
ev_dev_df.T

In [None]:
# One line-plot panel per extreme-value input, showing the deviation table of
# that input (transposed so the ratios run along the x-axis).
fig, axes = plt.subplots(1, len(ev_inputs), figsize=(16, 5))
g = {}
for ax, in_var in zip(axes, ev_inputs):
    g[in_var] = sns.lineplot(
        ev_dev_df[in_var].T,
        ax=ax,
        markers=True,
        errorbar=("ci", 95),
        err_style="band",
    )
    panel = g[in_var]
    panel.set_title(f"Influence of {in_var} on KPIs")
    panel.set_xlabel("Variation in input value")
    panel.set_ylabel("Effect in output value (KPI)")
fig.suptitle("Effect of variation of input values on KPIs (with 95% confidence interval)")
fig.savefig("../images/extreme_values_plots_ci.svg")