# Imports and config

In [None]:
import os

import numpy as np
import pandas as pd

directory = os.getcwd().split('/')[-1]

if directory == 'notebooks':
    %cd ..

In [None]:
# Matplotlib & seaborn setup
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter
import seaborn as sns


# Seaborn defaults: wide figures drawn with the 'ticks' style.
sns.set(rc={'figure.figsize': (11, 4)})
sns.set_style('ticks')

# Extension appended to saved figure paths; it also selects the backend below.
FILE_EXTENSION = '.pdf'

# rc overrides, one dict per matplotlib rc group.
font = dict(family='serif', size=14)
figure = dict(autolayout=True)
for rc_group, rc_params in (('font', font), ('figure', figure)):
    matplotlib.rc(rc_group, **rc_params)

# Figures written as PDF are rendered with the PDF backend.
if FILE_EXTENSION == '.pdf':
    matplotlib.use('PDF')

print(f'matplotlib backend: {matplotlib.get_backend()}')

In [None]:
# Load the processed incident table; the first CSV column becomes a
# datetime-parsed index, and rows are ordered chronologically.
incidents = pd.read_csv(
    'proprietary_data/processed_data.csv',
    index_col=0,
    parse_dates=True,
)
incidents = incidents.sort_index()
incidents

In [None]:
# Convert the `ank_hentested` column to datetimes, interpreting ambiguous
# dates as day-first.
# NOTE(review): presumably this is the arrival-at-pickup timestamp - confirm.
incidents = incidents.assign(
    ank_hentested=pd.to_datetime(incidents['ank_hentested'], dayfirst=True)
)
incidents['ank_hentested']

In [None]:
# Keep only incidents that have an `ank_hentested` timestamp.
# A boolean mask is used instead of `drop(<index labels>)`: dropping by label
# removes *every* row that shares an index value with a missing row, so valid
# incidents with a duplicated timestamp would be discarded as well.
incidents = incidents[incidents['ank_hentested'].notna()]
incidents

In [None]:
# Response time per incident: `ank_hentested` minus the row's index timestamp,
# expressed as whole seconds (fractional seconds are truncated by the int cast).
# NOTE(review): assumes the index holds the incident start time - confirm.
response_times = (
    (incidents['ank_hentested'] - incidents.index)
    .dt.total_seconds()
    .astype(int)
    .rename('response_times')
)
response_times.plot()

In [None]:
# Rows in which any allocation strategy exceeds this value are treated as
# outliers and excluded.
# NOTE(review): presumably seconds, like the historic response times - confirm.
RESPONSE_TIME_CUTOFF = 50000
STRATEGY_COLUMNS = ['Uniform', 'UniformRandom', 'Random', 'PopulationProportionate']

simulation = pd.read_csv(
    '../output/simulation/first_experiment_response_times.csv',
    index_col=0,
    parse_dates=True,
).sort_index()

# Boolean mask rather than `drop(<index labels>)`: dropping by label would also
# remove non-outlier rows that happen to share a timestamp with an outlier.
is_outlier = (simulation[STRATEGY_COLUMNS] > RESPONSE_TIME_CUTOFF).any(axis=1)

# Only the PopulationProportionate strategy is compared against history.
results = simulation.loc[~is_outlier, 'PopulationProportionate'].dropna()
results

## Line up time series

In [None]:
# Restrict the historic series to the period covered by the simulation.
# The window is open at the start and closed at the end: (start_date, end_date].
start_date = results.index[0]
end_date = results.index[-1]

in_window = (response_times.index > start_date) & (response_times.index <= end_date)
response_times = response_times[in_window]
response_times

In [None]:
# Pair every historic observation with the simulated value closest in time.
merged = pd.merge_asof(
    left=response_times,
    right=results,
    left_index=True,
    right_index=True,
    direction='nearest',
)

# Correlation between the two columns (DataFrame.corr defaults to Pearson).
r = merged.corr().loc['response_times', 'PopulationProportionate']
r

In [None]:
# Draw on a dedicated figure. Without an explicit Axes, Series.plot reuses the
# current axes when a figure is already open, so the saved file could contain
# lines left over from earlier cells.
fig, ax = plt.subplots()
response_times.plot(label='historic', ax=ax)
results.plot(label='simulated', ax=ax)

ax.set(xlabel='day', ylabel='response time / (s)')
ax.set_title(f'Overall Pearson $r$ = {r:.4f}')
ax.legend()

# Save exactly the figure built above rather than whichever one is current.
fig.savefig(f'../output/validation/response_times_correlation{FILE_EXTENSION}')

# Resampling

In [None]:
# Hourly mean of both series. The lowercase 'h' alias is used because the
# uppercase 'H' spelling is deprecated in recent pandas releases.
# resample() already returns a chronologically ordered Series, so no extra
# wrapping or sorting is needed.
response_times_hourly = response_times.resample('h').mean()
results_hourly = results.resample('h').mean()

# Explicit figure/axes so both lines land on the same, freshly created plot.
fig, ax = plt.subplots()
response_times_hourly.plot(label='historic', ax=ax)
results_hourly.plot(label='simulated', ax=ax)

ax.set(xlabel='Day', ylabel='Mean response time')
ax.legend();

## Daily mean response times

In [None]:
# Daily mean of both series (resample().mean() already returns a Series).
response_times_daily = response_times.resample('D').mean()
results_daily = results.resample('D').mean()

# Explicit figure/axes, consistent with the other comparison plots, instead of
# relying on whichever axes happens to be current.
fig, ax = plt.subplots()
response_times_daily.plot(label='historic', ax=ax)
results_daily.plot(label='simulated', ax=ax)

ax.set(xlabel='Day', ylabel='Mean response time')
ax.legend();

## Aggregated weekly average

In [None]:
HOURS_PER_WEEK = 7 * 24

# Arbitrary reference week, Monday 2015-01-05 00:00 through Sunday 23:00, used
# only to give the weekly profile a datetime axis. `periods` is used instead of
# an end date with `closed='left'`, which newer pandas versions no longer accept.
reference_week = pd.date_range(start='1/5/2015', periods=HOURS_PER_WEEK, freq='h')


def weekly_profile(hourly, week_index=reference_week):
    """Average an hourly series into one value per (weekday, hour) slot.

    Parameters
    ----------
    hourly : pandas.Series
        Series indexed by a DatetimeIndex.
    week_index : pandas.DatetimeIndex
        168 hourly timestamps, starting on a Monday, used as the result index.

    Returns
    -------
    pandas.DataFrame
        Columns 'mean' and 'std' per slot plus a 'weekday' column (0 = Monday),
        indexed by `week_index`.
    """
    profile = hourly.groupby([hourly.index.weekday, hourly.index.hour]).agg(['mean', 'std'])
    # Guarantee all 168 slots exist (missing ones become NaN) so that the
    # reference index always lines up, even if the data covers less than a week.
    every_slot = pd.MultiIndex.from_product([range(7), range(24)])
    profile = profile.reindex(every_slot)
    profile.index = week_index
    profile['weekday'] = profile.index.weekday
    return profile


response_times_weekly_average = weekly_profile(response_times_hourly)
results_weekly_average = weekly_profile(results_hourly)


## Hourly average

In [None]:
# Mean response time per hour of the day (0-23), historic vs. simulated,
# drawn on a dedicated figure.
fig, ax = plt.subplots()
response_times_hourly.groupby(response_times_hourly.index.hour).mean().plot.line(label='historic', ax=ax)
results_hourly.groupby(results_hourly.index.hour).mean().plot.line(label='simulated', ax=ax)

ax.set(xlabel='hour of the day', ylabel='mean response time / (s)')

sns.despine()

# Anchor both axes at zero.
ax.set_ylim(bottom=0)
ax.set_xlim(left=0)

ax.grid()
ax.xaxis.set_ticks(np.arange(0, 25, 1))

# The original cell ended with a bare `plt.plot` (never called), which only
# echoed the function object; it is dropped and the legend repr is suppressed.
ax.legend();

## Average response time by weekday and hour

In [None]:

# Weekly profile (mean per weekday/hour slot), historic vs. simulated, drawn on
# a dedicated figure.
fig, ax = plt.subplots()
sns.lineplot(data=response_times_weekly_average, x=response_times_weekly_average.index, y='mean', label='historic', ax=ax)
sns.lineplot(data=results_weekly_average, x=results_weekly_average.index, y='mean', label='simulated', ax=ax)
ax.set(xlabel='Time of week', ylabel='Mean response time')

sns.despine()

ax.set_xlim([response_times_weekly_average.index[0], response_times_weekly_average.index[-1]])
ax.set_ylim(bottom=0)

# Minor axis: dashed grid lines and hour labels at 06, 12 and 18 o'clock.
# The visibility flag is passed positionally because its keyword name changed
# between matplotlib releases (`b` -> `visible`).
ax.grid(True, which='minor', color='rosybrown', linestyle='--')
ax.xaxis.set_minor_locator(mdates.HourLocator(byhour=[6, 12, 18]))
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H'))

# Major axis: one long tick per day, labelled with the weekday name.
ax.grid(True, which='major')
ax.tick_params(which='major', axis='x', length=17.5, width=1)
ax.xaxis.set_major_locator(mdates.DayLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%A'))

plt.tight_layout()