# Imports and config

In [None]:
import os

import numpy as np
import pandas as pd

directory = os.getcwd().split('/')[-1]

if directory == 'notebooks':
    %cd ..

In [None]:
# Matplotlib & seaborn setup
import matplotlib
import seaborn as sns
# matplotlib.use('PDF')

# Global plot appearance: 11x4 figures drawn with seaborn's 'ticks' style.
figure_defaults = {'figure.figsize': (11, 4)}
sns.set(rc=figure_defaults)
sns.set_style('ticks')

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter

# Report which rendering backend matplotlib is currently using.
active_backend = matplotlib.get_backend()
print(f'matplotlib backend: {active_backend}')

In [None]:
# configuration variables
# When True, the plotting cells that check this flag also write their figure
# to the `results/` directory as a PDF.
should_save = True

In [None]:
# Load the pre-processed incident log. The first column becomes the index and
# is parsed as dates; rows are then put in chronological order.
raw_incidents = pd.read_csv('data/processed_data.csv', index_col=0, parse_dates=True)
incidents = raw_incidents.sort_index()
incidents

# Resampling

In [None]:
# Incidents per hour: count the non-null `hastegrad` entries in each hourly bin
# and expose the result as a one-column frame named `incidents`.
hourly_counts = incidents['hastegrad'].resample('H').count()
incidents_hourly = hourly_counts.to_frame(name='incidents').sort_index()
incidents_hourly

## Hourly incidents

In [None]:
# Break the timestamp index out into calendar columns that the grouping and
# plotting cells below rely on.
for calendar_field in ('hour', 'day', 'month', 'year', 'weekday'):
    incidents_hourly[calendar_field] = getattr(incidents_hourly.index, calendar_field)
incidents_hourly

## Daily incidents

In [None]:
# Collapse the hourly counts into daily totals, then tag each day with the
# calendar columns used for grouping.
daily_totals = incidents_hourly['incidents'].resample('D').sum()
incidents_daily = daily_totals.to_frame()
for calendar_field in ('month', 'year', 'weekday'):
    incidents_daily[calendar_field] = getattr(incidents_daily.index, calendar_field)
incidents_daily

## Aggregated weekly average

In [None]:
# Mean and standard deviation of the hourly counts for every (weekday, hour)
# slot, i.e. one "typical" week of 7 * 24 rows.
incidents_weekly_average = (
    incidents_hourly['incidents']
    .groupby([incidents_hourly.weekday, incidents_hourly.hour])
    .agg(['mean', 'std'])
)

# Re-index onto a concrete reference week so the result can be drawn on a date
# axis. 2015-01-05 is a Monday, which lines up with weekday 0 of the sorted
# groups. The range is defined by its length (168 hourly stamps) rather than by
# an end point plus `closed='left'`: the `closed` keyword of `pd.date_range`
# was deprecated and later removed, while `periods` works on every version.
reference_week = pd.date_range(start='1/5/2015', periods=7 * 24, freq='H')
incidents_weekly_average = incidents_weekly_average.set_index(reference_week)
incidents_weekly_average['weekday'] = incidents_weekly_average.index.weekday
incidents_weekly_average

## Monthly incidents

In [None]:
# Monthly totals, summed from the daily counts, with the year kept as a column.
monthly_totals = incidents_daily['incidents'].resample('M').sum()
incidents_monthly = monthly_totals.to_frame()
incidents_monthly['year'] = incidents_monthly.index.year
incidents_monthly

## Yearly incidents

In [None]:
# Yearly totals, summed from the monthly counts.
yearly_totals = incidents_monthly['incidents'].resample('Y').sum()
incidents_yearly = yearly_totals.to_frame()
incidents_yearly

# Plotting

## Auto-correlation

In [None]:
# TODO
# Autocorrelation of the daily incident counts. Only the `incidents` column is
# passed: the helper works on a single series, and feeding it the whole frame
# would mix the month/year/weekday columns into the computation.
pd.plotting.autocorrelation_plot(incidents_daily['incidents'])


## Incidents per day

In [None]:
# Daily incident counts, overlaid with a centred 30-day rolling mean and a
# linear trend line.
x, y = incidents_daily.index, incidents_daily.incidents
ax = sns.lineplot(data=incidents_daily, x=x, y=y)
ax.set(xlabel='Time', ylabel='Incidents per day')

# Centred window that needs at least 28 observations, so the edges of the
# series are left blank instead of averaging over a mostly empty window.
rolling_mean = y.rolling(window=30, center=True, min_periods=28).mean()
ax.plot(rolling_mean, 'k-', label='30-days rolling mean')

# Least-squares straight line, fitted on matplotlib's numeric date values.
x = mdates.date2num(x)
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
ax.plot(x, p(x), linestyle='-.', color='red', label='Trend line')


sns.despine()
ax.legend()

ax.set_xlim(left=incidents_daily.index[0])

# The grid flag is passed positionally: its keyword was renamed from `b` to
# `visible` in matplotlib 3.5 and `b` was later removed, so the positional
# form is the one that works on every version.

# Minor axis
ax.grid(True, which='minor')
ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[4, 7, 10]))
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%b'))

# Major axis
ax.grid(True, which='major')
ax.tick_params(which='major', axis='x', length=15, width=1)
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

plt.tight_layout()

if should_save:
    plt.savefig('results/daily_incidents.pdf', dpi=600)


## Monthly plots

In [None]:
def int_to_month(x, _):
    """Format a zero-based tick value as an abbreviated month name.

    Written to be used as a ``FuncFormatter`` callback, which calls it with
    the tick value and the tick position.

    Args:
        x: Tick value; 0 maps to 'Jan' and 11 to 'Dec'. Whole-number floats
            (e.g. ``3.0``) are accepted as well as integers.
        _: Tick position supplied by the formatter; unused.

    Returns:
        The month abbreviation, or an empty string when ``x`` is not a whole
        number in the range 0..11. Returning '' leaves a stray tick unlabeled
        instead of raising, and stops negative values from wrapping around to
        the end of the list.
    """
    months = (
        'Jan',
        'Feb',
        'Mar',
        'Apr',
        'May',
        'Jun',
        'Jul',
        'Aug',
        'Sep',
        'Oct',
        'Nov',
        'Dec',
    )
    try:
        index = int(x)
    except (TypeError, ValueError, OverflowError):
        # NaN, infinity or a non-numeric value: nothing sensible to show.
        return ''
    if index != x or not 0 <= index < len(months):
        return ''
    return months[index]


In [None]:
# Distribution of the daily incident counts for each calendar month.
ax = sns.violinplot(x='month', y='incidents', data=incidents_daily)
ax.set_xlabel('Month')
ax.set_ylabel('Average incidents per day')

sns.despine(trim=True)

# Show month abbreviations on the x axis instead of the raw tick values.
month_labels = FuncFormatter(int_to_month)
ax.xaxis.set_major_formatter(month_labels)

if should_save:
    plt.savefig('results/monthly_violin_plot.pdf', dpi=600)

In [None]:
# How far each month's average daily count lies above or below the overall
# daily mean.
mean = incidents_daily['incidents'].mean()
monthly_mean = incidents_daily.groupby('month')['incidents'].mean()
deviation = monthly_mean - mean

ax = sns.barplot(x=deviation.index, y=deviation)
ax.set_xlabel('Month')
ax.set_ylabel('Deviation from daily mean')
# Zero line marks the overall mean.
ax.axhline(0, color='k')

sns.despine(trim=True)

month_labels = FuncFormatter(int_to_month)
ax.xaxis.set_major_formatter(month_labels)

if should_save:
    plt.savefig('results/monthly_deviation_from_mean.pdf', dpi=600)

## Weekday violin plot

In [None]:
def int_to_weekday(x, _):
    """Format a zero-based tick value as an abbreviated weekday name.

    Written to be used as a ``FuncFormatter`` callback, which calls it with
    the tick value and the tick position.

    Args:
        x: Tick value; 0 maps to 'Mon' and 6 to 'Sun'. Whole-number floats
            (e.g. ``2.0``) are accepted as well as integers.
        _: Tick position supplied by the formatter; unused.

    Returns:
        The weekday abbreviation, or an empty string when ``x`` is not a whole
        number in the range 0..6. Returning '' leaves a stray tick unlabeled
        instead of raising, and stops negative values from wrapping around to
        the end of the list.
    """
    weekdays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    try:
        index = int(x)
    except (TypeError, ValueError, OverflowError):
        # NaN, infinity or a non-numeric value: nothing sensible to show.
        return ''
    if index != x or not 0 <= index < len(weekdays):
        return ''
    return weekdays[index]

# Distribution of the hourly incident counts for each day of the week.
ax = sns.violinplot(x='weekday', y='incidents', data=incidents_hourly)
ax.set_xlabel('Weekday')
ax.set_ylabel('Average incidents per hour')

sns.despine(trim=True)

# Show weekday abbreviations on the x axis instead of the raw tick values.
weekday_labels = FuncFormatter(int_to_weekday)
ax.xaxis.set_major_formatter(weekday_labels)

if should_save:
    plt.savefig('results/weekday_violin_plot.pdf', dpi=600)

## Hourly average

In [None]:
# Incidents per hour plotted against the hour of the day.
ax = sns.lineplot(x='hour', y='incidents', data=incidents_hourly)
ax.set_xlabel('Hour of the day')
ax.set_ylabel('Average incidents per hour')

sns.despine()

# Anchor both axes at zero.
ax.set_ylim(bottom=0)
ax.set_xlim(left=0)

# One tick per hour, from 0 up to and including 24.
ax.grid()
hour_ticks = np.arange(25)
ax.set_xticks(hour_ticks)

if should_save:
    plt.savefig('results/daily_average.pdf', dpi=600)

## Average week (hourly mean per weekday)

In [None]:

# Mean incidents per hour across the reference week built in
# `incidents_weekly_average`.
ax = sns.lineplot(data=incidents_weekly_average, x=incidents_weekly_average.index, y='mean')
ax.set(xlabel='Time of week', ylabel='Average incidents per hour')

sns.despine()

ax.set_xlim([incidents_weekly_average.index[0], incidents_weekly_average.index[-1]])
ax.set_ylim(bottom=0)

# The grid flag is passed positionally: its keyword was renamed from `b` to
# `visible` in matplotlib 3.5 and `b` was later removed, so the positional
# form is the one that works on every version.

# Minor axis
ax.grid(True, which='minor', color='rosybrown', linestyle='--')
ax.xaxis.set_minor_locator(mdates.HourLocator(byhour=[6, 12, 18]))
ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H'))

# Major axis
ax.grid(True, which='major')
ax.tick_params(which='major', axis='x', length=17.5, width=1)
ax.xaxis.set_major_locator(mdates.DayLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%A'))

plt.tight_layout()

if should_save:
    plt.savefig('results/average_week.pdf', dpi=600)