# Inference on Hawaii data: observing the effect of epsilon

## Load all the data and convert it to the correct format

In [1]:
# Import libraries
import numpy as np
import math
import branchpro
import scipy.stats
import plotly.graph_objects as go
import os
import pandas as pd


In [2]:
# Load the serial interval table from the data library.
si_file = 'si-epinow'
path = os.path.join(
    '../data_library/serial_interval',
    '{}.csv'.format(si_file),
)

# The file has no header row; missing entries are replaced by zeros.
serial_interval = pd.read_csv(path, header=None).fillna(0)

# Transpose so that every column of the file becomes one row of the array
# (presumably one serial interval per file column -- confirm against the data).
serial_intervals = serial_interval.to_numpy().T

In [3]:
# Name of the column holding the (1-based) day index in every regional CSV
time_key = 'Time'

# Read Hawaii region data
regions = ['Hawaii', 'Honolulu', 'Kauai', 'Maui']
regions_locally_infected_cases = []
regions_imported_cases = []
regions_times = []
regions_start_times = []

# First pass: parse every regional CSV exactly once, keep the frames, and
# find the longest time series so that all regions can share one time axis.
regions_data = []
max_num_timepoints = 0
for region in regions:
    path = os.path.join('../data_library/covid_hawaii/', '{}.csv'.format(region))
    data = pd.read_csv(path)
    regions_data.append(data)

    # int(...) keeps the value usable as a list length below
    max_num_timepoints = max(int(data[time_key].max()), max_num_timepoints)

num_timepoints = max_num_timepoints
total_locally_infected_cases = [0] * num_timepoints
total_imported_cases = [0] * num_timepoints

# Second pass: align every region on days 1..num_timepoints and accumulate
# the totals over all regions.
for data in regions_data:
    # Pad with zeros the time points where we have no information on
    # the number of incidences
    padded_inc_data = (
        data.set_index(time_key)
        .reindex(range(1, num_timepoints+1))
        .fillna(0)
        .reset_index()
    )
    locally_infected_cases = padded_inc_data['Incidence Number']
    imported_cases = padded_inc_data['Imported Cases']

    # Days 1..num_timepoints: one entry per padded row
    start_times = np.arange(1, num_timepoints+1, dtype=int)
    # NOTE(review): 0..num_timepoints has one more entry than the case
    # series it is plotted against later -- confirm this is intended.
    times = np.arange(num_timepoints+1)

    regions_locally_infected_cases.append(locally_infected_cases)
    regions_imported_cases.append(imported_cases)
    regions_times.append(times)
    regions_start_times.append(start_times)

    # Sum of all regions (element-wise, kept as plain lists)
    total_locally_infected_cases = [
        region_count + running_total
        for region_count, running_total in zip(
            locally_infected_cases, total_locally_infected_cases)]
    total_imported_cases = [
        region_count + running_total
        for region_count, running_total in zip(
            imported_cases, total_imported_cases)]

## Comparison of inference results for different epsilon values

In [None]:
# Initialise parameter values
tau = 2
R_t_start = tau+1
# a and b are handed to the posterior below as alpha=a and beta=1/b
a = 1
b = 0.8

# Run inferences for different values of epsilon
column_names = ['Time Points', 'Mean', 'Lower bound CI', 'Upper bound CI', 'Central Probability', 'Epsilon']
epsilon_range = np.arange(-1, 1.1, 0.2)
# Interval widths requested from every posterior; the zero-width interval
# is what the plotting cell uses as the median.
central_probs = (0, .25, .95)

# Days at which the R_t intervals are reported, one list per region
# (same order as `regions`)
chosen_times_hawaii = [20, 40, 50, 60]
chosen_times_honolulu = [10, 20, 30, 40, 50]
chosen_times_kauai = [20, 30, 100, 110]
chosen_times_maui = [20, 30, 40, 50]
chosen_times_all = [chosen_times_hawaii, chosen_times_honolulu,
                chosen_times_kauai, chosen_times_maui]
chosen_times_df = pd.DataFrame(
        {'Region': regions,
         'Interested times': chosen_times_all})

regions_all_intervals = []
for region_num in range(len(regions)):
    inc_data = pd.DataFrame(
        {
            'Time': regions_start_times[region_num],
            'Incidence Number': regions_locally_infected_cases[region_num]
        }
    )

    imported_inc_data = pd.DataFrame(
        {
            'Time': regions_start_times[region_num],
            'Incidence Number': regions_imported_cases[region_num]
        }
    )
    chosen_times = chosen_times_all[region_num]

    # Frames are collected in a list and concatenated once:
    # DataFrame.append was removed in pandas 2.0.
    region_frames = []
    for epsilon in epsilon_range:
        inference = branchpro.LocImpBranchProPosteriorMultSI(
            inc_data=inc_data,
            imported_inc_data=imported_inc_data,
            epsilon=epsilon,
            daily_serial_intervals=serial_intervals,
            alpha=a,
            beta=1/b)

        inference.run_inference(tau=tau)
        intervals = pd.concat(
            [inference.get_intervals(central_prob=prob)
             for prob in central_probs])

        # Tag every row with the epsilon that produced it
        intervals['Epsilon'] = epsilon

        # Keep only the rows for the days of interest, in the order the
        # days are listed
        region_frames.extend(
            intervals.loc[intervals['Time Points'] == time_pt]
            for time_pt in chosen_times)

    if region_frames:
        all_intervals = pd.concat(region_frames)
    else:
        # Nothing was selected: keep an empty frame with the expected columns
        all_intervals = pd.DataFrame(columns=column_names)

    regions_all_intervals.append(all_intervals)

In [None]:
from plotly.subplots import make_subplots

# One figure per region: daily cases on top, R_t box plots per epsilon below.
for region_num in range(len(regions)):
    # Every region has its own list of days of interest (and the lists differ
    # in length), so the x-axis labels and the slice width must be derived
    # per region. chosen_times_df is used because it is only defined by the
    # per-region inference cell.
    chosen_times = chosen_times_df['Interested times'].iloc[region_num]
    # Create list of names for x-axis and traces
    group_name = ['Day{}'.format(x) for x in chosen_times]
    n = len(group_name)

    # Plot (bar chart cases each day)
    fig = make_subplots(rows=2, cols=1)

    # Plot of incidences
    fig.add_trace(
        go.Bar(
            x=regions_times[region_num],
            y=regions_locally_infected_cases[region_num],
            name='Local Incidences'
        ),
        row=1, col=1
    )

    fig.add_trace(
        go.Bar(
            x=regions_times[region_num],
            y=regions_imported_cases[region_num],
            name='Imported Incidences'
        ),
        row=1, col=1
    )

    all_intervals = regions_all_intervals[region_num]
    # Sort results so that, for a fixed central probability, rows come in
    # consecutive runs of n days per epsilon
    all_intervals.sort_values(by=['Epsilon','Time Points'], inplace=True)

    # Filter once per central probability instead of once per trace argument
    central_prob = all_intervals['Central Probability']
    median_rows = all_intervals.loc[central_prob == 0]
    quartile_rows = all_intervals.loc[central_prob == 0.25]
    fence_rows = all_intervals.loc[central_prob == 0.95]

    for i in range(len(epsilon_range)):
        # Positional window holding the n chosen days of the i-th epsilon
        rows = slice(i*n, (i+1)*n)
        fig.add_trace(
            go.Box(
                mean=median_rows['Mean'].iloc[rows],
                # Bound of the zero-width interval, used as the median
                median=median_rows['Lower bound CI'].iloc[rows],
                q1=quartile_rows['Lower bound CI'].iloc[rows],
                q3=quartile_rows['Upper bound CI'].iloc[rows],
                upperfence=fence_rows['Upper bound CI'].iloc[rows],
                lowerfence=fence_rows['Lower bound CI'].iloc[rows],
                x=group_name,
                name=str(round(epsilon_range[i],1))
            ),
            row=2, col=1
        )
    fig.update_layout(boxmode='group', title=regions[region_num])
    fig.update_xaxes(title_text='Time (days)', row=1, col=1)
    fig.update_yaxes(title_text='New cases', row=1, col=1)
    fig.update_yaxes(title_text='R_t', row=2, col=1)

    fig.show()

In [None]:
# Initialise parameter values
tau = 2
R_t_start = tau+1
a = 1
b = 0.8

# Run inferences for different values of epsilon
column_names = ['Time Points', 'Mean', 'Lower bound CI', 'Upper bound CI', 'Central Probability', 'Epsilon']
epsilon_range = np.arange(-1, 1.1, 0.2)

# Three windows of days at which the R_t intervals are compared
chosen_times_1 = [10, 20, 30, 40, 50]
chosen_times_2 = [100, 110, 120, 130, 140]
chosen_times_3 = [150, 160, 170, 180, 190]
chosen_times_all = [chosen_times_1, chosen_times_2, chosen_times_3]

# The x-axis labels are not built here: they differ per window, and the
# plotting cell derives them from each entry of chosen_times_all. Building
# them from a leftover `chosen_times` would label the wrong days (or raise
# NameError on a fresh kernel).

In [None]:
# Incidence summed over all regions; every region shares the same day axis,
# so the first region's start times are used.
inc_data = pd.DataFrame(
    {
        'Time': regions_start_times[0],
        'Incidence Number': total_locally_infected_cases
    }
)

imported_inc_data = pd.DataFrame(
    {
        'Time': regions_start_times[0],
        'Incidence Number': total_imported_cases
    }
)

# None of the inputs to the inference depend on which days are displayed,
# so it is run once per epsilon and reused for every window of days.
intervals_per_epsilon = []
for epsilon in epsilon_range:
    inference = branchpro.LocImpBranchProPosteriorMultSI(
        inc_data=inc_data,
        imported_inc_data=imported_inc_data,
        epsilon=epsilon,
        daily_serial_intervals=serial_intervals,
        alpha=a,
        beta=1/b)

    inference.run_inference(tau=tau)
    # The zero-width interval is what the plotting cell uses as the median.
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    intervals = pd.concat(
        [inference.get_intervals(central_prob=prob)
         for prob in (0, .25, .95)])

    # Tag every row with the epsilon that produced it
    intervals['Epsilon'] = epsilon
    intervals_per_epsilon.append(intervals)

# One result frame per window of days. Each frame is built from scratch so
# that it only holds the rows of its own window.
time_all_intervals = []
for chosen_times in chosen_times_all:
    window_frames = [
        intervals.loc[intervals['Time Points'] == time_pt]
        for intervals in intervals_per_epsilon
        for time_pt in chosen_times
    ]

    if window_frames:
        all_intervals = pd.concat(window_frames)
    else:
        # Nothing was selected: keep an empty frame with the expected columns
        all_intervals = pd.DataFrame(columns=column_names)

    time_all_intervals.append(all_intervals)

In [None]:
# Plot (bar chart cases each day) for the totals over all regions, followed
# by one row of R_t box plots per window of chosen days.
num_windows = len(chosen_times_all)
fig = make_subplots(rows=num_windows + 1, cols=1)

# Plot of incidences. The inference above was run on the all-region totals,
# so the totals (not a single region's series) are shown here. All regions
# share the same time axis, hence regions_times[0].
fig.add_trace(
    go.Bar(
        x=regions_times[0],
        y=total_locally_infected_cases,
        name='Local Incidences'
    ),
    row=1, col=1
)

fig.add_trace(
    go.Bar(
        x=regions_times[0],
        y=total_imported_cases,
        name='Imported Incidences'
    ),
    row=1, col=1
)

for num in range(num_windows):
    all_intervals = time_all_intervals[num]
    # Sort results so that, for a fixed central probability, rows come in
    # consecutive runs of n days per epsilon
    all_intervals.sort_values(by=['Epsilon','Time Points'], inplace=True)

    # Create list of names for x-axis and traces
    chosen_times = chosen_times_all[num]
    group_name = ['Day{}'.format(x) for x in chosen_times]
    n = len(group_name)

    # Filter once per central probability instead of once per trace argument
    central_prob = all_intervals['Central Probability']
    median_rows = all_intervals.loc[central_prob == 0]
    quartile_rows = all_intervals.loc[central_prob == 0.25]
    fence_rows = all_intervals.loc[central_prob == 0.95]

    for i in range(len(epsilon_range)):
        # Positional window holding the n chosen days of the i-th epsilon
        rows = slice(i*n, (i+1)*n)
        fig.add_trace(
            go.Box(
                mean=median_rows['Mean'].iloc[rows],
                # Bound of the zero-width interval, used as the median
                median=median_rows['Lower bound CI'].iloc[rows],
                q1=quartile_rows['Lower bound CI'].iloc[rows],
                q3=quartile_rows['Upper bound CI'].iloc[rows],
                upperfence=fence_rows['Upper bound CI'].iloc[rows],
                lowerfence=fence_rows['Lower bound CI'].iloc[rows],
                x=group_name,
                name=str(round(epsilon_range[i],1))
            ),
            row=num + 2, col=1
        )
    # Every box-plot row shows R_t, so label each of them
    fig.update_yaxes(title_text='R_t', row=num + 2, col=1)

fig.update_layout(boxmode='group', title='All regions (total)')
fig.update_xaxes(title_text='Time (days)', row=1, col=1)
fig.update_yaxes(title_text='New cases', row=1, col=1)

fig.show()