# What fraction of the effective reproduction number "R" of COVID-19 in the Netherlands is explained by the number of positive tests in the municipality of Amsterdam?
## A correlation study using daily Dutch COVID-19 data from the National Institute for Public Health and the Environment (RIVM)

Data used:
- https://data.rivm.nl/geonetwork/srv/dut/catalog.search#/metadata/ed0699d1-c9d5-4436-8517-27eb993eab6e
- https://data.rivm.nl/geonetwork/srv/dut/catalog.search#/metadata/5f6bc429-1596-490e-8618-1ed8fd768427

In [1]:
from datetime import datetime
from IPython.display import display
import pandas as pd
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import Span, Label
# Send Bokeh figures to inline notebook output instead of a separate HTML file.
output_notebook()

# Shared hex colour palette for the plots in this notebook
# (index 0 marks the method-switch date, index 1 draws the average R line).
COLORS = (
    '#7570b3',
    '#d95f02',
    '#1b9e77',
)

In [2]:
def load_data(cases_csv="COVID-19_aantallen_gemeente_per_dag.csv",
              reproduction_json='COVID-19_reproductiegetal.json',
              municipality='Amsterdam'):
    """Load the RIVM case counts and the RIVM reproduction-number series.

    Prints the summed totals of the ``Total_reported``, ``Hospital_admission``
    and ``Deceased`` columns, plus the row counts of the full and the
    per-municipality frame.

    Parameters
    ----------
    cases_csv : str or path-like
        Semicolon-separated file with "cases per municipality per day". The
        columns ``Date_of_report``, ``Municipality_name``, ``Total_reported``,
        ``Hospital_admission`` and ``Deceased`` are read by this function.
    reproduction_json : str or path-like
        JSON file with the reproduction number; must contain a ``Date`` field.
    municipality : str
        Value of ``Municipality_name`` to select for the second return value.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(all_cases, municipality_cases, R_per_day)`` where ``all_cases`` is
        the CSV as loaded, ``municipality_cases`` holds only the rows of
        ``municipality`` indexed and sorted by ``Date_of_report``, and
        ``R_per_day`` is the JSON data indexed and sorted by ``Date``.
    """
    # Loading and describing the RIVM data "cases per municipality per day"
    cases_per_municipality_per_day = pd.read_csv(cases_csv, sep=";")
    print(f"Total number of COVID-19 cases: {cases_per_municipality_per_day.Total_reported.sum()}")
    print(f"Total number of COVID-19 hospital admissions: {cases_per_municipality_per_day.Hospital_admission.sum()}")
    print(f"Total number of COVID-19 deaths: {cases_per_municipality_per_day.Deceased.sum()}")
    print(f"Number of rows in Dataframe: {len(cases_per_municipality_per_day.index)}")

    # Creating a subset of only statistics about the requested municipality.
    # Boolean indexing followed by set_index/sort_index yields a new frame,
    # so the full frame returned below is left unmodified.
    is_selected = cases_per_municipality_per_day.Municipality_name == municipality
    municipality_cases_per_day = (
        cases_per_municipality_per_day[is_selected]
        .set_index('Date_of_report')
        .sort_index()
    )
    print(f"Number of rows in Dataframe: {len(municipality_cases_per_day.index)}")

    # Loading the RIVM data "COVID-19 reproduction number", ordered by date
    R_per_day = pd.read_json(reproduction_json).set_index('Date').sort_index()
    return cases_per_municipality_per_day, municipality_cases_per_day, R_per_day

In [3]:
# Load the data once for the rest of the notebook; load_data() also prints the summary totals.
cases_per_municipality_per_day, amsterdam_cases_per_day, R_per_day = load_data()

Total number of COVID-19 cases: 1051965
Total number of COVID-19 hospital admissions: 23761
Total number of COVID-19 deaths: 15200
Number of rows in Dataframe: 136080
Number of rows in Dataframe: 720


In [51]:
 def plot_reproduction_number(R_per_day):
    """Show a Bokeh chart of the reproduction number over time.

    ``R_per_day`` must provide the columns ``Rt_low``, ``Rt_avg`` and
    ``Rt_up``; its index is used as the (datetime) x-axis. The chart is
    displayed with ``show`` and nothing is returned.
    """
    dates = R_per_day.index
    # Date of the RIVM change in method, marked in the chart below.
    method_switch_day = datetime(2020, 6, 12).date()

    chart = figure(
        title="Reproduction number 'R' per day in the Netherlands",
        x_axis_type="datetime",
        x_axis_label="Date",
        y_axis_label="Reproduction number",
        y_range=(0, 5),
        plot_width=1200,
        plot_height=400,
        tools="",
        toolbar_location=None,
    )

    # Band first, so the average line is drawn on top of it.
    chart.varea(
        x=dates,
        y1=R_per_day.Rt_low,
        y2=R_per_day.Rt_up,
        color='#BDBDBD',
        legend_label="95% Confidence band",
    )
    chart.line(dates, R_per_day.Rt_avg, color=COLORS[1], legend_label="R average")

    # Horizontal reference line at R = 1.
    chart.add_layout(
        Span(location=1, dimension='width', line_color='grey',
             line_width=1, line_dash='dashed')
    )

    # Vertical marker at the method switch.
    chart.add_layout(
        Span(location=method_switch_day, dimension='height', line_color=COLORS[0],
             line_width=1, line_dash='dashed')
    )

    # Two-line annotation next to the vertical marker, one Label per text line.
    annotation_lines = (
        (4.8, 'Day the RIVM switched the calculation of R'),
        (4.6, 'from based on hospitalisations to positive tests'),
    )
    for y_position, caption in annotation_lines:
        chart.add_layout(
            Label(x=method_switch_day, y=y_position, x_offset=2,
                  text=caption, text_font_size='12px')
        )

    # Clamp the x-axis to exactly the span of the data.
    chart.x_range.start = dates.min()
    chart.x_range.end = dates.max()
    chart.legend.location = 'top_right'

    show(chart)

In [52]:
# Render Figure 1 from the national reproduction-number data.
plot_reproduction_number(R_per_day)

### Figure 1: Reproduction number 'R' per day in the Netherlands
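The orange line shows the average estimate of R and the grey area the 95% confidence band. The dashed horizontal line marks R = 1. The dashed vertical line marks 12 June 2020, the day the RIVM switched the calculation of R from being based on hospitalisations to being based on positive tests.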