# What fraction of the basic reproduction number "R" of COVID-19 in The Netherlands is explained by the number of positive tests in the municipality of Amsterdam?
## A study of correlation using daily Dutch COVID-19 data of the National Institute for Public Health and the Environment (RIVM)

Used data: 
- https://data.rivm.nl/geonetwork/srv/dut/catalog.search#/metadata/ed0699d1-c9d5-4436-8517-27eb993eab6e
- https://data.rivm.nl/geonetwork/srv/dut/catalog.search#/metadata/5f6bc429-1596-490e-8618-1ed8fd768427

In [1]:
from datetime import datetime
from IPython.display import display
import numpy as np
import pandas as pd
from pandas.tseries.frequencies import to_offset
from bokeh.plotting import figure, output_notebook, show
from bokeh.models import Span, Label, ColumnDataSource, Arrow, NormalHead, Title, LinearColorMapper, ColorBar, BasicTicker
from bokeh.palettes import Plasma256
from bokeh.transform import transform
output_notebook()

COLORS=('#7570b3', '#d95f02', '#1b9e77')

In [2]:
def load_data():
    """
    Author: Tijs van Lieshout
    Loading of both datasets, outputting some stats about the datasets and preparing them for further use

    Keyword arguments:
    None
    
    Returns:
    cases_per_municipality_per_day -- Dataframe containing COVID-19 cases per municipality per day for the Netherlands
    amsterdam_cases_per_day -- Dataframe containing COVID-19 cases in Amsterdam per day for the Netherlands
    total_cases_without_amsterdam_per_day -- Dataframe containing COVID-19 cases per municipality per day 
    for the Netherlands not including Amsterdam
    R_per_day -- Dataframe containing reproduction number R per (available) day for the Netherlands
    """
    # Loading and describing the RIVM data "cases per municipality per day"
    cases_per_municipality_per_day = pd.read_csv("COVID-19_aantallen_gemeente_per_dag.csv", sep=";")
    cases_per_municipality_per_day['Date_of_publication'] = pd.to_datetime(cases_per_municipality_per_day['Date_of_publication'], 
                                                                      format='%Y-%m-%d')
    cases_per_municipality_per_day = cases_per_municipality_per_day.set_index('Date_of_publication').sort_index()
    print(f"Total number of COVID-19 cases: {cases_per_municipality_per_day.Total_reported.sum()}")
    print(f"Total number of COVID-19 hospital admissions: {cases_per_municipality_per_day.Hospital_admission.sum()}")
    print(f"Total number of COVID-19 deaths: {cases_per_municipality_per_day.Deceased.sum()}")
    print(f"Number of rows in cases_per_municipality_per_day: {len(cases_per_municipality_per_day.index)}")

    # Creating a subset of only statistics about the municipality of Amsterdam
    amsterdam_cases_per_day = cases_per_municipality_per_day[cases_per_municipality_per_day.Municipality_name == 'Amsterdam']
    total_cases_without_amsterdam_per_day = cases_per_municipality_per_day[cases_per_municipality_per_day.Municipality_name != 'Amsterdam']
    # display(amsterdam_cases_per_day)
    print(f"Number of rows in amsterdam_cases_per_day: {len(amsterdam_cases_per_day.index)}")

    # Loading and describing the RIVM data "COVID-19 reproduction number"
    R_per_day = pd.read_json('COVID-19_reproductiegetal.json')
    R_per_day = R_per_day.set_index('Date').sort_index()
    # display(R_per_day)
    return cases_per_municipality_per_day, amsterdam_cases_per_day, total_cases_without_amsterdam_per_day, R_per_day

In [30]:
cases_per_municipality_per_day, amsterdam_cases_per_day, total_cases_without_amsterdam_per_day, R_per_day = load_data()

Total number of COVID-19 cases: 1068960
Total number of COVID-19 hospital admissions: 23975
Total number of COVID-19 deaths: 15406
Number of rows in cases_per_municipality_per_day: 137592
Number of rows in amsterdam_cases_per_day: 728


In [4]:
 def plot_reproduction_number(R_per_day):
    """
    Author: Tijs van Lieshout
    Plotting the reproduction number R for the Netherlands

    Keyword arguments:
    R_per_day -- Dataframe containing reproduction number R per (available) day for the Netherlands
    
    Returns:
    None
    """
    p = figure(plot_width=1000, plot_height=800, x_axis_type="datetime",
               title="Reproduction number 'R' per day in the Netherlands",
               x_axis_label="Date", y_axis_label="Reproduction number", 
               toolbar_location=None, tools="",
               y_range=(0, 5))

    p.varea(x=R_per_day.index,
            y1=R_per_day.Rt_low,
            y2=R_per_day.Rt_up, color='#BDBDBD', legend_label="95% Confidence band")

    p.line(R_per_day.index, R_per_day.Rt_avg, color=COLORS[1], 
           legend_label="R average")

    hline = Span(location=1, dimension='width', line_color='grey', 
                 line_width=1, line_dash='dashed')
    p.add_layout(hline)

    add_calculation_R_change_annotation(p)

    p.x_range.start = R_per_day.index.min()
    p.x_range.end = R_per_day.index.max()
    p.legend.location = 'top_right'

    show(p)

In [5]:
def add_calculation_R_change_annotation(p):
    """
    Author: Tijs van Lieshout
    Adding annotation about the switch of methods from the RIVM at 2020-06-12 
    for the reproduction number R of COVID-19 for the Netherlands plot

    Keyword arguments:
    p -- Bokeh figure of the reproduction number R plot
    
    Returns:
    None
    """
    switch_in_methods_date = datetime.strptime('2020-06-12', '%Y-%m-%d').date()
    vline = Span(location=switch_in_methods_date, dimension='height', line_color=COLORS[0], 
                 line_width=1, line_dash='dashed')
    p.add_layout(vline)
    label1 = Label(x=switch_in_methods_date, y=4.8, x_offset = 2,
                  text='Day the RIVM switched the calculation of R', text_font_size='14px')
    label2 = Label(x=switch_in_methods_date, y=4.7, x_offset = 2,
                  text='from based on hospitalisations to positive tests', text_font_size='14px')
    p.add_layout(label1)
    p.add_layout(label2)

In [6]:
plot_reproduction_number(R_per_day)

## Figure 1: Reproduction number 'R' per day in the Netherlands
#### FIGURE CAPTION INFORMATION GOES HERE (TODO)

In [50]:
def calculate_min_max_y_range(dataframe_with_relative_growth):
    """
    Author: Tijs van Lieshout
    Getting the relative change of COVID-19 cases and concatting the reproduction numbers to them

    Keyword arguments:
    dataframe_with_relative_growth -- Dataframe containing the column 'relative growth'
    
    Returns:
    min_y_range -- min for the y axis range of the plot
    max_y_range -- max for the y axis range of the plot
    
    >>> calculate_min_max_y_range(pd.DataFrame({"relative_growth": [-100 ,-32, 6, 75, 252, 7, -143]}))
    (-128.7, 277.2)
    """
    lowest_value = dataframe_with_relative_growth.relative_growth.replace([np.inf, -np.inf], np.nan).min()
    min_y_range =  lowest_value - (lowest_value/10)
    
    highest_value = dataframe_with_relative_growth.relative_growth.replace([np.inf, -np.inf], np.nan).max()
    max_y_range = highest_value + (highest_value/10)
        
    return min_y_range, max_y_range

In [8]:
def plot_cases_amsterdam_vs_other(total_cases_without_amsterdam, amsterdam_cases, relative=False, remove_outliers=False):
    """
    Author: Tijs van Lieshout
    Plotting the COVID-19 cases for the Netherlands vs. other municipalities in the Netherlands

    Keyword arguments:
    total_cases_without_amsterdam -- Dataframe containing COVID-19 cases per municipality 
    for the Netherlands not including Amsterdam
    amsterdam_cases -- Dataframe containing COVID-19 cases in Amsterdam for the Netherlands
    
    Returns:
    None
    """
    
    if relative:
        amsterdam_cases['relative_growth'] = amsterdam_cases.Total_reported.pct_change() * 100
        total_cases_without_amsterdam['relative_growth'] = total_cases_without_amsterdam.Total_reported.pct_change() * 100
        if remove_outliers:
            amsterdam_cases = amsterdam_cases[(amsterdam_cases.relative_growth < amsterdam_cases.relative_growth.quantile(q=0.75)) &
                                              ((amsterdam_cases.relative_growth > amsterdam_cases.relative_growth.quantile(q=0.25)))]
            total_cases_without_amsterdam = total_cases_without_amsterdam[(total_cases_without_amsterdam.relative_growth < total_cases_without_amsterdam.relative_growth.quantile(q=0.75)) &
                                                                          ((total_cases_without_amsterdam.relative_growth > total_cases_without_amsterdam.relative_growth.quantile(q=0.25)))]
        min_y_range, max_y_range = calculate_min_max_y_range(total_cases_without_amsterdam)
        title = "Relative change in"
        label = title
        y_total = total_cases_without_amsterdam.relative_growth
        y_amsterdam = amsterdam_cases.relative_growth
    else:
        max_y_range = total_cases_without_amsterdam.Total_reported.max() + total_cases_without_amsterdam.Total_reported.max()/10
        min_y_range = 0
        title = "Total number of new"
        label = "New"
        y_total = total_cases_without_amsterdam.Total_reported
        y_amsterdam = amsterdam_cases.Total_reported
    
    p = figure(plot_width=1000, plot_height=800, x_axis_type="datetime",
               title=f"{title} COVID-19 cases per day in Amsterdam vs. other municipalities in the Netherlands",
               x_axis_label="Date", y_axis_label=f"{title} COVID-19 cases", 
               toolbar_location=None, tools="", y_range=(min_y_range, max_y_range))
    
    p.circle(x=total_cases_without_amsterdam.index, 
             y=y_total,
             legend_label=f"{label} COVID-19 cases in another municipality per day",
             color="#BDBDBD")
    
    p.circle(x=amsterdam_cases.index, 
             y=y_amsterdam,
             legend_label=f"{label} COVID-19 cases in Amsterdam per day",
             color=COLORS[1])
    
    p.x_range.start = total_cases_without_amsterdam.index.min()
    p.x_range.end = total_cases_without_amsterdam.index.max()
    p.legend.location = 'top_left'
    
    show(p)

In [9]:
plot_cases_amsterdam_vs_other(total_cases_without_amsterdam_per_day, amsterdam_cases_per_day, relative=False, remove_outliers=False)

In [10]:
plot_cases_amsterdam_vs_other(total_cases_without_amsterdam_per_day, amsterdam_cases_per_day, relative=True, remove_outliers=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  amsterdam_cases['relative_growth'] = amsterdam_cases.Total_reported.pct_change() * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_cases_without_amsterdam['relative_growth'] = total_cases_without_amsterdam.Total_reported.pct_change() * 100


In [11]:
plot_cases_amsterdam_vs_other(total_cases_without_amsterdam_per_day, amsterdam_cases_per_day, relative=True, remove_outliers=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  amsterdam_cases['relative_growth'] = amsterdam_cases.Total_reported.pct_change() * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  total_cases_without_amsterdam['relative_growth'] = total_cases_without_amsterdam.Total_reported.pct_change() * 100


In [12]:
def sample_cases_weekly(total_cases_per_day, amsterdam_cases_per_day):
    """
    Author: Tijs van Lieshout
    Sampling COVID-19 cases dataframes to weekly instead of daily

    Keyword arguments:
    amsterdam_cases_per_day -- Dataframe containing COVID-19 cases in Amsterdam per day for the Netherlands
    total_cases_per_day -- Dataframe containing COVID-19 cases per municipality per day for the Netherlands
    
    Returns:
    amsterdam_cases_per_week -- Dataframe containing COVID-19 cases in Amsterdam per week for the Netherlands
    cases_per_municipality_per_week -- Dataframe containing COVID-19 cases per municipality per week 
    for the Netherlands not including Amsterdam
    """
    amsterdam_cases_per_week = amsterdam_cases_per_day.resample('W', label='left').sum()
    amsterdam_cases_per_week.index = amsterdam_cases_per_week.index + to_offset("1D")
    cases_per_municipality_per_week = total_cases_per_day.resample('W', label='left').sum()
    cases_per_municipality_per_week.index = cases_per_municipality_per_week.index + to_offset("1D")
    
    return cases_per_municipality_per_week, amsterdam_cases_per_week

In [13]:
cases_per_municipality_without_amsterdam_per_week, amsterdam_cases_per_week = sample_cases_weekly(total_cases_without_amsterdam_per_day, amsterdam_cases_per_day)

plot_cases_amsterdam_vs_other(cases_per_municipality_without_amsterdam_per_week, amsterdam_cases_per_week, relative=True, remove_outliers=True)

## Figure 2: Total number of new COVID-19 cases per day in the Amsterdam vs. other municipalities in the Netherlands
#### TODO ADD CAPTION

In [14]:
def plot_cases_netherlands_amsterdam(total_cases_without_amsterdam, amsterdam_cases, y_label_pos, timeperiod):
    """
    Author: Tijs van Lieshout
    Plotting the COVID-19 cases for the Netherlands and Amsterdam

    Keyword arguments:
    total_cases_without_amsterdam -- Dataframe containing COVID-19 cases per municipality 
    for the Netherlands not including Amsterdam
    amsterdam_cases -- Dataframe containing COVID-19 cases in Amsterdam for the Netherlands
    y_label_pos -- Int for the position of the annotation in the plot
    timeperiod -- String to put in the title of the plot (e.g. 'week' or 'day')
    
    Returns:
    None
    """
    total_cases = total_cases_without_amsterdam.groupby(total_cases_without_amsterdam.index).sum()
    amsterdam_total_cases = amsterdam_cases.groupby(amsterdam_cases.index).sum()
    
    source = ColumnDataSource(data=dict(
    x=total_cases.index,
    y1=amsterdam_total_cases.Total_reported,
    y2=total_cases.Total_reported,
    ))
    
    max_y_range = total_cases.Total_reported.max() + total_cases.Total_reported.max()/10
    
    p = figure(plot_width=1000, plot_height=800, x_axis_type="datetime",
               title=f"Total number of new COVID-19 cases per {timeperiod} in the Netherlands",
               x_axis_label="Date", y_axis_label="Total new COVID-19 cases", 
               toolbar_location=None, tools="", y_range=(0, max_y_range))

    p.varea_stack(['y1', 'y2'], x='x', color=("grey", "#BDBDBD"),
                  legend_label=("Amsterdam", "The Netherlands"), source=source)

    # Data from CBS 2020
    population_of_amsterdam = 872779
    population_of_the_netherlands = 17407585
    
    p.line(x=total_cases.index, 
           y=total_cases.Total_reported*(population_of_amsterdam/population_of_the_netherlands), 
           color=COLORS[1], line_width=2, 
           legend_label='Expected fraction of Dutch cases to be in Amsterdam based on the population')
    
    add_COVID_distribution_annotation(p, total_cases, y_label_pos)
    
    p.x_range.start = total_cases.index.min()
    p.x_range.end = total_cases.index.max()
    p.legend.location = 'top_left'

    show(p)

In [15]:
def add_COVID_distribution_annotation(p, total_cases, y_label_pos):
    """
    Author: Tijs van Lieshout
    Adding annotation about the real vs. expected COVID-19 cases in Amsterdam

    Keyword arguments:
    p -- Bokeh figure of the reproduction number R plot
    total_cases -- Dataframe containing COVID-19 cases per municipality 
    for the Netherlands not including Amsterdam
    y_label_pos -- Int for the position of the annotation in the plot
    
    Returns:
    None
    """
    highlighted_date_start = datetime.strptime('2020-09-01', '%Y-%m-%d').date()
    highlighted_date_end = datetime.strptime('2020-12-01', '%Y-%m-%d').date()
    
    label1 = Label(x=highlighted_date_start, y=y_label_pos+y_label_pos/10, x_offset = -400, y_offset=0, text_color='black',
                  text='If COVID-19 cases are uniformly distributed over the Netherlands and Amsterdam', text_font_size='14px')
    label2 = Label(x=highlighted_date_start, y=y_label_pos, x_offset = -400, y_offset=0, text_color='black',
                  text='you would expect the orange line to follow the dark grey area', text_font_size='14px')
    
    arrow = Arrow(end=NormalHead(line_color="black", line_width=1), x_start=highlighted_date_start, 
                  y_start=y_label_pos, x_end=highlighted_date_end, y_end=y_label_pos/6)
    
    p.add_layout(label1)
    p.add_layout(label2)
    p.add_layout(arrow)

In [16]:
# 3000 is for the location of the annotation text, 'day' is for the title of the plot
plot_cases_netherlands_amsterdam(total_cases_without_amsterdam_per_day, amsterdam_cases_per_day, 3000, 'day')

## Figure 3: Total number of new COVID-19 cases per day in the Netherlands and Amsterdam
#### In darker grey you can see the number of positive COVID-19 cases in Amsterdam, in light-grey the number of positive COVID-19 cases in the Netherlands. In orange you can see the expected number of positive cases by simply dividing the population of amsterdam by the population of the netherlands and multiplying it with the number of positive cases in the Netherlands. You can see that in the beginning of the second wave there were more positive cases in Amsterdam than expected by this calculation, and slightly less in than expected in the second part of the second wave.

#### Light grey area is behind dark grey area, absolute values can be read from the origin of the y-axis to the peak.

In [28]:
# 15000 is for the location of the annotation text, 'week' is for the title of the plot
# plot_cases_netherlands_amsterdam(cases_per_municipality_without_amsterdam_per_week, amsterdam_cases_per_week, 15000, 'week')

In [18]:
def process_cases_vs_R(total_cases, amsterdam_cases, reproduction_numbers, remove_outliers=False):
    """
    Author: Tijs van Lieshout
    Getting the relative change of COVID-19 cases and concatting the reproduction numbers to them

    Keyword arguments:
    amsterdam_cases -- Dataframe containing COVID-19 cases in Amsterdam 
    total_cases -- Dataframe containing COVID-19 cases per municipality 
    for the Netherlands (including Amsterdam)
    reproduction_numbers -- Dataframe containing reproduction number R per (available) day for the Netherlands
    remove_outliers -- remove the outliers of the relative growth from the dataset
    
    Returns:
    amsterdam_cases_vs_R -- Dataframe containing COVID-19 cases, relative change and reproduction numbers 
    for the Netherlands not including Amsterdam
    total_cases_vs_R -- Dataframe containing COVID-19 cases, relative change and reproduction numbers
    for all other municipalities in the Netherlands 
    """
    
    amsterdam_cases['relative_growth'] = amsterdam_cases.Total_reported.pct_change() * 100
    total_cases['relative_growth'] = total_cases.Total_reported.pct_change() * 100
    amsterdam_cases_vs_R = amsterdam_cases.join(reproduction_numbers)
    total_cases_vs_R = total_cases.join(reproduction_numbers)
    
    if remove_outliers:
        amsterdam_cases_vs_R = amsterdam_cases_vs_R[(amsterdam_cases_vs_R.relative_growth < amsterdam_cases_vs_R.relative_growth.quantile(q=0.75)) &
                                                    ((amsterdam_cases_vs_R.relative_growth > amsterdam_cases_vs_R.relative_growth.quantile(q=0.25)))]
        total_cases_vs_R = total_cases_vs_R[(total_cases_vs_R.relative_growth < total_cases_vs_R.relative_growth.quantile(q=0.75)) &
                                            ((total_cases_vs_R.relative_growth > total_cases_vs_R.relative_growth.quantile(q=0.25)))]
        
    return total_cases_vs_R, amsterdam_cases_vs_R

In [19]:
def plot_cases_vs_R(total_cases_vs_R, amsterdam_cases_vs_R, reproduction_numbers, remove_outliers=False):
    """
    Author: Tijs van Lieshout
    Plotting the change in COVID-19 cases for the Netherlands and Amsterdam vs. the reproduction number R in the Netherlands

    Keyword arguments:
    amsterdam_cases_vs_R -- Dataframe containing COVID-19 cases, relative change and reproduction numbers 
    for the Netherlands not including Amsterdam
    total_cases_vs_R -- Dataframe containing COVID-19 cases, relative change and reproduction numbers
    for all other municipalities in the Netherlands 
    reproduction_numbers -- Dataframe containing reproduction number R per (available) day for the Netherlands
    max_y_range -- max for the y axis range of the plot
    
    Returns:
    None
    """
    
    min_y_range, max_y_range = calculate_min_max_y_range(total_cases_vs_R)
    
    p = figure(plot_width=1000, plot_height=800,
               title=f"Relative change in COVID-19 cases for the Netherlands and Amsterdam vs. the reproduction number R in the Netherlands (including Amsterdam)",
               x_axis_label="Average reproduction number R", y_axis_label="Relative change of new COVID-19 cases in percentage", 
               toolbar_location=None, tools="", y_range=(min_y_range, max_y_range))
    
    p.circle(x=total_cases_vs_R.Rt_avg, 
             y=total_cases_vs_R.relative_growth,
             legend_label='Relative growth of COVID-19 cases in the Netherlands (including Amsterdam)',
             color="#BDBDBD")
    
    p.circle(x=amsterdam_cases_vs_R.Rt_avg, 
             y=amsterdam_cases_vs_R.relative_growth,
             legend_label='Relative growth of COVID-19 cases in Amsterdam per day',
             color=COLORS[1])
    
    min_R = total_cases_vs_R.Rt_avg.min()
    max_R = total_cases_vs_R.Rt_avg.max()
    
    xs, interpolated_total, interpolated_amsterdam = interpolate_cases_vs_R(min_R, max_R, total_cases_vs_R, amsterdam_cases_vs_R)
    
    p.line(x=xs, y=interpolated_total,
          legend_label='Interpolation of the Netherlands',
          color="#BDBDBD")
    p.line(x=xs, y=interpolated_amsterdam,
          legend_label='Interpolation of Amsterdam',
          color=COLORS[1])
    
    if remove_outliers:
        p.add_layout(Title(text="Outliers trimmed off", text_font_style="bold"), 'above')
        
    
    p.x_range.start = min_R
    p.x_range.end = max_R
    p.legend.location = 'top_left'
    p.legend.background_fill_alpha = 0.0
    
    show(p)

In [20]:
def interpolate_cases_vs_R(min_R, max_R, total_cases_vs_R, amsterdam_cases_vs_R):
    """
    Author: Tijs van Lieshout
    Getting the relative change of COVID-19 cases and concatting the reproduction numbers to them

    Keyword arguments:
    total_cases_vs_R -- Dataframe containing COVID-19 cases, relative change and reproduction numbers
    for the Netherlands (including Amsterdam)
    
    Returns:
    xs -- Numpy vector of x values for interpolation line
    total_ys -- Numpy vector of y values for interpolation line of the Netherlands data
    amsterdam_ys -- Numpy vector of y values for interpolation line of Amsterdam data
    """
    xs = np.linspace(min_R, max_R, 1000)
    total_cases_vs_R_without_na = total_cases_vs_R.dropna()
    total_coeff = np.polyfit(total_cases_vs_R_without_na.Rt_avg, total_cases_vs_R_without_na.relative_growth, deg=1)
    total_ys = np.polyval(total_coeff, xs)
    
    amsterdam_cases_vs_R_without_na = amsterdam_cases_vs_R.dropna()
    amsterdam_coeff = np.polyfit(amsterdam_cases_vs_R_without_na.Rt_avg, amsterdam_cases_vs_R_without_na.relative_growth, deg=1)
    amsterdam_ys = np.polyval(amsterdam_coeff, xs)
    
    return xs, total_ys, amsterdam_ys

In [21]:
cases_per_municipality_per_week, amsterdam_cases_per_week = sample_cases_weekly(cases_per_municipality_per_day, amsterdam_cases_per_day)
total_cases_vs_R, amsterdam_cases_vs_R = process_cases_vs_R(cases_per_municipality_per_week, 
                                                            amsterdam_cases_per_week, 
                                                            R_per_day)

plot_cases_vs_R(total_cases_vs_R, amsterdam_cases_vs_R, R_per_day)

In [22]:
total_cases_vs_R_no_outliers, amsterdam_cases_vs_R_no_outliers = process_cases_vs_R(cases_per_municipality_per_week, 
                                                                                    amsterdam_cases_per_week,
                                                                                    R_per_day,
                                                                                    True)
plot_cases_vs_R(total_cases_vs_R_no_outliers, amsterdam_cases_vs_R_no_outliers, R_per_day, True)

In [39]:
def plot_correlation(c, title):
    corr_df = pd.DataFrame(c.abs().stack(), columns=['correlation']).reset_index()
    source = ColumnDataSource(corr_df)

    p = figure(title=title, 
               plot_width=1000, 
               plot_height=800,
               x_range=(list(reversed(c.columns))),
               y_range=(list(c.index)),
               toolbar_location=None)

    lin_color_mapper = LinearColorMapper(palette=Plasma256, 
                                         low=corr_df.correlation.min(), 
                                         high=corr_df.correlation.max())

    p.rect(x='level_0', y='level_1', 
           width=1, height=1, source=source, line_color=None,
           fill_color=transform('correlation', lin_color_mapper))

    colorbar = ColorBar(color_mapper=lin_color_mapper,
                       location=(0, 0), 
                       ticker=BasicTicker(desired_num_ticks=len((list(reversed(c.columns))))))
    p.add_layout(colorbar, 'right')
    show(p)

In [40]:
total_corr = total_cases_vs_R.corr()
plot_correlation(total_cases_vs_R.corr(), 'Correlation in the Netherlands subset')

In [25]:
amsterdam_corr = amsterdam_cases_vs_R.corr()
plot_correlation(amsterdam_cases_vs_R.corr(), 'Correlation in Amsterdam subset')

In [41]:
total_corr_no_outliers = total_cases_vs_R_no_outliers.corr()
plot_correlation(total_corr_no_outliers.corr(), 'Correlation in the Netherlands subset (outliers removed)')

In [42]:
amsterdam_corr_no_outliers = amsterdam_cases_vs_R_no_outliers.corr()
plot_correlation(amsterdam_corr_no_outliers.corr(), 'Correlation in Amsterdam subset (outliers removed)')

In [52]:
if __name__ == "__main__":
    import doctest
    doctest.testmod(verbose=True)

Trying:
    calculate_min_max_y_range(pd.DataFrame({"relative_growth": [-100 ,-32, 6, 75, 252, 7, -143]}))
Expecting:
    (-128.7, 277.2)
ok
12 items had no tests:
    __main__
    __main__.add_COVID_distribution_annotation
    __main__.add_calculation_R_change_annotation
    __main__.interpolate_cases_vs_R
    __main__.load_data
    __main__.plot_cases_amsterdam_vs_other
    __main__.plot_cases_netherlands_amsterdam
    __main__.plot_cases_vs_R
    __main__.plot_correlation
    __main__.plot_reproduction_number
    __main__.process_cases_vs_R
    __main__.sample_cases_weekly
1 items passed all tests:
   1 tests in __main__.calculate_min_max_y_range
1 tests in 13 items.
1 passed and 0 failed.
Test passed.
