In [1]:
import pandas as pd
import numpy as np

# Let pandas render up to 389 rows before truncating a displayed frame.
pd.set_option("display.max_rows", 389)



In [2]:
# Names of the policy-measure columns handled by this notebook. Every entry is
# looked up as a column of the daily data frame in get_measure_dates_dict.
# NOTE(review): the c*/h* prefixes look like OxCGRT indicator codes - confirm
# against the source of measures_strictness.xlsx.
MEASURES = [
    "c1_school_closing",
    "c2_workplace_closing",
    "c3_cancel_public_events",
    "c4_restrictions_on_gatherings",
    "c5_close_public_transport",
    "c6_stay_at_home_requirements",
    "c7_movement_restriction",
    "c8_international_travel",
    "h1_public_information_campaigns",
    "h2_testing_policy",
    "h3_contact_tracing",
    "h6_facial_coverings",
    "h7_vaccination_policy",
    "h8_protection_of_elderly_people",
]

def get_daily_cases(data, col_name='series_1'):
    """Count the case records reported for each statistics date.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per case; must contain the columns 'Date_statistics' and
        'Date_file'.
    col_name : str
        Name given to the returned series.

    Returns
    -------
    pandas.Series
        Indexed by 'Date_statistics' (sorted), holding the number of non-null
        'Date_file' entries for that date.

    Raises
    ------
    KeyError
        If 'Date_statistics' or 'Date_file' is missing from `data`.
    """
    # Only one column is needed, so count just that column instead of copying
    # the whole frame and counting every column. `data` is never modified.
    return data.groupby(by='Date_statistics')['Date_file'].count().rename(col_name)


def make_measures_0_1(column, max_level):
    """Turn a column of measure levels into a 0/1 indicator for one level.

    Entries equal to `max_level` become 1 and every other entry becomes 0.
    When `max_level` is 0 the result is all zeros, because matching entries
    are themselves 0. The result is a NumPy array with the input's dtype.
    """
    matches_level = column == max_level
    # Keep the level value where it matches and blank everything else to 0 ...
    kept = np.where(matches_level, column, 0)
    # ... then collapse whatever non-zero value survived to 1.
    return np.where(kept != 0, 1, kept)


def get_measure_dates_dict(df):
    """Collect the switch dates of every non-zero level of every measure.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Date_statistics' column and one column per entry of
        MEASURES holding the level of that measure on each row.

    Returns
    -------
    dict
        ``{measure: {level: [date, date, ...]}}`` where each list is whatever
        find_start_end_dates returns for the 0/1 indicator of that level.
        Level 0 is skipped.

    Raises
    ------
    KeyError
        If 'Date_statistics' or any name in MEASURES is not a column of `df`.
    """
    measure_dates = {}
    # Read from the `df` argument (previously the global `daily_data` was used
    # and the argument was ignored). The index is reset so the dates line up
    # positionally with the indicator array even when `df` has a non-default
    # index.
    date_values = df['Date_statistics'].reset_index(drop=True)

    for measure in MEASURES:
        measure_dates[measure] = {}
        for level in np.sort(df[measure].unique()):
            if level == 0:
                continue
            measure_level = pd.DataFrame({
                measure: make_measures_0_1(df[measure], level),
                'Date_statistics': date_values,
            })
            measure_dates[measure][level] = find_start_end_dates(measure, measure_level)
    return measure_dates


def find_start_end_dates(measure, df, end_date=None):
    """Return the dates at which the indicator column `measure` changes value.

    The returned list is meant to be read in pairs: ``[start, end, start,
    end, ...]``.

    Parameters
    ----------
    measure : str
        Name of the indicator column in `df` (0 = inactive, non-zero = active).
    df : pandas.DataFrame
        Must contain `measure` and a 'Date_statistics' column. Rows are
        compared in their stored order.
    end_date : optional
        Value appended when the list would otherwise have an odd length, i.e.
        the measure is still active on the last row. Defaults to
        ``pd.to_datetime('2021-02-16 00:00:00')``, the value that used to be
        hard-coded here.

    Returns
    -------
    list
        For every row whose value differs from the next row's, the
        'Date_statistics' of that row (the last row *before* the change).
        If the first row is already active its date is put in front so the
        start/end pairing stays aligned.
    """
    if end_date is None:
        # Historical default, kept so existing callers get identical results.
        end_date = pd.to_datetime('2021-02-16 00:00:00')

    # Positional access throughout: works for any index, not only 0..n-1.
    flags = df[measure].to_numpy()
    date_column = df['Date_statistics']

    # Position i is a change point when row i and row i + 1 differ.
    change_positions = np.flatnonzero(flags[:-1] != flags[1:])
    dates = [date_column.iloc[pos] for pos in change_positions]

    # With no off->on transition to mark the start, the first change would be
    # an end and every later pair would be shifted by one.
    if flags.size and flags[0] != 0:
        dates.insert(0, date_column.iloc[0])

    if len(dates) % 2 == 1:
        dates.append(end_date)
    return dates


def get_plot_area_dict(dates, measure, color, y_max=3000, opacity=0.25):
    """Build the description of one shaded rectangle spanning a date interval.

    Parameters
    ----------
    dates : sequence
        ``dates[0]`` is used as the left edge (x0) and ``dates[1]`` as the
        right edge (x1).
    measure : str
        Not used by this function; kept so existing callers keep working.
    color : str
        Fill colour of the rectangle.
    y_max : number
        Upper edge (y1) of the rectangle; the lower edge is always 0.
        Defaults to 3000, the value that used to be hard-coded.
    opacity : float
        Fill opacity. Defaults to 0.25, the value that used to be hard-coded.

    Returns
    -------
    dict
        Keys: type, x0, x1, y0, y1, fillcolor, opacity, line_width.
        NOTE(review): the key set looks like a Plotly layout shape - confirm
        against the plotting code that consumes it.
    """
    start, end = dates[0], dates[1]
    return dict(
        type="rect",
        x0=start,
        x1=end,
        y0=0,
        y1=y_max,
        fillcolor=color,
        opacity=opacity,
        line_width=0,
    )

# Fill colour per measure and per level: {measure: {level: hex colour}}.
# Only "c1_school_closing" has an entry, so looking up any other measure
# raises KeyError.
MEASURES_COLORS = {"c1_school_closing": {1: "#bbc9e0", 2: "#cad5e7", 3: "#d9e1ee"}}
# Display labels for measures; covers only the first three entries of MEASURES.
MEASURES_NAMES = {
    "c1_school_closing": "School Closing",
    "c2_workplace_closing": "Workplace Closing",
    "c3_cancel_public_events": "Public Events Cancelled",
}

In [3]:
# Load the three raw inputs: the measure-strictness workbook plus the two
# semicolon-separated CSV files (national case list and behaviour survey).
measures = pd.read_excel("measures_strictness.xlsx", engine="openpyxl")
# The unnamed first column of the workbook is exposed under the name 'Date'.
measures = measures.rename(columns={"Unnamed: 0": "Date"})
cases_national = pd.read_csv("Data/COVID-19_casus_landelijk.csv", sep=";")
behaviour_data = pd.read_csv("Data/COVID-19_gedrag.csv", sep=";")

In [4]:
# Create datasets

# Daily case counts joined with the measure levels in force on each date.
# Dates are parsed strictly: `errors='ignore'` is deprecated in pandas (2.2)
# and silently left unparsable values as strings, which only surfaced later.
cases_national['Date_statistics'] = pd.to_datetime(cases_national['Date_statistics'], dayfirst=True)
# The group keys are already datetimes here, so no second parse is needed.
daily_cases = get_daily_cases(cases_national, 'Number of new cases').to_frame().reset_index()
daily_data = (
    daily_cases
    .merge(measures, left_on='Date_statistics', right_on='Date', how='left')
    # 'Date' duplicates 'Date_statistics' after the merge; the aggregate
    # index columns are not used further in this notebook section.
    .drop(columns=['Date', 'stringency_index', 'government_response_index', 'containment_health_index', 'economic_support_index'])
)

# Behaviour survey: keep the national, all-subgroups rows with a non-empty
# sample whose category mentions 'Naleving' or 'Zorgen', then drop the columns
# that are constant after filtering or not needed.
behaviour_data['Date_of_measurement'] = pd.to_datetime(behaviour_data['Date_of_measurement'])
behaviour_data = behaviour_data.loc[
    lambda d: (d.Region_name == 'Nederland')
    & (d.Subgroup == 'Totaal')
    & (d.Sample_size != 0)
    # One regex instead of two contains() calls; na=False treats a missing
    # category as "no match" instead of propagating NaN into the mask.
    & d.Indicator_category.str.contains('Naleving|Zorgen', na=False)
]
behaviour_data = behaviour_data.drop(columns=['Date_of_report', 'Wave', 'Region_code', 'Subgroup', 'Figure_type', 'Region_name', 'Subgroup_category', 'Change_wrt_previous_measurement'])
# Zero sample sizes were filtered out above, so this division is safe.
behaviour_data['Normalised_Value'] = behaviour_data['Value'] / behaviour_data['Sample_size']

In [5]:
# Preview the first rows only; rendering the whole frame bloats the notebook.
daily_data.head(10)

Unnamed: 0,Date_statistics,Number of new cases,c1_school_closing,c2_workplace_closing,c3_cancel_public_events,c4_restrictions_on_gatherings,c5_close_public_transport,c6_stay_at_home_requirements,c7_movement_restriction,c8_international_travel,h1_public_information_campaigns,h2_testing_policy,h3_contact_tracing,h6_facial_coverings,h7_vaccination_policy,h8_protection_of_elderly_people
0,2020-01-01,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2020-01-04,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2020-01-06,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2020-01-16,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,2020-01-20,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,2020-01-22,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,2020-01-24,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,2020-01-25,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,2020-01-26,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,2020-01-27,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0


In [21]:
# Write both prepared tables to CSV files (paths relative to the working
# directory), leaving out the index column.
daily_data.to_csv('daily_data.csv',index=False)
behaviour_data.to_csv('behaviour_data.csv',index=False)

In [6]:
# Distinct indicator names that remain in the filtered behaviour data.
behaviour_data['Indicator'].unique()

array(['Bij_klachten_blijf_thuis', 'Bij_klachten_laat_testen',
       'Houd_1_5m_afstand', 'Ontvang_max_bezoekers_thuis',
       'Vermijd_drukke_plekken', 'Was_vaak_je_handen',
       'Zorgen_over_Coronavirus',
       'Draag_mondkapje_in_publieke_binnenruimtes',
       'Hoest_niest_in_elleboog', 'Thuisgewerkte_uren', 'Werkt_thuis',
       'Draag_mondkapje_in_ov', 'Avondklok'], dtype=object)

In [24]:
# Measure whose levels are turned into shaded areas in the next cells.
measure = 'c1_school_closing'
# Switch dates for every measure and level, computed from the daily table.
measures_dates = get_measure_dates_dict(daily_data)

In [35]:
# For every non-zero level of the selected measure, build one rectangle spec
# per (start, end) pair of switch dates; results are keyed by level.
areas_dicts = {}
for level in np.sort(daily_data[measure].unique()):
    if level == 0:
        continue
    level_dates = measures_dates[measure][level]
    # Even positions hold interval starts, odd positions the matching ends;
    # an unpaired trailing date is ignored.
    areas_dicts[level] = [
        get_plot_area_dict([str(start), str(end)], measure, MEASURES_COLORS[measure][level])
        for start, end in zip(level_dates[0::2], level_dates[1::2])
    ]

In [36]:
# Display the rectangle specifications per level for the selected measure.
areas_dicts

{1: [{'x0': '2020-03-11 00:00:00',
   'x1': '2020-03-15 00:00:00',
   'y0': 0,
   'y1': 3000,
   'fillcolor': '#bbc9e0',
   'opacity': 0.25,
   'line_width': 0},
  {'x0': '2020-06-14 00:00:00',
   'x1': '2020-12-15 00:00:00',
   'y0': 0,
   'y1': 3000,
   'fillcolor': '#bbc9e0',
   'opacity': 0.25,
   'line_width': 0}],
 2: [{'x0': '2020-05-10 00:00:00',
   'x1': '2020-06-14 00:00:00',
   'y0': 0,
   'y1': 3000,
   'fillcolor': '#cad5e7',
   'opacity': 0.25,
   'line_width': 0},
  {'x0': '2021-02-07 00:00:00',
   'x1': '2021-02-16 00:00:00',
   'y0': 0,
   'y1': 3000,
   'fillcolor': '#cad5e7',
   'opacity': 0.25,
   'line_width': 0}],
 3: [{'x0': '2020-03-15 00:00:00',
   'x1': '2020-05-10 00:00:00',
   'y0': 0,
   'y1': 3000,
   'fillcolor': '#d9e1ee',
   'opacity': 0.25,
   'line_width': 0},
  {'x0': '2020-12-15 00:00:00',
   'x1': '2021-02-07 00:00:00',
   'y0': 0,
   'y1': 3000,
   'fillcolor': '#d9e1ee',
   'opacity': 0.25,
   'line_width': 0}]}