In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import datetime
import re

# Covid data set

In [2]:
# ----- Data sets -----
file_path1 = 'Data/COVID-19_ziekenhuisopnames_1of2.csv'
covid_temp1 = pd.read_csv(file_path1, delimiter = ';')

file_path2 = 'Data/COVID-19_ziekenhuisopnames_2of2.csv'
covid_temp2 = pd.read_csv(file_path2, delimiter = ';')

province = ('Groningen', 'Friesland', 'Drenthe', 'Overijssel', 'Overijssel', 'Gelderland', 
            'Gelderland', 'Gelderland', 'Utrecht', 'North Holland', 'North Holland', 'North Holland', 
            'North Holland', 'North Holland', 'South Holland', 'South Holland', 'South Holland', 'South Holland', 
            'Zeeland', 'North Brabant', 'North Brabant', 'North Brabant', 'Limburg', 'Limburg', 
            'Flevoland')
            # Dataset of provinces for each security region formatted as 
            # VR01 = Groningen, VR02 = Friesland, VR03 = Drenthe, etc.

province_group = ('North', 'North', 'North', 'East',  'East',  'East', 
                  'East',  'East',  'West',  'West',  'West',  'West', 
                  'West',  'West',  'West',  'West',  'West',  'West', 
                  'West',  'South', 'South', 'South', 'South', 'South', 'East')

In [3]:
# ----- Formatting -----

# Merge both data sets
covid = pd.merge(covid_temp1, covid_temp2, how = 'outer')

# Group data set by year and month,
covid['Date_of_statistics'] = pd.to_datetime(covid['Date_of_statistics'])
covid['Year'] = pd.DatetimeIndex(covid['Date_of_statistics']).year
covid['Month'] = pd.DatetimeIndex(covid['Date_of_statistics']).month
covid = covid.groupby(['Year', 'Month', 'Security_region_code']).sum().reset_index()

# Drop irrelevant data to code faster
covid.drop(['Hospital_admission_notification', 'Version'], axis=1, inplace=True)
covid.dropna(subset=['Security_region_code'], inplace=True)

covid['Province_Group'] = np.nan
for i in range(len(covid)):
    covid.iloc[i,4] = province_group[int(covid.iloc[i,2].strip('VR'))-1]
    covid.iloc[i,2] = province[int(covid.iloc[i,2].strip('VR'))-1]
    
# covid.insert(2, 'Province_Group', Province_Group)
covid.rename(columns={"Security_region_code": "Province", "Hospital_admission": "Hospitalized"})
covid

Unnamed: 0,Year,Month,Security_region_code,Hospital_admission,Province_Group
0,2020,1,Groningen,4,North
1,2020,1,Friesland,13,North
2,2020,1,Drenthe,13,North
3,2020,1,Overijssel,7,East
4,2020,1,Overijssel,49,East
...,...,...,...,...,...
895,2022,12,North Brabant,40,South
896,2022,12,North Brabant,30,South
897,2022,12,Limburg,16,South
898,2022,12,Limburg,29,South


# Travel data set

In [4]:
file_path = 'Data/Travel_Data.csv'
travel = pd.read_csv(file_path, delimiter = ';', low_memory = False)
check = []
for i in range(len(travel)):
    check.append('LD' not in travel.iloc[i,5])
travel['Check'] = check
travel.drop(travel.loc[travel['Check']].index, inplace=True)

check = []
for i in range(len(travel)):
    check.append('2031' not in travel.iloc[i,1])
travel['Check'] = check
travel.drop(travel.loc[travel['Check']].index, inplace=True)

check = []
for i in range(len(travel)):
    check.append(int(travel.iloc[i,1].replace('2031', '')) <= 150)
travel['Check'] = check
travel.drop(travel.loc[travel['Check']].index, inplace=True)

check = []
for i in range(len(travel)):
    check.append('MW00000' not in travel.iloc[i,4])
travel['Check'] = check
travel.drop(travel.loc[travel['Check']].index, inplace=True)

In [6]:
travel

Unnamed: 0,ID,TripCharacteristics,Population,TravelPurposes,Margins,RegionCharacteristics,Periods,AverageDistanceTravelledPerTrip_1,AverageTravelTimePerTrip_2,Check
116164,116164,2031160,A048710,T001080,MW00000,LD01,2018JJ00,14.76,25.18,False
116165,116165,2031160,A048710,T001080,MW00000,LD01,2019JJ00,13.62,24.62,False
116166,116166,2031160,A048710,T001080,MW00000,LD01,2020JJ00,11.19,22.54,False
116167,116167,2031160,A048710,T001080,MW00000,LD01,2021JJ00,8.86,27.01,False
116168,116168,2031160,A048710,T001080,MW00000,LD02,2018JJ00,12.88,24.11,False
...,...,...,...,...,...,...,...,...,...,...
179271,179271,2031270,A048709,2030250,MW00000,LD03,2021JJ00,11.18,21.41,False
179272,179272,2031270,A048709,2030250,MW00000,LD04,2018JJ00,8.62,16.03,False
179273,179273,2031270,A048709,2030250,MW00000,LD04,2019JJ00,8.04,15.13,False
179274,179274,2031270,A048709,2030250,MW00000,LD04,2020JJ00,10.05,14.78,False


In [9]:
travel['TripCharacteristics'].unique()

array(['2031160', '2031170', '2031180', '2031190', '2031200', '2031210',
       '2031220', '2031230', '2031240', '2031250', '2031260', '2031270'],
      dtype=object)