# COVID-19 cases

## Imports

In [None]:
import pandas as pd
import geopandas as gpd
import json
import requests

from pathlib import Path
from zipfile import ZipFile

import plotly.express as px

## Shapefiles

In [None]:
# File name of the shapefile that later cells look for in, and read from, the shapefiles directory
shapefile = 'gemeente_2020_v1.shp'

In [None]:
# Download the zip archive from CBS unless a copy is already present in the download directory
download_dir = Path('../downloads')
download_dir.mkdir(exist_ok=True)

url = 'https://www.cbs.nl/-/media/cbs/dossiers/nederland-regionaal/wijk-en-buurtstatistieken/wijkbuurtkaart_2020_v1.zip'
filename = url.split('/')[-1]  # last path segment of the URL is used as the local file name

archive_path = download_dir / filename
if not archive_path.is_file():
    try:
        # The timeout keeps the cell from hanging forever on an unresponsive server
        r = requests.get(url, timeout=60)
        # Fail on HTTP errors so an error page is never saved as if it were the archive
        r.raise_for_status()
        archive_path.write_bytes(r.content)
    except (requests.RequestException, OSError) as exc:
        # Still best effort (no crash), but the failure is reported instead of silently swallowed
        print(f'Download of {url} failed: {exc}')

In [None]:
# Unpack the downloaded archive, unless the wanted shapefile is already in the shapefiles directory
shapefiles_dir = Path('../shapefiles')
shapefiles_dir.mkdir(exist_ok=True)

extracted_shapefile = shapefiles_dir / shapefile
if not extracted_shapefile.is_file():
    zip_path = download_dir / filename
    with ZipFile(zip_path, 'r') as archive:
        archive.extractall(shapefiles_dir)

In [None]:
# Load the shapefile into a GeoDataFrame and preview three random rows
shp_path = shapefiles_dir / shapefile
gdf = gpd.read_file(shp_path)
gdf.sample(n=3)

In [None]:
# Keep only the rows flagged H2O == 'NEE' (i.e. land only) and the columns needed further on
is_land = gdf['H2O'] == 'NEE'
wanted_columns = ['GM_NAAM', 'AANT_INW', 'geometry']
gemeente = gdf.loc[is_land, wanted_columns]
gemeente.sample(n=3)

## COVID-19 data

In [None]:
# Location of the CSV file; a later cell joins these as f'{base_url}/{file_name}' to download it
base_url = 'https://data.rivm.nl/covid-19'
file_name = 'COVID-19_aantallen_gemeente_per_dag.csv'

In [None]:
# Load the RIVM CSV: try the online file first and fall back to a local copy when that fails
data_dir = Path('../data')
data_dir.mkdir(exist_ok=True)

try:
    rivm = pd.read_csv(f'{base_url}/{file_name}', sep=';')
    print(f'Data downloaded from {base_url}')
except Exception as exc:
    # `Exception` rather than a bare `except`, so KeyboardInterrupt / SystemExit are not
    # mistaken for a failed download; the reason for the fallback is shown to the user.
    print(f'Download failed: {exc}')
    print('Data read from disk')
    # NOTE(review): nothing visible in this notebook writes rivm.csv -- confirm how it gets into data_dir
    rivm = pd.read_csv(data_dir / 'rivm.csv', sep=';')

In [None]:
# Publication date to select; compared with == against the Date_of_publication column
# (presumably stored as YYYY-MM-DD strings in the CSV -- verify)
datum = '2021-01-05'

In [None]:
# Total reported cases per municipality for the selected publication date.
# Rows with a missing municipality name or a missing count are dropped before summing.
df = (
    rivm.loc[rivm.Date_of_publication == datum, ['Municipality_name', 'Total_reported']]
    .dropna()
    # Select the column explicitly: the first positional parameter of GroupBy.sum() is
    # `numeric_only`, so sum('Total_reported') only worked because a non-empty string is truthy.
    .groupby(by='Municipality_name')['Total_reported']
    .sum()
    .reset_index()
)
df.sample(n=5)

## Merge datasets

In [None]:
# Left-join the case counts onto the municipalities, matching GM_NAAM to Municipality_name
gdf = pd.merge(
    gemeente,
    df,
    how='left',
    left_on='GM_NAAM',
    right_on='Municipality_name',
)
# The right-hand join key is no longer needed once the tables are combined
gdf = gdf.drop(columns='Municipality_name')
# Number of infections per 100,000 inhabitants
gdf['AANT_COVID_100K'] = 100_000 / gdf['AANT_INW'] * gdf['Total_reported']
gdf.sample(n=5)

## Split datasets

In [None]:
# Geometry part of the split: everything except the population and case-count columns
statistic_columns = ['AANT_INW', 'Total_reported', 'AANT_COVID_100K']
geo_data = gdf.drop(columns=statistic_columns)
geo_data.sample(n=3)

In [None]:
# Tabular part of the split: drop the raw counts and the geometry, then rename the
# municipality column from GM_NAAM to Municipality_name
data = (
    gdf.drop(columns=['AANT_INW', 'Total_reported', 'geometry'])
    .rename(columns={'GM_NAAM': 'Municipality_name'})
)
data.info()

In [None]:
# Replace missing values with 0
data = data.fillna(0)

In [None]:
# Preview three randomly chosen rows of the table
data.sample(n=3)

## Plotly

In [None]:
# Convert the Coordinate Reference System to WGS 84 (EPSG:4326, longitude/latitude).
# Note: EPSG:4326 is not Web Mercator; Web Mercator is EPSG:3857.
geo_data.geometry = geo_data.geometry.to_crs('EPSG:4326') # the older to_crs({'init': 'epsg:4326'}) form is deprecated syntax

In [None]:
# Serialise the geometries to a GeoJSON string and parse it into a plain Python dict
geojson_text = geo_data.to_json()
geojson = json.loads(geojson_text)

In [None]:
# Choropleth coloured by AANT_COVID_100K; rows of `data` are linked to GeoJSON features
# by matching Municipality_name against each feature's properties.GM_NAAM
fig = px.choropleth(
    data,
    geojson=geojson,
    locations='Municipality_name',
    featureidkey='properties.GM_NAAM',
    color='AANT_COVID_100K',
    projection='mercator',
)
# Fit the view to the plotted locations and hide the base map
fig.update_geos(fitbounds='locations', visible=False)
# Remove the margins around the figure (right, top, left, bottom)
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()