# Imports

In [None]:
import pandas as pd
import folium
import matplotlib.pyplot as plt
import json
from datetime import datetime
import imageio
from folium.features import DivIcon

# Exercise 1

## Loading Data

In [None]:
# Directory layout of the project data, relative to this notebook.
# NOTE(review): only "data_raw" climbs two levels ("../../"); the other entries
# climb one level and are not used in the cells visible here — confirm which is right.
PATH = {
    "data_raw": "../../data/raw/",
    "data_interim": "../data/interim/",
    "data_processed": "../data/processed/",
    "data_external": "../data/external/",
}

# Sub-folder of the raw data directory for each data set.
SUBPATH = {
    "corona": "corona/",
    "meta": "metadata/",
    "shape": "shapefiles/",
    "weather": "weather/",
}

# File name of each data set inside its sub-folder.
FILENAME = {
    "corona": "de_corona.csv",
    "meta": "de_metadata.json",
    "shape": "de.geojson",
    "weather": "weather.csv",
}

# Full relative path of every raw input file: <raw dir> + <sub-folder> + <file name>.
corona, meta, shape, weather = (
    PATH["data_raw"] + SUBPATH[dataset] + FILENAME[dataset]
    for dataset in ("corona", "meta", "shape", "weather")
)

In [None]:
# Raw inputs, keyed by data set name.
raw_data = {}

# Format the 'date' column of every input file is parsed with.
DATE_FORMAT = '%Y-%m-%d'


def dateparse(x):
    """Parse one 'YYYY-MM-DD' string into a datetime.

    Kept so that any other cell calling `dateparse` keeps working; it is no longer
    handed to `read_csv`, whose `date_parser` argument is deprecated in pandas 2.0.
    """
    return datetime.strptime(x, DATE_FORMAT)


def _read_dated_tsv(path):
    """Read a tab-separated file and convert its 'date' column to datetime64."""
    frame = pd.read_csv(path, sep='\t')
    # Explicit format: malformed dates raise instead of being silently guessed.
    frame['date'] = pd.to_datetime(frame['date'], format=DATE_FORMAT)
    return frame


raw_data['de_corona'] = _read_dated_tsv(corona)

# The weather data is split over two files; the second one lives next to the first.
additional = _read_dated_tsv(PATH["data_raw"] + SUBPATH["weather"] + "weather2.csv")

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# Like append, it keeps each file's original row index.
raw_data['weather'] = pd.concat([_read_dated_tsv(weather), additional])

## Dimension

In [None]:
# Report (rows, columns) of each raw frame, one line per data set.
for frame_name in ("weather", "de_corona"):
    print(f"{frame_name}\t dataFrame shape: {raw_data[frame_name].shape}")

## Sanity Check

Sanity check: are there missing values anywhere in the weather data?

In [None]:
# True if at least one cell of the weather frame is NaN/NaT, False otherwise.
weather_has_missing = raw_data["weather"].isna().any(axis=None)
print(weather_has_missing)

In [None]:
# Shift the temperature column by 273.15 (Kelvin -> degrees Celsius offset).
# NOTE: this overwrites the column in place, so running the cell twice shifts it twice.
temperature_shifted = raw_data["weather"]["TemperatureAboveGround"].sub(273.15)
raw_data["weather"]["TemperatureAboveGround"] = temperature_shifted

## Filter Data

Filter the weather dataframe so that it only contains rows referring to your country of study.

In [None]:
# Container for intermediate (filtered/cleaned) frames, keyed by data set name.
interim_data = dict()

In [None]:
# Keep only rows whose ISO 3166-2 region code starts with "DE".
# na=False: a missing region code counts as "not DE" instead of producing a NaN in
# the mask, which would make the boolean indexing below raise.
is_german_region = raw_data["weather"]["iso3166-2"].str.startswith("DE", na=False)
interim_data["weather"] = raw_data["weather"][is_german_region]

# Persist the filtered frame for later stages.
# NOTE(review): this path is spelled "Data/Interim" while the raw data is read from
# "data/raw" — on a case-sensitive file system these are different folders; confirm.
interim_data["weather"].to_csv("../../Data/Interim/interim_weather.csv")
interim_data["weather"]

## By region

Aggregate the weather data by region and calculate the min, mean, median, and max of all numeric columns. Then plot one of these variables of your choice.

In [None]:
# Per-region min / mean / median / max of every numeric weather column.
# Non-numeric columns (e.g. the date) are excluded up front: the task asks for
# numeric columns only, and pandas >= 2.0 no longer drops columns an aggregation
# cannot handle — it raises instead.
numeric_weather_columns = list(
    interim_data["weather"].select_dtypes(include="number").columns
)
weather_by_region = (
    interim_data["weather"]
    .groupby(by="iso3166-2")[numeric_weather_columns]
    .agg(["min", "mean", "median", "max"])
)
weather_by_region

In [None]:
# Bar chart of the mean relative humidity per region.
idx = pd.IndexSlice

# Select every statistic of the humidity variable from the (variable, statistic) columns.
humidity_stats = weather_by_region.loc[:, idx["RelativeHumiditySurface", :, :]]

humidity_axes = humidity_stats.plot.bar(
    y=("RelativeHumiditySurface", "mean"),
    rot=30,
    title="RelativeHumiditySurface",
)
humidity_axes.legend(loc="best");

## By date

In [None]:
# Average of every numeric weather column per date (across all regions).
# numeric_only=True skips the string column "iso3166-2"; without it pandas >= 2.0
# raises a TypeError instead of silently dropping the column.
weather_by_day = interim_data["weather"].groupby(by="date").mean(numeric_only=True)

In [None]:
# Display the per-date weather averages.
weather_by_day

In [None]:
# Line chart of the daily average total precipitation.
daily_precipitation = weather_by_day["Totalprecipitation"]
precipitation_axes = daily_precipitation.plot.line(rot=30)
precipitation_axes.legend(loc="best");

## Folium Stuff

In [None]:
# Base map for the region outlines: initial centre (lat, lon) and zoom level.
initial_view = {"location": [51.5, 10.7], "zoom_start": 5}
m = folium.Map(**initial_view)

In [None]:
# Overlay the region shapes from the GeoJSON file on the base map.
region_outlines = folium.GeoJson(shape, name="geojson")
region_outlines.add_to(m)

# Layer switcher so the overlay can be toggled.
layer_switch = folium.LayerControl()
layer_switch.add_to(m)

m

# Task 3

In [None]:
# Load the country metadata JSON (UTF-8) into a Python dict.
with open(meta, mode='r', encoding='utf-8') as metadata_file:
    country_metadata = json.loads(metadata_file.read())

In [None]:
# One metadata record per region.
region_records = country_metadata["country_metadata"]

# Step 1: translate the corona data's own region codes into ISO 3166-2 codes.
# Lookup table: covid region code -> ISO 3166-2 code.
region_map = {
    record["covid_region_code"]: record["iso3166-2_code"]
    for record in region_records
}

# Series.map applies the lookup row by row; codes missing from the table become NaN.
raw_data["de_corona"]["region"] = raw_data["de_corona"]["region_code"].map(region_map)

# Step 2: totals per region over the whole period.
grouped_by_region = raw_data["de_corona"].groupby(by="region")
corona_df_by_region = grouped_by_region["confirmed_addition"].sum().reset_index()
dead_by_region = grouped_by_region["deceased_addition"].sum().reset_index()

# Step 3: attach population figures.
# Lookup table: ISO 3166-2 code -> population of that region.
population_map = {
    record["iso3166-2_code"]: record["population"]
    for record in region_records
}
corona_df_by_region["population"] = corona_df_by_region["region"].map(population_map)

# Confirmed cases relative to the region's population.
corona_df_by_region["ratio"] = corona_df_by_region["confirmed_addition"].div(
    corona_df_by_region["population"]
)

In [None]:
# Choropleth of the total confirmed cases per region.
n_cases = folium.Map(location=[51.5, 10.7], zoom_start=6)

cases_layer = folium.Choropleth(
    geo_data=shape,
    name="cases",
    data=corona_df_by_region,
    # First column holds the key matched against `key_on`, second the value to colour by.
    columns=["region", "confirmed_addition"],
    key_on="properties.iso_3166_2",
    fill_color="OrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    highlight=True,
    legend_name="Number of Cases of Covid-19",
)
cases_layer.add_to(n_cases)
n_cases

In [None]:
# Choropleth of the total number of deceased per region.
dead = folium.Map(location = [51.5, 10.7], zoom_start = 6)

folium.Choropleth(
    geo_data = shape,
    # Layer name describes this layer (it was a copy-pasted "cases").
    name = "deceased",
    data = dead_by_region,
    # First column holds the key matched against `key_on`, second the value to colour by.
    columns = ["region", "deceased_addition"],
    key_on = "properties.iso_3166_2",
    fill_color = "Greys",
    fill_opacity = 0.7,
    line_opacity = 0.2,
    highlight=True,
    legend_name = "Number of Deceased with a covid-19 infection").add_to(dead)
dead

In [None]:
# Choropleth of the population per region.
p_cases = folium.Map(location=[51.5, 10.7], zoom_start=6)

population_layer_options = dict(
    geo_data=shape,
    name="population",
    data=corona_df_by_region,
    columns=["region", "population"],
    key_on="properties.iso_3166_2",
    fill_color="YlGn",
    fill_opacity=0.7,
    line_opacity=0.2,
    highlight=True,
    legend_name="Population",
)
folium.Choropleth(**population_layer_options).add_to(p_cases)
p_cases

In [None]:
# Choropleth of confirmed cases relative to population, with switchable base maps.
r_cases = folium.Map(location=[51.5, 10.7], zoom_start=6)

# Extra base-map styles offered in the layer control.
# NOTE(review): the Stamen tile names may not be available as built-ins in newer
# folium releases — verify against the installed version.
tiles = ['stamenwatercolor', 'cartodbpositron', 'openstreetmap', 'stamenterrain']
for tile_name in tiles:
    base_layer = folium.TileLayer(tile_name)
    base_layer.add_to(r_cases)

ratio_layer = folium.Choropleth(
    geo_data=shape,
    name="cases",
    data=corona_df_by_region,
    columns=["region", "ratio"],
    key_on="properties.iso_3166_2",
    fill_color="OrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Number of Cases pr. population of region",
    highlight=True,
)
ratio_layer.add_to(r_cases)

# Added last so the control lists every layer registered above.
folium.LayerControl().add_to(r_cases)

r_cases

In [None]:
# The mean UV index is divided by 24 because a day has 24 hours. This includes the
# night hours, when the UV index is very small or non-existent; one could argue for
# dividing by 16-18 instead, i.e. only the hours in which the sun is present.
UV_by_region = (
    interim_data["weather"]
    .groupby(by="iso3166-2")["UVIndex"]
    .mean()
    .reset_index()
)
UV_by_region["UVIndex_scaled"] = UV_by_region["UVIndex"].div(24)

In [None]:
# Choropleth of the scaled mean UV index per region.
uv_index = folium.Map(location=[51.5, 10.7], zoom_start=6)

uv_layer = folium.Choropleth(
    geo_data=shape,
    name="uv-index",
    data=UV_by_region,
    columns=["iso3166-2", "UVIndex_scaled"],
    key_on="properties.iso_3166_2",
    fill_color="OrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    highlight=True,
    legend_name="UVIndex",
    smooth_factor=0,
)
uv_layer.add_to(uv_index)
uv_index

In [None]:
# Mean temperature above ground per region, plus the same value divided by 24.
# NOTE(review): the division by 24 mirrors the UV-index cell; whether it is
# meaningful for a temperature depends on how the source column is defined — confirm.
temp_by_region = (
    interim_data["weather"]
    .groupby(by="iso3166-2")["TemperatureAboveGround"]
    .mean()
    .reset_index()
)
temp_by_region["TemperatureAboveGround_scaled"] = temp_by_region["TemperatureAboveGround"].div(24)

In [None]:
# Choropleth of the scaled mean temperature above ground per region.
TAG = folium.Map(location = [51.5, 10.7], zoom_start = 6)

folium.Choropleth(
    geo_data = shape,
    # Layer name describes this layer (it was a copy-pasted "uv-index").
    name = "temperature",
    data = temp_by_region,
    # First column holds the key matched against `key_on`, second the value to colour by.
    columns = ["iso3166-2", "TemperatureAboveGround_scaled"],
    key_on = "properties.iso_3166_2",
    fill_color = "OrRd",
    fill_opacity = 0.7,
    line_opacity = 0.2,
    highlight=True,
    legend_name = "average Temperature Above Ground on a daily basis").add_to(TAG)
TAG

In [None]:
# Add calendar and population-relative helper columns to the corona frame.
# `corona_frame` is the same object as raw_data['de_corona'], so the new columns land there.
corona_frame = raw_data['de_corona']
report_dates = corona_frame['date'].dt

corona_frame['month'] = report_dates.strftime('%B')   # full month name, e.g. "January"
corona_frame['year'] = report_dates.strftime('%Y')    # four-digit year as a string
corona_frame["population"] = corona_frame["region"].map(population_map)
corona_frame["confirmed_addition_ratio"] = corona_frame["confirmed_addition"].div(
    corona_frame["population"]
)

In [None]:
# Inspect the derived month-name column.
raw_data['de_corona']['month']

In [None]:
# Monthly totals per (month, region), one frame per year.
# numeric_only=True restricts the sum to numeric columns: the frame also holds a
# datetime column ("date") and string columns, and pandas >= 2.0 no longer drops
# them silently — summing datetimes raises a TypeError.
is_2020 = raw_data["de_corona"]["year"] == "2020"
is_2021 = raw_data["de_corona"]["year"] == "2021"

cases_2020_monthly = raw_data["de_corona"][is_2020].groupby(["month", "region"]).sum(numeric_only=True)
cases_2021_monthly = raw_data["de_corona"][is_2021].groupby(["month", "region"]).sum(numeric_only=True)

In [None]:
# One choropleth per month of confirmed cases relative to population,
# keyed "<Month>-<year>" (e.g. "January-2020"), 2020 months first.
confirmed_addition_ratio_maps = dict()

# A single loop over both years replaces two copy-pasted loops, and each map is
# built in its own variable so the `n_cases` map from an earlier cell is not overwritten.
for year, monthly_totals in (("2020", cases_2020_monthly), ("2021", cases_2021_monthly)):
    rows_of_year = raw_data["de_corona"]["year"] == year
    for month in raw_data["de_corona"].loc[rows_of_year, "month"].unique():
        month_map = folium.Map(location=[51.5, 10.7], zoom_start=6)

        # Text label on the map; it includes the year, like the other monthly map cells.
        folium.map.Marker(
            [54.5, 2],
            icon=DivIcon(
                icon_size=(400, 50),
                icon_anchor=(0, 0),
                html='<div style="font-size: 20pt">{}-{}</div>'.format(month, year),
            ),
        ).add_to(month_map)

        folium.Choropleth(
            geo_data=shape,
            name="cases",
            # Rows of this month only, with "region" turned back into a column.
            data=monthly_totals.loc[month].reset_index(),
            columns=["region", "confirmed_addition_ratio"],
            key_on="properties.iso_3166_2",
            fill_color="OrRd",
            fill_opacity=0.7,
            line_opacity=0.2,
            highlight=True,
            # Regions without data for the month are drawn in purple.
            nan_fill_color="purple",
            legend_name="Number of Cases of Covid-19 pr. population of region in {} {}".format(month, year),
        ).add_to(month_map)

        confirmed_addition_ratio_maps[month + "-" + year] = month_map

In [None]:
# One choropleth per month of the absolute number of confirmed cases,
# keyed "<Month>-<year>" (e.g. "January-2020"), 2020 months first.
confirmed_addition_maps = dict()

# A single loop over both years replaces two copy-pasted loops, and each map is
# built in its own variable so the `n_cases` map from an earlier cell is not overwritten.
for year, monthly_totals in (("2020", cases_2020_monthly), ("2021", cases_2021_monthly)):
    rows_of_year = raw_data["de_corona"]["year"] == year
    for month in raw_data["de_corona"].loc[rows_of_year, "month"].unique():
        month_map = folium.Map(location=[51.5, 10.7], zoom_start=6)

        # Text label on the map showing month and year.
        folium.map.Marker(
            [54.5, 2],
            icon=DivIcon(
                icon_size=(400, 50),
                icon_anchor=(0, 0),
                html='<div style="font-size: 20pt">{}-{}</div>'.format(month, year),
            ),
        ).add_to(month_map)

        folium.Choropleth(
            geo_data=shape,
            name="cases",
            # Rows of this month only, with "region" turned back into a column.
            data=monthly_totals.loc[month].reset_index(),
            columns=["region", "confirmed_addition"],
            key_on="properties.iso_3166_2",
            fill_color="OrRd",
            fill_opacity=0.7,
            line_opacity=0.2,
            highlight=True,
            # Regions without data for the month are drawn in purple.
            nan_fill_color="purple",
            legend_name="Number of Cases of Covid-19 in {} {}".format(month, year),
        ).add_to(month_map)

        confirmed_addition_maps[month + "-" + year] = month_map

# Assemble an animated GIF from one PNG per map, in the order of the dict keys.
# NOTE(review): no cell visible here writes these PNG files — they must already
# exist under maps/confirmed_addition/ for this step to run.
confirmed_addition = ["maps/confirmed_addition/" + key + ".png" for key in confirmed_addition_maps]
images = [imageio.imread(filename) for filename in confirmed_addition]
imageio.mimsave('map_gifs/confirmed_addition_maps.gif', images, duration=2)

In [None]:
# One choropleth per month of the absolute number of deceased,
# keyed "<Month>-<year>" (e.g. "January-2020"), 2020 months first.
deceased_addition_maps = dict()

# A single loop over both years replaces two copy-pasted loops, and each map is
# built in its own variable so the `n_cases` map from an earlier cell is not overwritten.
for year, monthly_totals in (("2020", cases_2020_monthly), ("2021", cases_2021_monthly)):
    rows_of_year = raw_data["de_corona"]["year"] == year
    for month in raw_data["de_corona"].loc[rows_of_year, "month"].unique():
        month_map = folium.Map(location=[51.5, 10.7], zoom_start=6)

        # Text label on the map showing month and year.
        folium.map.Marker(
            [54.5, 2],
            icon=DivIcon(
                icon_size=(400, 50),
                icon_anchor=(0, 0),
                html='<div style="font-size: 20pt">{}-{}</div>'.format(month, year),
            ),
        ).add_to(month_map)

        folium.Choropleth(
            geo_data=shape,
            # Layer name describes this layer (it was a copy-pasted "cases").
            name="deceased",
            # Rows of this month only, with "region" turned back into a column.
            data=monthly_totals.loc[month].reset_index(),
            columns=["region", "deceased_addition"],
            key_on="properties.iso_3166_2",
            fill_color="Greys",
            fill_opacity=0.7,
            line_opacity=0.2,
            highlight=True,
            # Regions without data for the month are drawn in purple.
            nan_fill_color="purple",
            legend_name="Number of deceased with a Covid-19 infection in {} {}".format(month, year),
        ).add_to(month_map)

        deceased_addition_maps[month + "-" + year] = month_map

# Assemble an animated GIF from one PNG per map, in the order of the dict keys.
# The file list is derived from this cell's own dict (it previously used
# confirmed_addition_maps, tying this cell to another cell's state).
# NOTE(review): no cell visible here writes these PNG files — they must already
# exist under maps/deceased_addition/ for this step to run.
deceased_addition = ["maps/deceased_addition/" + key + ".png" for key in deceased_addition_maps]
images = [imageio.imread(filename) for filename in deceased_addition]
imageio.mimsave('map_gifs/deceased_addition_maps.gif', images, duration=2)

All the monthly maps can be accessed as shown below; GIFs of the maps have also been created.
```python
deceased_addition_maps["January-2020"]
confirmed_addition_maps["January-2020"]
confirmed_addition_ratio_maps["January-2020"]
```
If data is missing, or nothing was recorded for a region, that region is coloured purple.
The month and year in the key can be changed to view other maps.

In [None]:
# Display the confirmed-cases map stored under the key "January-2020".
confirmed_addition_maps["January-2020"]

![SegmentLocal](map_gifs/confirmed_addition_maps.gif "segment")

In [None]:
# Display the deceased map stored under the key "February-2021".
deceased_addition_maps["February-2021"]

![SegmentLocal](map_gifs/deceased_addition_maps.gif "segment")

In [None]:
# Display the cases-per-population map for January 2020.
# Keys are built from the full month name ('%B'), so the key is "January-2020";
# the abbreviated "Jan-2020" does not exist and raised a KeyError.
confirmed_addition_ratio_maps["January-2020"]

In [None]:
# Bar chart of the confirmed-case additions over time, one bar per row of the corona frame.
corona_rows = raw_data["de_corona"]

fig = plt.figure(figsize=(15, 5))
# Axes spanning the whole figure area: [left, bottom, width, height] in figure fractions.
axes = fig.add_axes([0, 0, 1, 1])
axes.bar(corona_rows["date"], corona_rows["confirmed_addition"]);
# Optional: switch to a logarithmic y-axis.
#axes.set_yscale('log');