# Imports

In [None]:
import json

import folium
import matplotlib.pyplot as plt
import pandas as pd

# Exercise 1

## Loading Data

In [None]:
# Base directories of the data folder, keyed by processing stage.
# NOTE(review): "data_raw" climbs two directory levels while the other three
# climb one — confirm which prefix matches the actual project layout.
PATH = {
    "data_raw": "../../data/raw/",
    "data_interim": "../data/interim/",
    "data_processed": "../data/processed/",
    "data_external": "../data/external/",
}

# Sub-directory of each data set inside a base directory.
SUBPATH = {
    "corona": "corona/",
    "meta": "metadata/",
    "shape": "shapefiles/",
    "weather": "weather/",
}

# File name of each data set.
FILENAME = {
    "corona": "de_corona.csv",
    "meta": "de_metadata.json",
    "shape": "de.geojson",
    "weather": "weather.csv",
}

# Full location of every raw file: <raw base dir><sub-directory><file name>.
corona = f'{PATH["data_raw"]}{SUBPATH["corona"]}{FILENAME["corona"]}'
meta = f'{PATH["data_raw"]}{SUBPATH["meta"]}{FILENAME["meta"]}'
shape = f'{PATH["data_raw"]}{SUBPATH["shape"]}{FILENAME["shape"]}'
weather = f'{PATH["data_raw"]}{SUBPATH["weather"]}{FILENAME["weather"]}'

In [None]:
# Read the tab-separated corona and weather files into DataFrames,
# collected in one dict keyed by data set name.
raw_data = {
    "corona": pd.read_csv(corona, sep="\t"),
    "weather": pd.read_csv(weather, sep="\t"),
}

## Dimension

In [None]:
# Report the (rows, columns) shape of the raw weather frame.
print(f"weather\t dataFrame shape: {raw_data['weather'].shape}")

## Sanity Check

Sanity check: are there any missing values in the weather data?

In [None]:
# True if at least one cell of the weather frame is missing, False otherwise.
print(raw_data["weather"].isnull().to_numpy().any())

In [None]:
# Shift the temperature column by -273.15 (presumably Kelvin -> degrees Celsius;
# confirm the unit of the source data).
# NOTE: this overwrites the column in the raw frame, so running the cell a
# second time without reloading the data subtracts the offset again.
weather_raw = raw_data["weather"]
weather_raw["TemperatureAboveGround"] = weather_raw["TemperatureAboveGround"].sub(273.15)

## Filter Data

Filter the weather DataFrame so that it only contains rows for your country of study.

In [None]:
# Container for intermediate (filtered) data sets, keyed by data set name.
interim_data = dict()

In [None]:
# Keep only the weather rows whose iso3166-2 code starts with "DE".
# na=False makes rows with a missing region code count as "no match"; without
# it the mask contains NaN for those rows and boolean indexing raises.
is_de_region = raw_data["weather"]["iso3166-2"].str.startswith("DE", na=False)

# .copy() detaches the filtered frame from the raw one, so later column
# assignments on it neither warn nor leak back into raw_data.
interim_data["weather"] = raw_data["weather"].loc[is_de_region].copy()

# Persist the filtered frame; the file lands in the notebook's working directory.
interim_data["weather"].to_csv("interim_weather.csv")
interim_data["weather"]

## By region

Aggregate the weather data by region and calculate the min, mean, median, and max of all numeric columns. Then plot one of these variables of your choice.

In [None]:
# Aggregate only the numeric columns: the frame also holds the string column
# "date", for which mean/median are undefined (current pandas raises a
# TypeError instead of silently dropping such columns).
numeric_columns = interim_data["weather"].select_dtypes(include="number").columns.tolist()

# Result: one row per region; columns form a two-level index (variable, statistic).
weather_by_region = (
    interim_data["weather"]
    .groupby(by="iso3166-2")[numeric_columns]
    .agg(["min", "mean", "median", "max"])
)
weather_by_region

In [None]:
idx = pd.IndexSlice

# weather_by_region has two column levels (variable, statistic), so a two-part
# key picks exactly one column and returns it as a Series indexed by region.
humidity_mean = weather_by_region.loc[:, idx["RelativeHumiditySurface", "mean"]]

# One bar per region; the y-axis is labelled so the figure can stand alone.
humidity_ax = humidity_mean.plot.bar(rot=30, title="RelativeHumiditySurface")
humidity_ax.set_ylabel("mean")
humidity_ax.legend(loc="best");

## By date

In [None]:
# Mean of every numeric weather variable per date. numeric_only=True skips the
# string column "iso3166-2", which has no mean (current pandas raises a
# TypeError for it instead of dropping it).
weather_by_day = interim_data["weather"].groupby(by="date").mean(numeric_only=True)

In [None]:
# Display the per-date means held in weather_by_day.
weather_by_day

In [None]:
# Line plot of the per-date mean of Totalprecipitation, x tick labels rotated by 30 degrees.
precipitation_ax = weather_by_day["Totalprecipitation"].plot.line(rot=30)
precipitation_ax.legend(loc="best");

## Folium Stuff

In [None]:
# Base map centred on [51.5, 10.7] with an initial zoom level of 5.
m = folium.Map(zoom_start=5, location=[51.5, 10.7])

In [None]:
# Draw the shapes from the geojson file on the base map.
region_layer = folium.GeoJson(shape, name="geojson")
region_layer.add_to(m)

# Add the layer toggle control.
layer_control = folium.LayerControl()
layer_control.add_to(m)

# Render the map as the cell output.
m

# Exercise 2

In [378]:
# Parse the UTF-8 encoded metadata JSON file into a Python object.
with open(meta, encoding="utf-8") as metadata_file:
    country_metadata = json.load(metadata_file)

In [379]:
# Per-region records of the metadata file; each record provides the keys
# "covid_region_code", "iso3166-2_code" and "population" read below.
region_records = country_metadata["country_metadata"]

# Task 1: replace the region codes of the corona data by iso3166-2 codes.
# Lookup table: covid region code -> iso3166-2 code.
region_map = {
    record["covid_region_code"]: record["iso3166-2_code"]
    for record in region_records
}

# Series.map translates every region code through the lookup table.
raw_data["corona"]["region"] = raw_data["corona"]["region_code"].map(region_map)

# Task 2: total number of confirmed cases per region.
corona_df_by_region = (
    raw_data["corona"]
    .groupby(by="region")["confirmed_addition"]
    .sum()
    .reset_index()
)

# Population figures come from the same metadata records.
# Lookup table: iso3166-2 code -> population of the region.
population_map = {
    record["iso3166-2_code"]: record["population"]
    for record in region_records
}

# Attach the population with the same mapping technique as above.
corona_df_by_region["population"] = corona_df_by_region["region"].map(population_map)

# Confirmed cases relative to the population of the region.
corona_df_by_region["ratio"] = corona_df_by_region["confirmed_addition"].div(
    corona_df_by_region["population"]
)

In [None]:
# Map of the summed confirmed cases per region.
n_cases = folium.Map(zoom_start=6, location=[51.5, 10.7])

# Colour each geojson shape by the "confirmed_addition" value of the region
# whose "region" code equals the shape's iso_3166_2 property.
cases_layer = folium.Choropleth(
    geo_data=shape,
    data=corona_df_by_region,
    columns=["region", "confirmed_addition"],
    key_on="properties.iso_3166_2",
    name="cases",
    legend_name="Number of Cases",
    fill_color="OrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
)
cases_layer.add_to(n_cases)
n_cases

In [None]:
# Map of the population per region.
p_cases = folium.Map(zoom_start=6, location=[51.5, 10.7])

# Colour each geojson shape by the "population" value of the region whose
# "region" code equals the shape's iso_3166_2 property.
population_layer = folium.Choropleth(
    geo_data=shape,
    data=corona_df_by_region,
    columns=["region", "population"],
    key_on="properties.iso_3166_2",
    name="population",
    legend_name="Population",
    fill_color="YlGn",
    fill_opacity=0.7,
    line_opacity=0.2,
)
population_layer.add_to(p_cases)
p_cases

In [None]:
# Map of the confirmed cases relative to the population of each region.
r_cases = folium.Map(location=[51.5, 10.7], zoom_start=6)

# Colour each geojson shape by the "ratio" column (confirmed cases divided by
# population). The legend names that quantity; it previously read
# "Number of Cases", which describes the absolute-count map instead.
folium.Choropleth(
    geo_data=shape,
    name="cases",
    data=corona_df_by_region,
    columns=["region", "ratio"],
    key_on="properties.iso_3166_2",
    fill_color="OrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Confirmed Cases per Inhabitant",
).add_to(r_cases)
r_cases

In [389]:
# Column names of the per-region corona summary.
corona_df_by_region.columns.tolist()

['region', 'confirmed_addition', 'population', 'ratio']

In [391]:
# Column names of the raw corona frame (including the added "region" column).
raw_data["corona"].columns.tolist()

['date', 'region_code', 'confirmed_addition', 'deceased_addition', 'region']

In [390]:
# Column names of the raw weather frame.
raw_data["weather"].columns.tolist()

['date',
 'iso3166-2',
 'RelativeHumiditySurface',
 'SolarRadiation',
 'Surfacepressure',
 'TemperatureAboveGround',
 'Totalprecipitation',
 'UVIndex',
 'WindSpeed']

In [382]:
# Mean UV index per region, taken from the country-filtered weather records.
uv_by_region = (
    interim_data["weather"]
    .groupby(by="iso3166-2")["UVIndex"]
    .mean()
    .reset_index()
)

uv_index = folium.Map(location=[51.5, 10.7], zoom_start=6)

# Colour each geojson shape by the mean UV index of the matching region.
# NOTE(review): assumes the weather "iso3166-2" values use the same code format
# as the geojson "iso_3166_2" property — confirm against the data.
# The layer is attached to uv_index, the map this cell displays; it used to be
# attached to r_cases, which left uv_index empty.
folium.Choropleth(
    geo_data=shape,
    name="uv-index",
    data=uv_by_region,
    columns=["iso3166-2", "UVIndex"],
    key_on="properties.iso_3166_2",
    fill_color="OrRd",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Mean UV Index",
).add_to(uv_index)
uv_index