In [1]:
# Importing the libraries we need
import folium, json
import pandas as pd

In [11]:
# Storing the paths to the data files.
# Remember to change these if your folder structure is different.
geo_json_path = "../data/raw/shapefiles/de.geojson"
corona_df = pd.read_csv("../data/raw/corona/de_corona.csv", sep = "\t")

# Loading the country metadata directly, since it is a JSON file.
# The encoding is passed explicitly so that any non-ASCII text in the file is
# decoded the same way on every platform instead of using the locale default.
with open("../data/raw/metadata/de_metadata.json", "r", encoding = "utf-8") as f:
    country_metadata = json.load(f)

# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() (that only appends a stray "None" line).
corona_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5602 entries, 0 to 5601
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   date                5602 non-null   object
 1   region_code         5602 non-null   object
 2   confirmed_addition  5602 non-null   int64 
 3   deceased_addition   5602 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 175.2+ KB
None


In [14]:
# First task: convert the region names in the corona data to iso3166-2 codes.
# To do so, we first need a dictionary mapping the region name to the code.
# We iterate over the metadata entries directly instead of indexing by position.
regions_metadata = country_metadata["country_metadata"]
region_map = {entry["covid_region_code"]: entry["iso3166-2_code"] for entry in regions_metadata}

# Then, we use the Series.map function
corona_df["region"] = corona_df["region_code"].map(region_map)

# Second task: calculate how many cases there were in total for each region.
# The two count columns are selected explicitly before summing: the frame also
# holds the text columns "date" and "region_code", and on pandas >= 2.0
# GroupBy.sum() no longer drops non-numeric columns by default, so a bare
# .sum() would concatenate those strings instead of leaving them out.
corona_df_by_region = (
    corona_df
    .groupby(by = "region")[["confirmed_addition", "deceased_addition"]]
    .sum()
    .reset_index()
)

# If we want to add population data, we need another dictionary from the country metadata.
# In this case, we map from the region iso3166-2 code to the region's population.
population_map = {entry["iso3166-2_code"]: entry["population"] for entry in regions_metadata}

# Then we can map, just like before
corona_df_by_region["population"] = corona_df_by_region["region"].map(population_map)

corona_df_by_region

Unnamed: 0,region,confirmed_addition,deceased_addition,population
0,DE-BB,74955,2914,2511917
1,DE-BE,126896,2728,3644826
2,DE-BY,428996,12123,13076721
3,DE-HB,17485,327,682986
4,DE-HE,184434,5672,6265809
5,DE-HH,50291,1223,1841179
6,DE-MV,23436,692,1609675
7,DE-NI,159191,4104,7982448
8,DE-NW,521548,12632,17932651
9,DE-RP,100118,2993,4084844


In [16]:
# Third task: mapping the number of cases in a choropleth map.
# We start by initializing the folium map. The data covers Germany, so the view
# is centred on Germany (roughly 51.2 N, 10.5 E) with a zoom level low enough to
# show the country as a whole; the previous centre [50.5, 4.7] lies in Belgium.
m_cases = folium.Map(location = [51.1657, 10.4515], zoom_start = 6)

# Then we use the Choropleth function, which requires: the path to the geoJSON (the shapes),
# a name for the layer, the dataframe containing the data, the columns we want to focus on
# (first the key column, then the value column), and the attribute from the geoJSON which we
# use to match the data with the corresponding geometry; in our case this is the iso code.
# Finally, we specify the color map, the opacity (so that we can still see the map beneath),
# and a title for the legend.
folium.Choropleth(
    geo_data = geo_json_path,
    name = "cases",
    data = corona_df_by_region,
    columns = ["region", "confirmed_addition"],
    key_on = "properties.iso_3166_2",
    fill_color = "OrRd",
    fill_opacity = 0.7,
    line_opacity = 0.2,
    legend_name = "Number of Cases",
).add_to(m_cases)

m_cases

In [17]:
# Fourth task: is more cases bad, or could it be explained simply by a larger population?
# Let's make another map using the exact same function; this time we plot population
# instead of cases. We also use a different color map.
# The data covers Germany, so the view is centred on Germany (roughly 51.2 N, 10.5 E)
# with a zoom level low enough to show the country as a whole; the previous centre
# [50.5, 4.7] lies in Belgium.
m_pop = folium.Map(location = [51.1657, 10.4515], zoom_start = 6)

folium.Choropleth(
    geo_data = geo_json_path,
    name = "population",
    data = corona_df_by_region,
    columns = ["region", "population"],
    key_on = "properties.iso_3166_2",
    fill_color = "YlGn",
    fill_opacity = 0.7,
    line_opacity = 0.2,
    legend_name = "Population",
).add_to(m_pop)

m_pop

In [18]:
# Fifth task: we create a new column in our data frame with the cases per capita
# (total confirmed cases of a region divided by that region's population).
corona_df_by_region["cases_pc"] = corona_df_by_region["confirmed_addition"] / corona_df_by_region["population"]

# And we plot it just like we did before. The data covers Germany, so the view is
# centred on Germany (roughly 51.2 N, 10.5 E) with a zoom level low enough to show
# the country as a whole; the previous centre [50.5, 4.7] lies in Belgium.
m_cases_pc = folium.Map(location = [51.1657, 10.4515], zoom_start = 6)

# The layer name and legend describe the plotted column (cases per capita),
# not the absolute number of cases shown on the first map.
folium.Choropleth(
    geo_data = geo_json_path,
    name = "cases per capita",
    data = corona_df_by_region,
    columns = ["region", "cases_pc"],
    key_on = "properties.iso_3166_2",
    fill_color = "OrRd",
    fill_opacity = 0.7,
    line_opacity = 0.2,
    legend_name = "Cases per Capita",
).add_to(m_cases_pc)

m_cases_pc