In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import datetime

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# pandas / matplotlib / seaborn calls further down — consider narrowing it.
warnings.filterwarnings("ignore")

# Render floats with two decimal places whenever pandas displays them.
pd.set_option("display.float_format", lambda x: "%.2f" % x)

## Importing data

In [None]:
# ASCII spellings found in the CSV files -> spellings with umlauts
# (presumably the form used by the shapefile's GEN column — confirm).
state_spelling = {
    "Baden-Wuerttemberg": "Baden-Württemberg",
    "Thueringen": "Thüringen",
}

# Case records: harmonise state names and parse the date column.
df_cov = pd.read_csv("../input/covid19-tracking-germany/covid_de.csv").assign(
    state=lambda frame: frame["state"].replace(state_spelling),
    date=lambda frame: pd.to_datetime(frame["date"]),
)

# Demographics: harmonise state names and recode gender to "F" / "M".
# Every value other than "female" is recoded to "M".
df_dem = pd.read_csv("../input/covid19-tracking-germany/demographics_de.csv")
df_dem["state"] = df_dem["state"].replace(state_spelling)
is_female = df_dem["gender"] == "female"
df_dem["gender"] = np.where(is_female, "F", "M")

# State geometries, without the ADE / RS / RS_0 attribute columns.
df_map = gpd.read_file("../input/covid19-tracking-germany/de_state.shp").drop(
    columns=["ADE", "RS", "RS_0"]
)

# County geometries, loaded unchanged.
df_map_county = gpd.read_file("../input/covid19-tracking-germany/de_county.shp")

## Replace NAs

In [None]:
# Totals for the rows that lack a gender or an age group.
# numeric_only=True restricts the sum to the numeric columns; summing the
# string and datetime columns is meaningless and raises on pandas >= 2.0.
rows_missing_demographics = df_cov["gender"].isna() | df_cov["age_group"].isna()
df_cov.loc[rows_missing_demographics].sum(numeric_only=True)

In [None]:
# Population per gender and each gender's share of the total population.
# numeric_only=True keeps the string columns (state, age_group) out of the
# sum; pandas >= 2.0 would otherwise concatenate them row by row.
gender_dist = df_dem.groupby(by="gender").sum(numeric_only=True)
gender_dist["proportion"] = gender_dist["population"] / gender_dist["population"].sum()
gender_dist

In [None]:
# Impute missing genders: a random 51 % of the affected rows become "F",
# the rest "M" (0.51 is presumably the rounded female population share from
# the gender distribution table — confirm).
gender_na = df_cov["gender"].isna()
# A fixed random_state makes the draw reproducible under Restart & Run All.
ind = df_cov.loc[gender_na, "gender"].sample(frac=0.51, random_state=0).index
df_cov.loc[ind, "gender"] = "F"
# Assign the result back instead of fillna(inplace=True) on a column
# selection, which is chained assignment and does not update the frame
# under pandas copy-on-write.
df_cov["gender"] = df_cov["gender"].fillna("M")

# Age group with the largest population, computed over the numeric columns only.
df_dem.groupby(by="age_group").sum(numeric_only=True).idxmax()

In [None]:
# Fill missing age groups with "35-59" (presumably the most populous group
# reported by the idxmax() call in the previous cell — confirm).
# Assigning back avoids the chained inplace fillna, which does not update the
# frame under pandas copy-on-write.
df_cov["age_group"] = df_cov["age_group"].fillna("35-59")
df_cov.info()

## Daily cases

In [None]:
# Aggregate all records per day. numeric_only=True sums only the numeric
# columns; the string columns would otherwise be concatenated (or raise) on
# pandas >= 2.0.
df_daily_cases = df_cov.groupby("date").sum(numeric_only=True)

# The style is set here and stays active for the plots that follow.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(15, 8))
sns.lineplot(data=df_daily_cases["cases"], ax=ax)
ax.set_title("Daily cases")
ax.set_ylabel("Daily cases")
plt.show()

## Daily fatalities

In [None]:
# Line chart of the per-day death totals held in df_daily_cases.
fig, ax = plt.subplots(figsize=(15, 8))
daily_deaths = df_daily_cases["deaths"]
sns.lineplot(data=daily_deaths, color="orange", ax=ax)
ax.set_title("Daily fatalities")
ax.set_ylabel("Daily fatalities")
plt.show()

## Fatality distribution

In [None]:
# Totals per age group, ordered by age-group label.
# numeric_only=True keeps the string/datetime columns out of the sum, which
# pandas >= 2.0 no longer drops silently.
df_date_age = (
    df_cov.groupby(by=["age_group"])
    .sum(numeric_only=True)
    .reset_index()
    .sort_values("age_group", ascending=True)
)
# Loop-invariant denominator for the percentage labels.
total_deaths = df_date_age["deaths"].sum()

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))

# Left panel: share of cases per age group.
ax1.pie(x=df_date_age["cases"], labels=df_date_age["age_group"], autopct="%1.1f%%")

# Right panel: absolute deaths per age group, each bar labelled with its
# share of all deaths.
psns = sns.barplot(data=df_date_age, x="age_group", y="deaths", ax=ax2)
for p in psns.patches:
    psns.annotate(
        format(100 * p.get_height() / total_deaths, ".1f") + "%",
        (p.get_x() + p.get_width() / 2.0, p.get_height()),
        ha="center",
        va="center",
        xytext=(0, 9),  # shift the label 9 points above the bar top
        textcoords="offset points",
    )

ax1.set_title("Proportional distribution of positiv cases")
ax2.set_title("Absolute and relative distribution of fatalities")
plt.show()

## Death ratio by gender

In [None]:
# Totals per (age group, gender); numeric_only=True restricts the sum to the
# numeric columns so string/datetime columns are not aggregated.
df_gender = df_cov.groupby(by=["age_group", "gender"]).sum(numeric_only=True).reset_index()
# Deaths as a percentage of cases within each group.
df_gender["death_ratio"] = 100 * df_gender["deaths"] / df_gender["cases"]

fig, ax = plt.subplots(figsize=(15, 8))
sns.lineplot(data=df_gender, x="age_group", y="death_ratio", hue="gender", ax=ax)
ax.set_title("Death ratio by gender")
plt.show()

## Geographical overview

In [None]:
# Per-state Covid totals (numeric columns only) and per-state population.
df_state_cov = df_cov.groupby(by="state", as_index=False).sum(numeric_only=True)
df_state_dem = df_dem[["state", "population"]].groupby(by="state", as_index=False).sum()

# Attach both tables to the state geometries; GEN holds the state name in the
# shapefile. Both right-hand tables carry a "state" column, so the second
# merge produces state_x / state_y, which are dropped again.
df_state = df_map.merge(df_state_cov, how="left", left_on="GEN", right_on="state")
df_state = df_state.merge(df_state_dem, how="left", left_on="GEN", right_on="state")
df_state = df_state.drop(columns=["state_x", "state_y"])

# Cases and deaths per 1000 residents, and deaths as a percentage of cases.
df_state["case_ratio"] = df_state["cases"] * (1000 / df_state["population"])
df_state["death_ratio"] = df_state["deaths"] * (1000 / df_state["population"])
df_state["death_case_ratio"] = 100 * df_state["deaths"] / df_state["cases"]

# Index by state name; reassigning (instead of inplace=True) keeps the cell
# re-runnable because df_state is rebuilt from df_map each time.
df_state = df_state.set_index("GEN")

In [None]:
def _plot_state_map(ax, gdf, column, title, legend_label):
    """Draw one choropleth panel of `gdf` coloured by `column`.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    gdf : GeoDataFrame whose index values are used as the region labels.
    column : name of the column that drives the colour scale.
    title : panel title.
    legend_label : caption of the horizontal colour bar.
    """
    gdf.plot(
        column=column,
        legend=True,
        legend_kwds={"label": legend_label, "orientation": "horizontal"},
        cmap="Reds",
        ax=ax,
    )
    # Series.items() replaces iteritems(), which was removed in pandas 2.0.
    for region, point in gdf.centroid.items():
        # The label is passed positionally: the keyword was renamed from `s`
        # to `text` in matplotlib 3.3, and positional use works with both.
        ax.annotate(
            region,
            xy=(point.x, point.y),
            color="black",
            bbox=dict(boxstyle='round,pad=0.2', fc='white', alpha=0.75),
        )
    ax.set_title(title)
    # Hide both axes; the coordinate ticks are not wanted on the maps.
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)


fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 8))
_plot_state_map(ax1, df_state, "case_ratio", "Positive cases", "Per 1000 residents")
_plot_state_map(ax2, df_state, "death_ratio", "Fatalities", "Per 1000 residents")
_plot_state_map(ax3, df_state, "death_case_ratio", "Deaths per case ratio", "In percentage (%)")
plt.show()

## Cumulated cases over time by state

In [None]:
# Daily totals per (state, date); numeric_only=True restricts the sum to the
# numeric columns so string columns are not aggregated.
df_cov_rel = df_cov.groupby(by=["state", "date"]).sum(numeric_only=True)
# Replace infinities and missing values with 0 before accumulating.
df_cov_rel = df_cov_rel.replace([np.inf, -np.inf], 0).fillna(0)

# Running totals within each state. The groupby sorts its keys, so the dates
# are ascending inside every state. level="state" names the index level that
# the original addressed as -2.
per_state = df_cov_rel.groupby(level="state")
df_cov_rel["cases_cumsum"] = per_state["cases"].cumsum()
df_cov_rel["deaths_cumsum"] = per_state["deaths"].cumsum()
df_cov_rel = df_cov_rel.reset_index()

In [None]:
# One line per state showing the running case total over time.
fig, ax = plt.subplots(figsize=(15, 8))
# Rows are passed sorted by descending running total.
cases_desc = df_cov_rel.sort_values("cases_cumsum", ascending=False)
ln = sns.lineplot(data=cases_desc, x="date", y="cases_cumsum", hue="state", ax=ax)
# Anchor the legend just outside the right edge of the axes.
ln.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
ax.set_title("Cumulated cases over time by state")
plt.show()

## Cumulated fatalities over time by state

In [None]:
# One line per state showing the running death total over time.
fig, ax = plt.subplots(figsize=(15, 8))
# Rows are passed sorted by descending running total.
deaths_desc = df_cov_rel.sort_values("deaths_cumsum", ascending=False)
ln = sns.lineplot(data=deaths_desc, x="date", y="deaths_cumsum", hue="state", ax=ax)
# Anchor the legend just outside the right edge of the axes.
ln.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
ax.set_title("Cumulated fatalities over time by state")
plt.show()

## Relating demographics to Covid

In [None]:
# Flag the age groups from "60-79" upwards. This is a lexicographic string
# comparison, so it relies on the age-group labels sorting in age order.
df_dem["over60"] = np.where(df_dem["age_group"] >= "60-79", 1, 0)

# Population per state split by the over-60 flag, and total population per state.
df_dem_age = df_dem.groupby(["state", "over60"], as_index=False)["population"].sum()
df_dem_age_temp = (
    df_dem.groupby("state", as_index=False)["population"]
    .sum()
    .rename(columns={"population": "total_population"})
)

# Join on the state name explicitly. Without `on=`, merge uses every shared
# column as a key, which matched before only because the summed over60 flag
# happened to equal 1 in the totals table.
df_dem_age = df_dem_age.merge(df_dem_age_temp, how="left", on="state")

# Keep the over-60 rows; .copy() so the new column is written to an
# independent frame rather than a slice.
df_dem_age = df_dem_age[df_dem_age["over60"] == 1].copy()
df_dem_age["age_ratio"] = 100 * (df_dem_age["population"] / df_dem_age["total_population"])

# Attach the per-state Covid ratios, ordered by the share of over-60 residents.
df_state2 = df_state.reset_index()[["GEN", "case_ratio", "death_ratio", "death_case_ratio"]]
df_dem_age = df_dem_age.sort_values("age_ratio", ascending=False).reset_index(drop=True)
df_dem_age = df_dem_age.merge(df_state2, how="left", left_on="state", right_on="GEN")

In [None]:
# Four bar charts over the same state ordering (the row order of df_dem_age).
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(18, 8))

# (axes, column to plot, panel title, y-axis label)
panels = [
    (ax1, "age_ratio", "Residents over 60", "In percentage (%)"),
    (ax2, "death_case_ratio", "Deaths per case ratio", "In percentage (%)"),
    (ax3, "case_ratio", "Cases per 1000 residents", "x per 1000"),
    (ax4, "death_ratio", "Deaths per 1000 residents", "x per 1000"),
]

for ax, column, title, ylabel in panels:
    br = sns.barplot(data=df_dem_age, x="state", y=column, ax=ax)
    # Rotate the state names so they do not overlap. No legend is requested:
    # the bars carry no labels, so there is nothing for a legend to list.
    br.tick_params(axis="x", labelrotation=90)
    ax.set_title(title)
    ax.set_ylabel(ylabel)

plt.show()