In [1]:
import pandas as pd

In [2]:
# Location of the CSV with the per-state measure data (hosted on PsychArchives).
FILE_URL = 'https://pada.psycharchives.org/bitstream/9ff033a9-4084-4d0e-87eb-aa963a1324a5'
# Comma-separated file; the first row supplies the column labels.
# NOTE: the measure column label is 'Measure ' WITH a trailing space -- evaluate_date
# selects it under exactly that name, so do not strip the headers without updating it.
covid_df = pd.read_csv(FILE_URL, sep=",", header=[0])
# Preview the first five rows, restricted to the first five columns.
# NOTE(review): the table appears to hold one row per (state, measure) and one column
# per calendar date (ISO 'YYYY-MM-DD' labels) -- confirm against the readme.
print(covid_df.head().iloc[:,:5])

   Unnamed: 0               state   Measure   2020-03-08  2020-03-09
0         1.0  Baden-Wuerttemberg  leavehome         0.0         0.0
1         1.0  Baden-Wuerttemberg       dist         0.0         0.0
2         1.0  Baden-Wuerttemberg        msk         0.0         0.0
3         1.0  Baden-Wuerttemberg     shppng         0.0         0.0
4         1.0  Baden-Wuerttemberg       hcut         0.0         0.0


In [3]:
# Influence weight of each measure code (see readme for how the weights were chosen).
# evaluate_date multiplies every row's contribution by this weight; rows whose measure
# has weight 0 are skipped entirely. Every measure code present in the data needs an
# entry here, because evaluate_date looks the code up with plain dict indexing.
measure_influence = {
    'leavehome': 1,
    'dist': 0,
    'msk': 1,
    'shppng': 2,
    'hcut': 2,
    'ess_shps': 2,
    'zoo': 0,
    'demo': 0,
    'school': 1,
    'church': 0,
    'onefriend': 0,
    'morefriends': 0,
    'plygrnd': 0,
    'daycare': 2,
    'trvl': 1,
    'gastr': 2
}
# Share of the country's population living in each state, keyed by the state name as
# it appears in the 'state' column of covid_df. The sixteen shares sum to ~1.0, so the
# weighted sum computed in evaluate_date is a population-weighted average over states.
state_percentages = {
    'Baden-Wuerttemberg': 0.133924061,
    'Bayern': 0.158676851,
    'Berlin': 0.044670274,
    'Brandenburg': 0.030491172,
    'Bremen': 0.008169464,
    'Hamburg': 0.022560236,
    'Hessen': 0.075833,
    'Mecklenburg-Vorpommern': 0.019245033,
    'Niedersachsen': 0.096398323,
    'Nordrhein-Westfalen': 0.214840756,
    'Rheinland-Pfalz': 0.049301337,
    'Saarland': 0.011744796,
    'Sachsen': 0.048299274,
    'Sachsen-Anhalt': 0.025752514,
    'Schleswig-Holstein': 0.035026746,
    'Thueringen': 0.025066162
}

In [4]:
def evaluate_date(request_date, data=None, influence=None, state_weights=None):
    """Return the population-weighted measure factor for a single date.

    For every row of ``data`` that has a value for ``request_date`` the
    contribution is ``(int(level) / 5 + 0.6) * state_weight * measure_influence``
    (formula as described in the readme); the contributions are summed.

    Parameters
    ----------
    request_date : str
        Column label of the requested day (ISO ``'YYYY-MM-DD'`` in this notebook).
    data : pandas.DataFrame, optional
        Table with the columns ``'state'``, ``'Measure '`` (trailing space) and one
        column per date. Defaults to the notebook-level ``covid_df``.
    influence : dict, optional
        Measure code -> influence weight. Defaults to ``measure_influence``.
    state_weights : dict, optional
        State name -> population share. Defaults to ``state_percentages``.

    Returns
    -------
    int or float
        The summed factor, or ``0`` when ``request_date`` is not a column of
        ``data`` (or no row contributes).

    Raises
    ------
    KeyError
        If a measure code is missing from ``influence``, or the state of a row
        with non-zero influence is missing from ``state_weights``.
    """
    # Fall back to the notebook globals at call time, so existing one-argument
    # calls behave exactly as before.
    data = covid_df if data is None else data
    influence = measure_influence if influence is None else influence
    state_weights = state_percentages if state_weights is None else state_weights

    if request_date not in data.columns:
        return 0

    # Rows missing the state, the measure or the day's value carry no information.
    day_rows = data[['state', 'Measure ', request_date]].dropna()

    total = 0
    for state, measure, level in day_rows.itertuples(index=False, name=None):
        weight = influence[measure]
        if weight == 0:
            # Measures without influence are skipped before the state lookup.
            continue
        total += (int(level) / 5 + 0.6) * state_weights[state] * weight
    return total

In [5]:
# Build an hourly table of the COVID factor covering 2015-01-01 through today.
# Each day's factor is computed once by evaluate_date and repeated for its 24 hours.
from datetime import date, timedelta

working_dt = date(2015, 1, 1)
end_dt = date.today()
delta = timedelta(days=1)

# Collect rows in plain lists and build the frame once at the end: enlarging a
# DataFrame with .loc one row at a time reallocates on every insert and becomes
# very slow for the ~100k hourly rows produced here.
timestamps = []
factors = []

while working_dt <= end_dt:
    # Do not bind the ISO string to the name `date`: that would shadow the
    # datetime.date class imported above.
    day_iso = working_dt.isoformat()
    factor = evaluate_date(day_iso)
    day_start = pd.Timestamp(day_iso)
    for hour in range(24):
        timestamps.append(day_start + pd.Timedelta(hours=hour))
        factors.append(factor)
    working_dt += delta

# Output frame: one float 'factor' column indexed by the hourly timestamps.
covid_factors_df = pd.DataFrame(
    {'factor': factors},
    index=pd.DatetimeIndex(timestamps),
    dtype=float,
)

In [6]:
# Sanity check: display the first rows of the generated hourly factor table.
covid_factors_df.head()

Unnamed: 0,factor
2015-01-01 00:00:00,0.0
2015-01-01 01:00:00,0.0
2015-01-01 02:00:00,0.0
2015-01-01 03:00:00,0.0
2015-01-01 04:00:00,0.0
