In [None]:
import pandas as pd
import os
import numpy as np
import epiweeks
import datetime

In [None]:
    # Load the RKI "Incident Cases by Age" truth table for Germany from the
    # KITmetricslab covid19-forecast-hub-de repository (network access required).
    cases_csv = 'https://raw.githubusercontent.com/KITmetricslab/covid19-forecast-hub-de/master/data-truth/RKI/by_age/truth_RKI-Incident%20Cases%20by%20Age_Germany.csv'
    cases = pd.read_csv(cases_csv)
    # Strip the "A" prefixes from the age-group labels ('A00-A04' -> '00-04', ...).
    # The result is assigned back to the column: calling replace(inplace=True) on
    # cases['age_group'] is chained assignment on a temporary Series and does not
    # update `cases` when pandas Copy-on-Write is active.
    cases['age_group'] = cases['age_group'].replace({
        'A00-A04': '00-04',
        'A05-A14': '05-14',
        'A15-A34': '15-34',
        'A35-A59': '35-59',
        'A60-A79': '60-79',
        'A80+': '80+',
    })
    cases = cases.rename(columns={'age_group': 'Age group'})
    cases

In [None]:
# Load the RKI "Incident Deaths by Age" truth table for Germany from the
# KITmetricslab covid19-forecast-hub-de repository (network access required).
deaths_csv = 'https://raw.githubusercontent.com/KITmetricslab/covid19-forecast-hub-de/master/data-truth/RKI/by_age/truth_RKI-Incident%20Deaths%20by%20Age_Germany.csv'
deaths = pd.read_csv(deaths_csv)
# Relabel one location name.
# NOTE(review): presumably this matches a spelling expected further down the
# pipeline - confirm against the consumers of `location_name`.
deaths.loc[deaths['location_name'] == 'Free State of Thuringia', 'location_name'] = 'Free State of Thüringia'
# Strip the "A" prefixes from the age-group labels ('A00-A04' -> '00-04', ...).
# The result is assigned back to the column: calling replace(inplace=True) on
# deaths['age_group'] is chained assignment on a temporary Series and does not
# update `deaths` when pandas Copy-on-Write is active.
deaths['age_group'] = deaths['age_group'].replace({
    'A00-A04': '00-04',
    'A05-A14': '05-14',
    'A15-A34': '15-34',
    'A35-A59': '35-59',
    'A60-A79': '60-79',
    'A80+': '80+',
})
# Discard rows whose age group is 'unbekannt' (German for "unknown").
deaths = deaths[deaths['age_group'] != 'unbekannt']
deaths = deaths.rename(columns={'age_group': 'Age group'})
deaths
    

In [None]:
# Location names exactly as they occur in the deaths table (first-appearance order).
locations1 = deaths['location_name'].unique().tolist()
# Location labels used by the Excel workbooks. They are paired with
# `locations1` purely by position, so the two orderings must agree.
locations2 = [
    'Total',
    'Baden-Württemberg', 'Bayern', 'Bremen', 'Hamburg', 'Hessen',
    'Niedersachsen', 'Nordrhein-Westfalen', 'Rheinland-Pfalz', 'Saarland',
    'Schleswig-Holstein', 'Brandenburg', 'Mecklenburg-Vorpommern',
    'Sachsen', 'Sachsen-Anhalt', 'Thüringen', 'Berlin',
]
# Lookup table: workbook label -> name used in the deaths table.
all_locations = {excel_label: truth_name for excel_label, truth_name in zip(locations2, locations1)}

population_excel = './data/Germany/population_2020.xlsx'
population = pd.read_excel(population_excel)
# The column labels sit in row 3 of the raw sheet; the first column is named 'Age'.
population.columns = ['Age', *population.iloc[3:4].to_numpy()[0][1:]]
# Keep rows 5 .. (last - 6), renumber them from 0 and translate the column labels.
population = (
    population.iloc[5:-6]
    .reset_index(drop=True)
    .rename(columns=all_locations)
)
population

In [None]:
    # Load the Destatis special evaluation of deaths workbook (network access required).
    # NOTE(review): the URL embeds a ';jsessionid=...' path parameter - confirm the
    # download still works with it, or whether it can be dropped.
    total_deaths_excel = 'https://www.destatis.de/DE/Themen/Gesellschaft-Umwelt/Bevoelkerung/Sterbefaelle-Lebenserwartung/Tabellen/sonderauswertung-sterbefaelle.xlsx;jsessionid=1A8D9AD59C7B0337F82EF6DBEB7D2749.live721?__blob=publicationFile'
    sheet_name = 'BL_2016_2021_KW_AG_Ins'
    # Not referenced anywhere else in this cell; kept in case another cell reads it.
    groupby = ['unter … Jahren']
    total_deaths = pd.read_excel(total_deaths_excel, sheet_name=sheet_name)
    # Four descriptive columns followed by one column per week number 1..53.
    total_deaths.columns = ['Nr', 'Year', 'Region', 'Age group'] + list(np.arange(1, 54))
    # Skip the first 8 rows and the 'Nr' column, then turn missing cells and
    # 'X ' placeholders into 0.
    total_deaths = total_deaths.iloc[8:, 1:].fillna(0).replace('X ', 0).reset_index(drop=True)
    # Both replacements are assigned back to their columns: calling
    # replace(inplace=True) on total_deaths[col] is chained assignment on a
    # temporary Series and does not update `total_deaths` when pandas
    # Copy-on-Write is active.
    total_deaths['Region'] = total_deaths['Region'].replace(all_locations)
    total_deaths['Age group'] = total_deaths['Age group'].replace(
        {'0-65': '00-65', 'Insgesamt': 'Total', '85 u. mehr': '85+'}
    )
    total_deaths
    

In [None]:
    # Workbook holding five population tables stacked on top of each other.
    # Each table is introduced by a row whose first cell ends in a four-digit
    # year, followed by len(population) data rows.
    population_excel = './data/Germany/population_2015-2019.xlsx'
    population_old = pd.read_excel(population_excel)
    # Reuse the column labels of the `population` table.
    population_old.columns = population.columns
    step = len(population) + 1  # one year-label row plus the data rows
    prior_populations = {}
    for start in (4 + offset * step for offset in range(5)):
        # The last four characters of the label cell are the year.
        population_year = int(population_old.iloc[start, 0][-4:])
        df = population_old.iloc[start + 1:start + 1 + len(population)].reset_index(drop=True)
        # Row-wise sum over every column except the first one.
        df['Total'] = df[df.columns[1:]].apply(lambda row: sum(row), axis=1)
        # Overwrite the first column with the age labels of `population`
        # (index-aligned assignment).
        df['Age'] = population['Age']
        df = df.rename(columns=all_locations)
        df.index = df['Age']
        prior_populations[population_year] = df
    prior_populations

In [None]:
# Build one population table per year from the stacked blocks in
# `population_old`, keyed by year. This repeats the computation of the cell
# that loads `population_old`.
prior_populations = dict()
step = len(population) + 1  # one year-label row plus the data rows
for start in range(4, 4 + step * 5, step):
    # The last four characters of the block's label cell are the year.
    population_year = int((population_old[start:start + 1].to_numpy()[0][0])[-4:])
    df = population_old[start + 1:start + 1 + len(population)].reset_index(drop=True)
    # Row-wise sum over every column except the first one.
    df['Total'] = df[df.columns[1:]].apply(lambda x: sum(x), axis=1)
    # Copy the age labels by position. Once this cell has run, `population` is
    # indexed by 'Age', so an index-aligned assignment would fill the column
    # with NaN when the cell is executed again.
    df['Age'] = population['Age'].to_numpy()[:len(df)]
    df.index = df['Age']
    df = df.rename(columns=all_locations)
    prior_populations[population_year] = df

# Add the national total only once: on a re-run the existing 'Germany' column
# would otherwise be summed into a new total and a second 'Germany' column
# would be created by the rename.
if 'Germany' not in population.columns:
    population['Total'] = population[population.columns[1:]].apply(lambda x: sum(x), axis=1)
    population.rename(columns={'Total': 'Germany'}, inplace=True)
population.index = population['Age']
population

In [None]:
# Read the 'probs' column of the CSV as a Series indexed by its 'offset' column.
# NOTE(review): judging by the file name this is a distribution over the number
# of days between a positive test and death - confirm.
ratios = (
    pd.read_csv('./data/Germany/positive_test_to_death_days_distribution.csv')
    .set_index('offset')['probs']
)
ratios

In [None]:
# Three values in percent per source and age group.
# NOTE(review): presumably a central estimate plus lower/upper bounds; the
# order inside each triple differs between sources - confirm.
IFRs = {
    "O'Driscoll": {
        '35-59': [0.122, 0.115, 0.128],
        '60-79': [0.992, 0.942, 1.045],
        '80+': [7.274, 6.909, 7.656],
    },
    'Verity': {
        '35-59': [0.349, 0.194, 0.743],
        '60-79': [2.913, 1.670, 5.793],
        '80+': [7.800, 3.800, 13.30],
    },
    'Perez-Saez': {
        '35-59': [0.070, 0.047, 0.097],
        '60-79': [3.892, 2.985, 5.145],
        '80+': [5.600, 4.300, 7.400],
    },
    'Levin': {
        '35-59': [0.226, 0.212, 0.276],
        '60-79': [2.491, 2.294, 3.266],
        '80+': [15.61, 12.20, 19.50],
    },
    'Driscoll (ours)': {
        '35-59': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
        '60-79': [1.1965060413135071, 1.3958914443309222, 1.6147912970366503],
        '80+': [7.105, 8.292, 9.593],
    },
}
# Convert percent to fractions and order every triple ascending, so each
# entry ends up as [smallest, middle, largest].
IFRs = {
    source: {
        age_group: sorted(np.asarray(percentages) / 100)
        for age_group, percentages in by_age.items()
    }
    for source, by_age in IFRs.items()
}
IFRs

In [None]:
# Every distinct location name in the deaths table, in order of first appearance.
locations = deaths['location_name'].drop_duplicates().tolist()
locations

In [None]:
# DarkFigure is imported from the project-local `module.dark_figure`, which is
# not part of this notebook.
from module.dark_figure import DarkFigure
# Construct the object from the tables prepared in the cells above.
# NOTE(review): the meaning and expected schema of each positional argument are
# defined by DarkFigure's constructor - confirm the order against it.
sim = DarkFigure(deaths, cases, total_deaths, population, prior_populations)
# Call the instance with the location list, the IFR table and the `ratios`
# Series; as the last expression of the cell, its return value is displayed.
# NOTE(review): what the call computes is defined by DarkFigure.__call__ - see
# the module for details.
sim(locations, IFRs, ratios)