In [1]:
import pandas as pd
import os
import numpy as np
import epiweeks
import datetime
import requests
import zipfile
import csv
from contextlib import ExitStack
import progressbar as pbar
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import json
import warnings
warnings.filterwarnings('ignore')

In [2]:
def prepare_weekly_covid_deaths(region: str):
    daily_regional_deaths = deaths[deaths['location_name'] == region].reset_index(drop=True)
    daily_regional_deaths = daily_regional_deaths.pivot(values='value', columns='date', index='Age group').fillna(0)
    daily_regional_deaths[daily_regional_deaths < 0] = 0

    weekly_regional_deaths = daily_regional_deaths.copy()
    [str(c) for c in daily_regional_deaths.columns]
    weekly_regional_deaths.columns = [epiweeks.Week.fromdate(datetime.date(year=int(str(c)[:4]), month=int(str(c)[5:7]), day=int(str(c)[8:10]))).isoformat() for c in daily_regional_deaths.columns]           
    t = weekly_regional_deaths.transpose()
    
    weekly_regional_deaths = t.groupby(t.index).sum().transpose()
    return daily_regional_deaths, weekly_regional_deaths

In [3]:
def prepare_weekly_covid_cases(region: str):
    daily_regional_cases = cases[cases['location_name'] == region].reset_index(drop=True)
    daily_regional_cases = daily_regional_cases.pivot(values='value', index='Age group', columns='date').fillna(0)
    daily_regional_cases[daily_regional_cases < 0] = 0

    weekly_regional_cases = daily_regional_cases.copy()
    weekly_regional_cases.columns = [epiweeks.Week.fromdate(datetime.date(year=int(str(c)[:4]), month=int(str(c)[5:7]), day=int(str(c)[8: 10]))).isoformat() for c in daily_regional_cases.columns]
    t = weekly_regional_cases.transpose()
    weekly_regional_covid_cases = t.groupby(t.index).sum().transpose()
    return weekly_regional_covid_cases

In [4]:
def prepare_regional_deaths(region: str):
    max_deaths = total_deaths[np.logical_and(total_deaths['Region'] == region, total_deaths['Year'] < str(2020))]
    max_deaths = max_deaths.groupby('Age group').apply(max)[max_deaths.columns[3:]]
    return max_deaths

In [5]:
def calculate_correction_due_to_aging(region: str, max_deaths):
    x = population.copy()
    x.iloc[:, 1:] = (prior_populations[2015].iloc[:, 1:] / population.iloc[:, 1:])

    correction_due_to_aging = {year: {age_group: float(x[x['Age group'] == age_group][region]) for age_group in x['Age group'].unique()}for year in prior_populations.keys()}
    return correction_due_to_aging

In [6]:
def calculate_reference_max_deaths(region, correction_due_to_aging, total_deaths):
    total_deaths = total_deaths[total_deaths['Year'] > str(2015)]
    max_deaths_correct = total_deaths[np.logical_and(total_deaths['Region'] == region, total_deaths['Year'] < '2020')].drop(columns=['Region']).reset_index(drop=True)
    max_deaths_correct['Multiplier'] = max_deaths_correct[['Year', 'Age group']].apply(lambda x: correction_due_to_aging[int(x[0])][x[1]], axis=1)

    for col in range(1, 52 + 1):
        max_deaths_correct[col] = max_deaths_correct[col] / max_deaths_correct['Multiplier']
    max_deaths_correct = max_deaths_correct.groupby('Age group').apply(max)[max_deaths_correct.columns[2:-2]]
    max_deaths_correct.columns = [f'W{i:02d}' for i in range(1, 52 + 1)]
    y2020 = max_deaths_correct.add_prefix('2020')
    y2021 = max_deaths_correct.add_prefix('2021')
    y2022 = max_deaths_correct.add_prefix('2022')
    y2020['2020W53'] = np.maximum(max_deaths_correct['W01'], max_deaths_correct['W52'])
    max_deaths_concatenated = pd.concat([y2020, y2021, y2022], axis=1)
    max_deaths_concatenated[weekly_regional_deaths.columns]
    return max_deaths_concatenated

In [7]:
def calculate_overall_covid_deaths(region):
    overall_deaths = total_deaths[np.logical_and(total_deaths['Region'] == region, total_deaths['Year'] >= str(2020))].drop(columns=['Region']).reset_index(drop=True)
    y2020 = overall_deaths[overall_deaths['Year'] == '2020'].reset_index(drop=True)[overall_deaths.columns[1:]]
    y2020.columns = ['Age group'] + [f'2020W{i:02d}' for i in range(1, 53 + 1)]
    y2021 = overall_deaths[overall_deaths['Year'] == '2021'].reset_index(drop=True)[overall_deaths.columns[1:-1]]
    y2021.columns = ['Age group'] + [f'2021W{i:02d}' for i in range(1, 52 + 1)]
    y2022 = overall_deaths[overall_deaths['Year'] == '2021'].reset_index(drop=True)[overall_deaths.columns[1:-1]]
    y2022.columns = ['Age group'] + [f'2022W{i:02d}' for i in range(1, 52 + 1)]
    overall_deaths = pd.concat([y2020[y2020.columns[1:]], y2021, y2022], axis=1)
    overall_deaths.index = y2020['Age group']
    overall_deaths = overall_deaths.sort_index()
    return overall_deaths

In [8]:
today = datetime.date.today().strftime("%d-%m-%Y")
data_dir_path = f'data/World/{today}'
result_dir_path = f'result/World/{today}'
if not os.path.exists(f'{result_dir_path}'):
    os.mkdir(f'{result_dir_path}')
with open(f'{data_dir_path}/eurostat_countries.json', 'r') as file:
    countries = json.load(file)

In [9]:
format_label = pbar.FormatCustomText('Country: %(country)s Running: %(running)s   ', dict(country='start', running='start'))
widgets = [pbar.ETA(), ' ', pbar.Timer(), ' ', pbar.Percentage(format=' (%(percentage)3d%%) '), '  ', format_label]
bar = pbar.ProgressBar(widgets=widgets, maxval=len(countries))
bar.start()

# Data from Germany
ratios = pd.read_csv('./data/Germany/positive_test_to_death_days_distribution.csv')
ratios.index = ratios['offset']
ratios = ratios['probs']
IFRs = {
    'O\'Driscoll': {
         '00-04': [0.122, 0.115, 0.128],
         '05-09': [0.122, 0.115, 0.128],
         '10-14': [0.122, 0.115, 0.128],
         '15-19': [0.122, 0.115, 0.128],
         '20-24': [0.122, 0.115, 0.128],
         '25-29': [0.122, 0.115, 0.128],
         '30-34': [0.122, 0.115, 0.128],
         '35-39': [0.122, 0.115, 0.128],
         '40-44': [0.122, 0.115, 0.128],
         '45-49': [0.122, 0.115, 0.128],
         '50-54': [0.122, 0.115, 0.128],
         '55-59': [0.122, 0.115, 0.128],
         '60-64': [0.992, 0.942, 1.045],
         '65-69': [0.992, 0.942, 1.045],
         '70-74': [0.992, 0.942, 1.045],
         '75-79': [0.992, 0.942, 1.045],
         '80-84': [7.274, 6.909, 7.656],
         '85+': [7.274, 6.909, 7.656]
    },
    'Verity': {
         '00-04': [0.349, 0.194, 0.743],
         '05-09': [0.349, 0.194, 0.743],
         '10-14': [0.349, 0.194, 0.743],
         '15-19': [0.349, 0.194, 0.743],
         '20-24': [0.349, 0.194, 0.743],
         '25-29': [0.349, 0.194, 0.743],
         '30-34': [0.349, 0.194, 0.743],
         '35-39': [0.349, 0.194, 0.743],
         '40-44': [0.349, 0.194, 0.743],
         '45-49': [0.349, 0.194, 0.743],
         '50-54': [0.349, 0.194, 0.743],
         '55-59': [0.349, 0.194, 0.743],
         '60-64': [2.913, 1.670, 5.793],
         '65-69': [2.913, 1.670, 5.793],
         '70-74': [2.913, 1.670, 5.793],
         '75-79': [2.913, 1.670, 5.793],
         '80-84': [7.800, 3.800, 13.30],
         '85+': [7.800, 3.800, 13.30]
    },
    'Perez-Saez': {
         '00-04': [0.070, 0.047, 0.097],
         '05-09': [0.070, 0.047, 0.097],
         '10-14': [0.070, 0.047, 0.097],
         '15-19': [0.070, 0.047, 0.097],
         '20-24': [0.070, 0.047, 0.097],
         '25-29': [0.070, 0.047, 0.097],
         '30-34': [0.070, 0.047, 0.097],
         '35-39': [0.070, 0.047, 0.097],
         '40-44': [0.070, 0.047, 0.097],
         '45-49': [0.070, 0.047, 0.097],
         '50-54': [0.070, 0.047, 0.097],
         '55-59': [0.070, 0.047, 0.097],
         '60-64': [3.892, 2.985, 5.145],
         '65-69': [3.892, 2.985, 5.145],
         '70-74': [3.892, 2.985, 5.145],
         '75-79': [3.892, 2.985, 5.145],
         '80-84': [5.600, 4.300, 7.400],
         '85+': [5.600, 4.300, 7.400]
    },
    'Levin': {
         '00-04': [0.226, 0.212, 0.276],
         '05-09': [0.226, 0.212, 0.276],
         '10-14': [0.226, 0.212, 0.276],
         '15-19': [0.226, 0.212, 0.276],
         '20-24': [0.226, 0.212, 0.276],
         '25-29': [0.226, 0.212, 0.276],
         '30-34': [0.226, 0.212, 0.276],
         '35-39': [0.226, 0.212, 0.276],
         '40-44': [0.226, 0.212, 0.276],
         '45-49': [0.226, 0.212, 0.276],
         '50-54': [0.226, 0.212, 0.276],
         '55-59': [0.226, 0.212, 0.276],
         '60-64': [2.491, 2.294, 3.266],
         '65-69': [2.491, 2.294, 3.266],
         '70-74': [2.491, 2.294, 3.266],
         '75-79': [2.491, 2.294, 3.266],
         '80-84': [15.61, 12.20, 19.50],
         '85+': [15.61, 12.20, 19.50]
    },
    'Driscoll (ours)': {
         '00-04': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '05-09': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '10-14': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '15-19': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '20-24': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '25-29': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '30-34': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '35-39': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '40-44': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '45-49': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '50-54': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '55-59': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '60-64': [1.1965060413135071, 1.3958914443309222, 1.6147912970366503],
         '65-69': [1.1965060413135071, 1.3958914443309222, 1.6147912970366503],
         '70-74': [1.1965060413135071, 1.3958914443309222, 1.6147912970366503],
         '75-79': [1.1965060413135071, 1.3958914443309222, 1.6147912970366503],
         '80-84': [7.105, 8.292, 9.593],
         '85+': [7.105, 8.292, 9.593]
                       }
}
# IFRs = {k: {k1: sorted(np.array(v1) / 100) for k1, v1 in v.items()} for k, v in IFRs.items()}
age_group_translator = {'Y_LT5': '00-04', 'Y5-9': '05-09',
                        'Y10-14': '10-14', 'Y15-19': '15-19',
                        'Y20-24': '20-24', 'Y25-29': '25-29',
                        'Y30-34': '30-34', 'Y35-39': '35-39',
                        'Y40-44': '40-44', 'Y45-49': '45-49',
                        'Y50-54': '50-54', 'Y55-59': '55-59',
                        'Y60-64': '60-64', 'Y65-69': '65-69',
                        'Y70-74': '70-74', 'Y75-79': '75-79',
                        'Y80-84': '80-84', 'Y_GE85': '85+',
                        'Y85-89': '85-89', 'Y_GE90': '90+',}

# with open(f'{result_dir_path}/simulation_complited.csv', 'w') as file:
#     writer = csv.writer(file)
#     writer.writerow(['completed'])
    
# region_translator = {}
# for country, regions in countries.items():
#     for region, short in regions.items():
#         c = country if region == 'All' else region
#         region_translator[c] = short[:2] if c == country else short
# print(region_translator['Germany'])

for country_index, (country, regions) in enumerate(countries.items()):
    format_label.update_mapping(country=country)
    bar.update()
    if not os.path.exists(f'{data_dir_path}/{country}/old_deaths.csv'):
        continue
    if not os.path.exists(f'{data_dir_path}/{country}/old_population.csv'):
        continue
    if not os.path.exists(f'{data_dir_path}/{country}/covid_cases.csv'):
        continue
    if not os.path.exists(f'{data_dir_path}/{country}/covid_deaths.csv'):
        continue
    with open(f'{result_dir_path}/simulation_complited.csv', 'a') as file:
        writer = csv.writer(file)
        writer.writerow([country])
    
    # Load cases
    format_label.update_mapping(running='Loading cases' + ' ' * 30)
    bar.update()
    cases = pd.read_csv(f'{data_dir_path}/{country}/covid_cases.csv', index_col=0).reset_index(drop=True)
    
    # Load deaths
    format_label.update_mapping(running='Loading deaths' + ' ' * 30)
    bar.update()
    deaths = pd.read_csv(f'{data_dir_path}/{country}/covid_deaths.csv', index_col=0).reset_index(drop=True)

    # Load population
    format_label.update_mapping(running='Loading population' + ' ' * 30)
    bar.update()
    population = pd.read_csv(f'{data_dir_path}/Germany/old_population.csv').replace(age_group_translator)
    population = population[population['Year'] >= 2020].reset_index(drop=True)
    new_df = pd.DataFrame({'Age group': population['Age group'].unique()})
    for i, region in enumerate(population['Region'].unique()):
        new_df.insert(i + 1, region, list(population[np.logical_and(population['Region'] == region, population['Sex'] == 'T')]['Value']), True)
    new_df = new_df[~new_df['Age group'].isin(['UNK', 'Y_GE75', 'Y_GE80'])].sort_values('Age group').reset_index(drop=True)
    new_df.to_csv(f'{data_dir_path}/{country}/population.csv', index=False)
    population = new_df
    population.set_index('Age group', inplace=True)
    age_group_combiner = {'Germany': {'80+': ['80-84', '85+'], '60-79': ['60-64', '65-69', '70-74', '75-79'], '00-59': ['00-04', '05-09', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59']}}
    for new_age_group, age_groups_to_remove in age_group_combiner[country].items():
        population = population.append(pd.DataFrame(population[population.index.isin(age_groups_to_remove)].sum(), columns=[new_age_group]).T)
        population.drop(age_groups_to_remove, inplace=True)
        population.sort_index(inplace=True)
    population = population.reset_index()
    population.rename(columns = {'index': 'Age group'}, inplace=True)
    bar.update()
    
    # Load and prepare total deaths
    format_label.update_mapping(running='Loading total deaths' + ' ' * 30)
    bar.update()
    total_deaths = pd.read_csv(f'{data_dir_path}/{country}/old_deaths.csv')
    total_deaths = total_deaths[total_deaths['Sex'] == 'T'].reset_index(drop=True)
    total_deaths = total_deaths[total_deaths['Year'].apply(lambda x: x[:4]) >= str(2015)].reset_index(drop=True)
    new_df = pd.DataFrame(columns=['Year', 'Region', 'Age group'] + list(np.arange(1, 54)))
    new_df = new_df.append(total_deaths[total_deaths['Sex'] == 'T'])
    new_df['Year'] = new_df['Year'].apply(lambda x: x[:4])
    new_df = new_df.drop(columns='Sex', axis=1)
    new_df = new_df.drop(columns='Value', axis=1)
    new_df = new_df.drop_duplicates()
    bar.update()
    
    for i in range(len(total_deaths)):
        region = total_deaths.iloc[i]['Region']
        age_group = total_deaths.iloc[i]['Age group']
        year = total_deaths.iloc[i]['Year'][:4]
        week = int(total_deaths.iloc[i]['Year'][6:])
        x = np.logical_and(new_df['Region'] == region, new_df['Year'] == year)
        x = np.logical_and(x, new_df['Year'] >= str(2015))
        x = np.logical_and(x, new_df['Age group'] == age_group)
        new_df.loc[x, week] = total_deaths.iloc[i]['Value']
        bar.update()
    total_deaths = new_df[new_df['Year'] >= str(2015)]
    total_deaths = total_deaths.drop(total_deaths.loc[total_deaths['Age group']=='UNK'].index)
    total_deaths = total_deaths.drop(total_deaths.loc[total_deaths['Age group']=='Y_GE75'].index)
    total_deaths = total_deaths.drop(total_deaths.loc[total_deaths['Age group']=='Y_GE80'].index).fillna(0)
    total_deaths.replace(age_group_translator, inplace=True)
    total_deaths = total_deaths.append(total_deaths[total_deaths['Age group'].isin(['85-89', '90+'])].groupby(['Year', 'Region']).sum().reset_index())
    total_deaths = total_deaths.fillna({'Age group': '85+'})
    total_deaths = total_deaths[~total_deaths['Age group'].isin(['85-89', '90+'])].reset_index(drop=True)
    bar.update()
    
    # Load and prepare prior population
    format_label.update_mapping(running='Loading prior population' + ' ' * 30)
    bar.update()
    old_population = pd.read_csv(f'{data_dir_path}/{country}/old_population.csv')
    old_population = old_population[np.logical_and(old_population['Year'] >= 2015, old_population['Year'] < 2020)].reset_index(drop=True)
    prior_populations = dict()

    for year in range(2015, 2020):
        df = pd.DataFrame(columns=['Age group'] + list(old_population['Region'].unique()))
        df['Age group'] = old_population['Age group'].unique()
        for region in old_population['Region'].unique():
            condition = np.logical_and(old_population['Region'] == region, old_population['Year'] == year)
            condition = np.logical_and(condition, old_population['Sex'] == 'T')
            if len(old_population[condition]['Value']) < len(df['Age group']):
                df.loc[:, region] = 0
            else:
                df.loc[:, region] = list(old_population[condition]['Value'])
            bar.update()
        df = df.drop(df.loc[df['Age group']=='UNK'].index)
        df = df.drop(df.loc[df['Age group']=='Y_GE75'].index)
        df = df.drop(df.loc[df['Age group']=='Y_GE80'].index)
        df['Age group'].replace(age_group_translator, inplace=True)
        prior_populations[year] = df.sort_values(by='Age group').reset_index().drop('index', axis=1)
    for year, prior_population in prior_populations.items():
        prior_populations
        prior_population.set_index('Age group', inplace=True)
        age_group_combiner = {'Germany': {'80+': ['80-84', '85+'], '60-79': ['60-64', '65-69', '70-74', '75-79'], '00-59': ['00-04', '05-09', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49', '50-54', '55-59']}}
        for new_age_group, age_groups_to_remove in age_group_combiner[country].items():
            prior_population = prior_population.append(pd.DataFrame(prior_population[prior_population.index.isin(age_groups_to_remove)].sum(), columns=[new_age_group]).T)
            prior_population.drop(age_groups_to_remove, inplace=True)
            prior_population.sort_index(inplace=True)
        prior_population = prior_population.reset_index()
        prior_population.rename(columns = {'index': 'Age group'}, inplace=True)
        prior_populations[year] = prior_population
    bar.update()
    
    # Prepare regions set
    cases_regions_set = set(cases['location_name'].unique())
    deaths_regions_set = set(deaths['location_name'].unique())
    population_regions_set = set(population.columns)
    total_deaths_regions_set = set(total_deaths['Region'].unique())
    prior_populations_regions_set = set(prior_populations[2015].columns)
    regions_set = cases_regions_set
    regions_set = regions_set.intersection(deaths_regions_set)
    regions_set = regions_set.intersection(population_regions_set)
    regions_set = regions_set.intersection(total_deaths_regions_set)
    regions_set = regions_set.intersection(prior_populations_regions_set)
    print(regions_set)
    

    for region in regions_set:
        # Prepare dir
        format_label.update_mapping(running='Prepare dir' + ' ' * 30)
        bar.update()
        if not os.path.exists(f'{result_dir_path}/{country}'):
            os.mkdir(f'{result_dir_path}/{country}')
        region = region.replace('*', '')
        if not os.path.exists(f'{result_dir_path}/{country}/{region}'):
            os.mkdir(f'{result_dir_path}/{country}/{region}')  
            
        # Simulation
        format_label.update_mapping(running='Simulation' + ' ' * 30)
        bar.update()
        daily_regional_deaths, weekly_regional_deaths = prepare_weekly_covid_deaths(region)
        weekly_regional_cases = prepare_weekly_covid_cases(region)
        age_group_combiner = {'Germany': {'00-59': ['00-04', '05-14', '15-34', '35-59']}}
        for new_age_group, age_groups_to_remove in age_group_combiner[country].items():
            weekly_regional_deaths = weekly_regional_deaths.append(pd.DataFrame(weekly_regional_deaths[weekly_regional_deaths.index.isin(age_groups_to_remove)].sum(), columns=[new_age_group]).T)
            weekly_regional_deaths.drop(age_groups_to_remove, inplace=True)
            weekly_regional_deaths.sort_index(inplace=True)
            
        max_deaths = prepare_regional_deaths(region)
        
        age_group_combiner = {'Germany': {'80+': ['80-84', '85+'], '60-79': ['60-64', '65-69', '70-74', '75-79'], '00-59': ['40-44', '45-49', '50-54', '55-59']}}
        for new_age_group, age_groups_to_remove in age_group_combiner[country].items():
            max_deaths = max_deaths.append(pd.DataFrame(max_deaths[max_deaths.index.isin(age_groups_to_remove)].sum(), columns=[new_age_group]).T)
            max_deaths.drop(age_groups_to_remove, inplace=True)
            max_deaths.sort_index(inplace=True)
        correction_due_to_aging = calculate_correction_due_to_aging(region, max_deaths)
        max_deaths = calculate_reference_max_deaths(region, correction_due_to_aging, total_deaths)
        

        
        overall_deaths = calculate_overall_covid_deaths(region)
        
#         # max_deaths_correct = max_deaths
#         # max_deaths_correct.columns = [f'W{i:02d}' for i in range(1, 53 + 1)]
#         # y2020 = max_deaths_correct.add_prefix('2020')
#         # y2021 = max_deaths_correct.add_prefix('2021')
#         # y2022 = max_deaths_correct.add_prefix('2022')
#         # max_deaths_concatenated = pd.concat([y2020, y2021, y2022], axis=1)
#         # max_deaths_concatenated = max_deaths_concatenated.loc[:, :overall_deaths.columns[-1]]
#         # max_deaths_concatenated
        
        overall_minus_max = np.maximum(0, overall_deaths - max_deaths).loc[:, :weekly_regional_deaths.columns[-1]]
        age_group_combiner = {'Germany': {'80+': ['80-84', '85+'], '60-79': ['60-64', '65-69', '70-74', '75-79'], '00-59': ['40-44', '45-49', '50-54', '55-59']}}
        for new_age_group, age_groups_to_remove in age_group_combiner[country].items():
            overall_minus_max = overall_minus_max.append(pd.DataFrame(overall_minus_max[overall_minus_max.index.isin(age_groups_to_remove)].sum(), columns=[new_age_group]).T)
            overall_minus_max.drop(age_groups_to_remove, inplace=True)
            overall_minus_max.sort_index(inplace=True)
            
        age_group_combiner = {'Germany': {'00-59': ['00-04', '05-14', '15-34', '35-59']}}
        for new_age_group, age_groups_to_remove in age_group_combiner[country].items():
            weekly_regional_deaths = weekly_regional_deaths.append(pd.DataFrame(weekly_regional_deaths[weekly_regional_deaths.index.isin(age_groups_to_remove)].sum(), columns=[new_age_group]).T)
            weekly_regional_deaths.drop(age_groups_to_remove, inplace=True)
            weekly_regional_deaths.sort_index(inplace=True)   
            
        
        weekly_regional_deaths = weekly_regional_deaths.append(pd.DataFrame(weekly_regional_deaths.sum(axis=0), columns=['TOTAL']).T)
        additional_deaths_not_attributed_to_covid = np.maximum(0, overall_minus_max[weekly_regional_deaths.columns] - weekly_regional_deaths).fillna(0)
        corrected_covid_deaths = (additional_deaths_not_attributed_to_covid * 0.85 + weekly_regional_deaths)
        
#         # Plot results
#         format_label.update_mapping(country=country, running='Ploting result' + ' ' * 30)
        
        # interpolate_timeline_to_days(corrected_covid_deaths, daily_regional_deaths, IFRs, ratios)
        # bar.update()
        # plot_weekly_deaths()
        # bar.update()
        # plot_weekly_cases()
        # bar.update()
        # plot_cumulative_deaths()
        # bar.update()
#         df = pd.DataFrame(columns=['gamma', 'factor 1', 'factor 2', 'f1 - f2'])
#         for index, gamma in enumerate(np.linspace(0, 1, 11)):
#             bar.update()
#             corrected_covid_deaths = additional_deaths_not_attributed_to_covid * gamma + weekly_regional_deaths
#             plot_corrected_covid_deaths()
#         fig, axs = plt.subplots(1, 2, figsize=(30,10))
#         for index, gamma in enumerate(np.linspace(0, 1, 11)):
#             bar.update()
#             corrected_covid_deaths = additional_deaths_not_attributed_to_covid * gamma + weekly_regional_deaths
#             factor_1 = corrected_covid_deaths.iloc[:-1].sum().sum()
#             factor_2 = corrected_covid_deaths.loc['TOTAL'].sum()
#             bar.update()
#             df = df.append(pd.DataFrame({'factor 1': factor_1, 'factor 2': factor_2, 'gamma': gamma, 'f1 - f2': factor_1 - factor_2}, index=[index]))
#             corrected_covid_deaths.iloc[:-1].sum().cumsum().plot(ax=axs[0], label=f'gamma = {gamma}')
#             corrected_covid_deaths.loc['TOTAL'].cumsum().plot(ax=axs[1], label=f'gamma = {gamma}')
            
#         axs[0].legend()
#         axs[0].grid()
#         axs[0].set_title('Factor 1')

#         axs[1].legend()
#         axs[1].grid()
#         axs[1].set_title('Factor 2')
#         plt.savefig(f'{result_dir_path}/{country}/{region}/death_factors.png')
#         df.plot(x='gamma', y=['factor 1', 'factor 2'], grid=True)
#         plt.savefig(f'{result_dir_path}/{country}/{region}/factors.png')
#         df.to_csv(f'{result_dir_path}/{country}/{region}/factors.csv')
        
        
    bar.update(country_index)
bar.finish()
print('Simulation done')

ETA:  --:--:-- Elapsed Time: 0:00:15  (  0%)   Country: Germany Running: Loading prior population                                 

{'DE'}


KeyError: '40-44'

array(['TOTAL', '40-44', '45-49', '50-54', '55-59', '60-64', '65-69',
       '70-74', '75-79', '80-84', '85+'], dtype=object)

In [15]:
        total_deaths = total_deaths.set_index('Age group')    
        age_group_combiner = {'Germany': {'80+': ['80-84', '85+'], '60-79': ['60-64', '65-69', '70-74', '75-79'], '00-59': ['40-44', '45-49', '50-54', '55-59']}}
        for new_age_group, age_groups_to_remove in age_group_combiner[country].items():
            total_deaths = total_deaths.append(pd.DataFrame(total_deaths[total_deaths.index.isin(age_groups_to_remove)].sum(), columns=[new_age_group]).T)
            total_deaths.drop(age_groups_to_remove, inplace=True)
            total_deaths.sort_index(inplace=True)
        total_deaths

Unnamed: 0,Year,Region,1,2,3,4,5,6,7,8,...,44,45,46,47,48,49,50,51,52,53
00-59,2015201620172018201920202021202220172018201920...,DEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDE...,12172,12244,10646,10483,10963,10873,10818,10764,...,9743,9939,10064,10001,10373,10281,10128,10111,9942,3037
60-79,2022201620152021202020192018201720162022202020...,DEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDE...,53062,53677,47342,47182,48491,47863,49089,48923,...,42213,43268,43776,44120,45143,46044,46267,46839,45637,13717
80+,2015201620172018201920202021202220152016201720...,DEDEDEDEDEDEDEDEDEDEDEDEDEDEDEDE,92647,93097,80889,80724,82349,81506,83244,83522,...,70490,72436,73968,75930,77350,79554,79776,81418,81594,25394
TOTAL,2021,DEE,981,1007,1035,934,868,846,765,709,...,683,679,822,806,826,886,886,839,0,0
TOTAL,2017,DED,1235,1347,1229,1231,1391,1369,1406,1390,...,982,964,1022,1036,1053,1067,1061,1101,1108,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TOTAL,2015,DE9,1898,1987,1888,1963,2019,2012,2213,2236,...,1715,1728,1703,1762,1701,1743,1753,1743,1742,1742
TOTAL,2016,DE9,2014,1932,1922,1966,1895,1858,1906,1823,...,1759,1840,1873,1725,1872,1922,1852,2020,1942,0
TOTAL,2017,DE9,2105,2160,2064,2087,2277,2208,2317,2346,...,1712,1718,1778,1804,1829,1843,1903,1878,1871,0
TOTAL,2017,DE5,176,178,166,184,179,185,181,209,...,144,159,150,160,127,164,162,141,154,0
