In [1]:
import pandas as pd
import os
import numpy as np
import epiweeks
import datetime
import requests
import zipfile
import csv
from contextlib import ExitStack
import progressbar as pbar
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import json
import warnings
warnings.filterwarnings('ignore')

In [2]:
def load_cases(country, sex='b'):
    translation_age = {0: '00-04', 5: '05-09',
                       10: '10-14', 15: '15-19',
                       20: '20-24', 25: '25-29',
                       30: '30-34', 35: '35-39',
                       40: '40-44', 45: '45-49',
                       50: '50-54', 55: '55-59',
                       60: '60-64', 65: '65-69',
                       70: '70-74', 75: '75-79',
                       80: '80-84', 85: '85-89',
                       90: '90-94', 95: '95-99',
                       100: '100-104'}
    cases = pd.read_csv(f'{data_dir_path}/{country}/cases_5.csv', encoding='latin1')
    cases['Date'] =pd.to_datetime(cases.Date, format='%d.%m.%Y')
    cases = cases.sort_values(by=['Date', 'Region'])
    cases = cases.drop('Code', axis='columns')
    cases['Region'].replace({'All': country}, inplace=True)
    cases = cases[cases['Sex'] == sex]
    cases = cases.drop(['Country', 'AgeInt', 'Sex'], axis='columns')
    cases['Age'].replace(translation_age, inplace=True)
    cases.rename(columns={'Age': 'Age group', 'Region': 'location_name', 'Date': 'date', 'Cases': 'value'}, inplace=True)
    cases = cases.reindex(['date', 'location_name', 'Age group', 'value'], axis=1).fillna(0)
    return cases

In [3]:
def load_deaths(country, sex='b'):
    translation_age = {0: '00-04', 5: '05-09',
                       10: '10-14', 15: '15-19',
                       20: '20-24', 25: '25-29',
                       30: '30-34', 35: '35-39',
                       40: '40-44', 45: '45-49',
                       50: '50-54', 55: '55-59',
                       60: '60-64', 65: '65-69',
                       70: '70-74', 75: '75-79',
                       80: '80-84', 85: '85-89',
                       90: '90-94', 95: '95-99',
                       100: '100-104'}
    deaths = pd.read_csv(f'{data_dir_path}/{country}/deaths_5.csv', encoding='latin1')
    deaths['Date'] =pd.to_datetime(deaths.Date, format='%d.%m.%Y')
    deaths = deaths.sort_values(by=['Date', 'Region'])
    deaths = deaths.drop('Code', axis='columns')
    deaths['Region'].replace({'All': country}, inplace=True)
    deaths = deaths[deaths['Sex'] == sex]
    deaths = deaths.drop(['Country', 'AgeInt', 'Sex'], axis='columns')
    deaths['Age'].replace(translation_age, inplace=True)
    deaths.rename(columns={'Age': 'Age group', 'Region': 'location_name', 'Date': 'date', 'Deaths': 'value'}, inplace=True)
    deaths = deaths.reindex(['date', 'location_name', 'Age group', 'value'], axis=1).fillna(0)
    return deaths

In [4]:
def prepare_weekly_covid_deaths(region: str):
    daily_regional_deaths = deaths[deaths['location_name'] == region].reset_index(drop=True)
    daily_regional_deaths = daily_regional_deaths.pivot(values='value', columns='date', index='Age group').fillna(0)
    daily_regional_deaths[daily_regional_deaths < 0] = 0

    weekly_regional_deaths = daily_regional_deaths.copy()
    [str(c) for c in daily_regional_deaths.columns]
    weekly_regional_deaths.columns = [epiweeks.Week.fromdate(datetime.date(year=int(str(c)[:4]), month=int(str(c)[5:7]), day=int(str(c)[8:10]))).isoformat() for c in daily_regional_deaths.columns]           
    t = weekly_regional_deaths.transpose()
    
    weekly_regional_deaths = t.groupby(t.index).sum().transpose()
    return daily_regional_deaths, weekly_regional_deaths

In [5]:
def prepare_weekly_covid_cases(region: str):
    daily_regional_cases = cases[cases['location_name'] == region].reset_index(drop=True)
    daily_regional_cases = daily_regional_cases.pivot(values='value', index='Age group', columns='date').fillna(0)
    daily_regional_cases[daily_regional_cases < 0] = 0

    weekly_regional_cases = daily_regional_cases.copy()
    weekly_regional_cases.columns = [epiweeks.Week.fromdate(datetime.date(year=int(str(c)[:4]), month=int(str(c)[5:7]), day=int(str(c)[8: 10]))).isoformat() for c in daily_regional_cases.columns]
    t = weekly_regional_cases.transpose()
    weekly_regional_covid_cases = t.groupby(t.index).sum().transpose()
    return weekly_regional_covid_cases

In [6]:
def prepare_regional_deaths(region: str):
    max_deaths = total_deaths[np.logical_and(total_deaths['Region'] == region, total_deaths['Year'] < str(2020))]
    max_deaths = max_deaths.groupby('Age group').apply(max)[max_deaths.columns[3:]]
    return max_deaths

In [7]:
def calculate_correction_due_to_aging(region: str, max_deaths):
    age_profile_group = {}
    lower_bound = 0
    for index in max_deaths.index:
        if index == '85+':
            age_profile_group[index] = population[population['Age'] >= 85][region].sum()
        elif index == 'TOTAL':
            age_profile_group['TOTAL'] = population[region].sum()
        else:
            upper_bound = int(index[3:])
            lower_bound = int(index[:2])
            age_profile_group[index] = population[np.logical_and(population['Age'] >= lower_bound, population['Age'] <= upper_bound)][region].sum()

    age_profile = population[region]
    age_profile_group_old = {k: v.set_index('Age group')[region] for k, v in prior_populations.items() if k > 2015}
    correction_due_to_aging = {year: {age_group: age_profile_group_old[2016][age_group] / age_profile_group[age_group] for age_group in age_profile_group.keys()} for year in age_profile_group_old.keys()}
    return correction_due_to_aging

In [8]:
def calculate_reference_max_deaths(region, correction_due_to_aging, total_deaths):
    total_deaths = total_deaths[total_deaths['Year'] > str(2015)]
    max_deaths_correct = total_deaths[np.logical_and(total_deaths['Region'] == region, total_deaths['Year'] < '2020')].drop(columns=['Region']).reset_index(drop=True)
    max_deaths_correct['Multiplier'] = max_deaths_correct[['Year', 'Age group']].apply(lambda x: correction_due_to_aging[int(x[0])][x[1]], axis=1)

    for col in range(1, 52 + 1):
        max_deaths_correct[col] = max_deaths_correct[col] / max_deaths_correct['Multiplier']
    max_deaths_correct = max_deaths_correct.groupby('Age group').apply(max)[max_deaths_correct.columns[2:-2]]
    max_deaths_correct.columns = [f'W{i:02d}' for i in range(1, 52 + 1)]
    y2020 = max_deaths_correct.add_prefix('2020')
    y2021 = max_deaths_correct.add_prefix('2021')
    y2022 = max_deaths_correct.add_prefix('2022')
    y2020['2020W53'] = np.maximum(max_deaths_correct['W01'], max_deaths_correct['W52'])
    max_deaths_concatenated = pd.concat([y2020, y2021, y2022], axis=1)
    max_deaths_concatenated[weekly_regional_deaths.columns]
    return max_deaths_concatenated

In [9]:
def calculate_overall_covid_deaths(region):
    overall_deaths = total_deaths[np.logical_and(total_deaths['Region'] == region, total_deaths['Year'] >= str(2020))].drop(columns=['Region']).reset_index(drop=True)
    y2020 = overall_deaths[overall_deaths['Year'] == '2020'].reset_index(drop=True)[overall_deaths.columns[1:]]
    y2020.columns = ['Age group'] + [f'2020W{i:02d}' for i in range(1, 53 + 1)]
    y2021 = overall_deaths[overall_deaths['Year'] == '2021'].reset_index(drop=True)[overall_deaths.columns[1:-1]]
    y2021.columns = ['Age group'] + [f'2021W{i:02d}' for i in range(1, 52 + 1)]
    y2022 = overall_deaths[overall_deaths['Year'] == '2021'].reset_index(drop=True)[overall_deaths.columns[1:-1]]
    y2022.columns = ['Age group'] + [f'2022W{i:02d}' for i in range(1, 52 + 1)]
    overall_deaths = pd.concat([y2020[y2020.columns[1:]], y2021, y2022], axis=1)
    overall_deaths.index = y2020['Age group']
    overall_deaths = overall_deaths.sort_index()
    return overall_deaths

In [10]:
def plot_weekly_deaths():
    fig, axs = plt.subplots(2, 2, figsize=(30,20))
    ((weekly_regional_deaths_cumulative.iloc[:, :-1] - weekly_regional_deaths_cumulative.iloc[:, :-1].shift(periods=1, axis='columns', fill_value=0)) / 7).T.plot(ax=axs[0, 0])
    (weekly_regional_deaths_cumulative.iloc[:, :-1] / 7).T.plot(ax=axs[0, 1])
    ((weekly_regional_deaths_cumulative.iloc[:, :-1] - weekly_regional_deaths_cumulative.iloc[:, :-1].shift(periods=1, axis='columns', fill_value=0)) / 7).sum().T.plot(ax=axs[1, 0], label='Total')
    (weekly_regional_deaths_cumulative.iloc[:, :-1] / 7).sum().T.plot(ax=axs[1, 1], label='Total')
    axs[0, 0].set_title(f'Weekly Covid deaths recorded by week in age groups ({country} - {region})')
    axs[0, 0].set_xlabel('week')
    axs[0, 0].set_ylabel('deaths')

    axs[0, 1].set_title(f'Cumulative Covid deaths recorded by week in age groups ({country} - {region})')
    axs[0, 1].set_xlabel('week')
    axs[0, 1].set_ylabel('deaths')

    axs[1, 0].set_title(f'Weekly Covid deaths recorded by week in total ({country} - {region})')
    axs[1, 0].legend()
    axs[1, 0].set_xlabel('week')
    axs[1, 0].set_ylabel('deaths')

    axs[1, 1].set_title(f'Cumulative Covid deaths recorded by week in total ({country} - {region})')
    axs[1, 1].legend()
    axs[1, 1].set_xlabel('week')
    axs[1, 1].set_ylabel('deaths')
    
    axs[0, 0].grid()
    axs[0, 1].grid()
    axs[1, 0].grid()
    axs[1, 1].grid()
    

    plt.savefig(f'{result_dir_path}/{country}/{region}/cumulative_covid_deaths_reported.png')
    plt.close('all')

In [11]:
def plot_weekly_cases():
    fig, axs = plt.subplots(2, 2, figsize=(30,20))
    ((weekly_regional_covid_cases.iloc[:, :-1] - weekly_regional_covid_cases.iloc[:, :-1].shift(periods=1, axis='columns', fill_value=0)) / 7).T.plot(ax=axs[0, 0])
    (weekly_regional_covid_cases.iloc[:, :-1] / 7).T.plot(ax=axs[0, 1])
    ((weekly_regional_covid_cases.iloc[:, :-1] - weekly_regional_covid_cases.iloc[:, :-1].shift(periods=1, axis='columns', fill_value=0)) / 7).sum().T.plot(ax=axs[1, 0], label='Total')
    (weekly_regional_covid_cases.iloc[:, :-1] / 7).sum().T.plot(ax=axs[1, 1], label='Total')
    axs[0, 0].set_title(f'Weekly Covid cases recorded by week in age groups ({country} - {region})')
    axs[0, 0].set_xlabel('week')
    axs[0, 0].set_ylabel('deaths')

    axs[0, 1].set_title(f'Cumulative Covid cases recorded by week in age groups ({country} - {region})')
    axs[0, 1].set_xlabel('week')
    axs[0, 1].set_ylabel('deaths')

    axs[1, 0].set_title(f'Weekly Covid cases recorded by week in total ({country} - {region})')
    axs[1, 0].legend()
    axs[1, 0].set_xlabel('week')
    axs[1, 0].set_ylabel('deaths')

    axs[1, 1].set_title(f'Cumulative Covid cases recorded by week in total ({country} - {region})')
    axs[1, 1].legend()
    axs[1, 1].set_xlabel('week')
    axs[1, 1].set_ylabel('deaths')
    
    axs[0, 0].grid()
    axs[0, 1].grid()
    axs[1, 0].grid()
    axs[1, 1].grid()

    plt.savefig(f'{result_dir_path}/{country}/{region}/cumulative_covid_cases_reported.png')
    plt.close('all')

In [12]:
def plot_weekly_deaths_with_max_deaths():
    fig, axs = plt.subplots(2, 2, figsize=(30,20))
    ((weekly_regional_deaths_cumulative.iloc[:, :-1] - weekly_regional_deaths_cumulative.iloc[:, :-1].shift(periods=1, axis='columns', fill_value=0)) / 7).T.plot(ax=axs[0, 0])
    (weekly_regional_deaths_cumulative.iloc[:, :-1] / 7).T.plot(ax=axs[0, 1])
    ((weekly_regional_deaths_cumulative.iloc[:, :-1] - weekly_regional_deaths_cumulative.iloc[:, :-1].shift(periods=1, axis='columns', fill_value=0)) / 7).sum().T.plot(ax=axs[1, 0], label='Total')
    (weekly_regional_deaths_cumulative.iloc[:, :-1] / 7).sum().T.plot(ax=axs[1, 1], label='Total')
    axs[0, 0].set_title(f'Weekly Covid deaths recorded by week in age groups ({country} - {region})')
    axs[0, 0].set_xlabel('week')
    axs[0, 0].set_ylabel('deaths')

    axs[0, 1].set_title(f'Cumulative Covid deaths recorded by week in age groups ({country} - {region})')
    axs[0, 1].set_xlabel('week')
    axs[0, 1].set_ylabel('deaths')

    axs[1, 0].set_title(f'Weekly Covid deaths recorded by week in total ({country} - {region})')
    axs[1, 0].legend()
    axs[1, 0].set_xlabel('week')
    axs[1, 0].set_ylabel('deaths')

    axs[1, 1].set_title(f'Cumulative Covid deaths recorded by week in total ({country} - {region})')
    axs[1, 1].legend()
    axs[1, 1].set_xlabel('week')
    axs[1, 1].set_ylabel('deaths')
    
    axs[0, 0].grid()
    axs[0, 1].grid()
    axs[1, 0].grid()
    axs[1, 1].grid()
    

    plt.savefig(f'{result_dir_path}/{country}/{region}/cumulative_covid_deaths_reported.png')
    plt.close('all')

In [13]:
def plot_cumulative_deaths():
    max_deaths_new = pd.DataFrame(max_deaths.iloc[:-1].sum(), index=max_deaths.columns, columns=['Total']).T
    for age_group in max_deaths.index:
        fig, axs = plt.subplots(2, 2, figsize=(30,20))
        max_deaths.loc[age_group, weekly_regional_deaths.loc[age_group, :'2020W52'].index][:-1].T.plot(ax=axs[0, 0], label='max deaths')
        
        if age_group == 'TOTAL':
            max_deaths_new.loc[:, :'2020W52'].T.plot(ax=axs[0, 0], label='max deaths')
            
        
        (max_deaths.loc[age_group, weekly_regional_deaths.loc[age_group, :'2020W52'].index] + weekly_regional_deaths.loc[age_group, :'2020W52'].to_numpy()).T.plot(ax=axs[0, 0], label='max deaths + covid deaths (reported)')
        total_deaths[np.logical_and(total_deaths['Year'] == '2020', total_deaths['Age group'] == age_group)].drop(['Year', 'Region', 'Age group'],  axis='columns').iloc[:, :-1].T.plot(ax=axs[0, 0], label='overall deaths')
        axs[0, 0].legend(['max deaths', 'max deaths + covid deaths (reported)', 'overall deaths'])
        
        if age_group == 'TOTAL':
            axs[0, 0].legend(['max deaths - max(sum)', 'max deaths - sum(max)', 'max deaths + covid deaths (reported)', 'overall deaths'])

        axs[0, 0].set_title(f'2020. Age groups: {age_group} ({country} - {region})')
        axs[0, 0].set_xlabel('week')
        axs[0, 0].set_ylabel('deaths')
        axs[0, 0].grid()
        
        max_deaths.loc[age_group, weekly_regional_deaths.loc[age_group, '2021W01':'2021W52'].index][:-1].T.plot(ax=axs[0, 1], label='max deaths')
        
        if age_group == 'TOTAL':
            max_deaths_new.loc[:, '2021W01':'2021W52'].T.plot(ax=axs[0, 1], label='max deaths')
                              
        
        (max_deaths.loc[age_group, weekly_regional_deaths.loc[age_group, '2021W01':'2021W52'].index] + weekly_regional_deaths.loc[age_group, '2021W01':'2021W52'].to_numpy()).T.plot(ax=axs[0, 1], label='max deaths + covid deaths (reported)')
        total_deaths[np.logical_and(total_deaths['Year'] == '2021', total_deaths['Age group'] == age_group)].drop(['Year', 'Region', 'Age group'],  axis='columns').iloc[:, :-1].T.plot(ax=axs[0, 1], label='overall deaths')
        axs[0, 1].legend(['max deaths', 'max deaths + covid deaths (reported)', 'overall deaths'])
        
        if age_group == 'TOTAL':
            axs[0, 1].legend(['max deaths - max(sum)', 'max deaths - sum(max)', 'max deaths + covid deaths (reported)', 'overall deaths'])
            
        axs[0, 1].set_title(f'2021. Age groups: {age_group} ({country} - {region})')
        axs[0, 1].set_xlabel('week')
        axs[0, 1].set_ylabel('deaths')
        axs[0, 1].grid()
        
        max_deaths.loc[age_group, weekly_regional_deaths.loc[age_group, :'2020W52'].index][:-1].cumsum().T.plot(ax=axs[1, 0], label='max deaths')
        
        if age_group == 'TOTAL':
            max_deaths_new.loc[:, :'2020W52'].cumsum(axis=1).T.plot(ax=axs[1, 0], label='max deaths')
        
        (max_deaths.loc[age_group, weekly_regional_deaths.loc[age_group, :'2020W52'].index] + weekly_regional_deaths.loc[age_group, :'2020W52'].to_numpy()).cumsum().T.plot(ax=axs[1, 0], label='max deaths + covid deaths (reported)')
        total_deaths[np.logical_and(total_deaths['Year'] == '2020', total_deaths['Age group'] == age_group)].drop(['Year', 'Region', 'Age group'],  axis='columns').iloc[:, :-1].cumsum(axis=1).T.plot(ax=axs[1, 0], label='overall deaths')
        axs[1, 0].legend(['max deaths', 'max deaths + covid deaths (reported)', 'overall deaths'])
        
        if age_group == 'TOTAL':
            axs[1, 0].legend(['max deaths - max(sum)', 'max deaths - sum(max)', 'max deaths + covid deaths (reported)', 'overall deaths'])
            
        axs[1, 0].set_title(f'2020. Cumulative. Age groups: {age_group} ({country} - {region})')
        axs[1, 0].set_xlabel('week')
        axs[1, 0].set_ylabel('deaths')
        axs[1, 0].grid()
        
        max_deaths.loc[age_group, weekly_regional_deaths.loc[age_group, '2021W01':'2021W52'].index][:-1].cumsum().T.plot(ax=axs[1, 1], label='max deaths')
        
        if age_group == 'TOTAL':
            max_deaths_new.loc[:, '2021W01':'2021W52'].cumsum(axis=1).T.plot(ax=axs[1, 1], label='max deaths')
            
        
        (max_deaths.loc[age_group, weekly_regional_deaths.loc[age_group, '2021W01':'2021W52'].index] + weekly_regional_deaths.loc[age_group, '2021W01':'2021W52'].to_numpy()).cumsum().T.plot(ax=axs[1, 1], label='max deaths + covid deaths (reported)')
        total_deaths[np.logical_and(total_deaths['Year'] == '2021', total_deaths['Age group'] == age_group)].drop(['Year', 'Region', 'Age group'],  axis='columns').iloc[:, :-1].cumsum(axis=1).T.plot(ax=axs[1, 1], label='overall deaths')
        axs[1, 1].legend(['max deaths', 'max deaths + covid deaths (reported)', 'overall deaths'])
        
        if age_group == 'TOTAL':
            axs[1, 1].legend(['max deaths - max(sum)', 'max deaths - sum(max)', 'max deaths + covid deaths (reported)', 'overall deaths'])    
            
        axs[1, 1].set_title(f'2021. Cumulative. Age groups: {age_group} ({country} - {region})')
        axs[1, 1].set_xlabel('week')
        axs[1, 1].set_ylabel('deaths')
        axs[1, 1].grid()
        plt.savefig(f'{result_dir_path}/{country}/{region}/Overall_deaths_{age_group}.png')
        plt.close('all')

In [14]:
def plot_corrected_covid_deaths():
    if not os.path.exists(f'{result_dir_path}/{country}/{region}/{gamma:.2f}'):
        os.mkdir(f'{result_dir_path}/{country}/{region}/{gamma:.2f}')
    for age_group in max_deaths.index:
        fig, axs = plt.subplots(2, 2, figsize=(30,20))
        weekly_regional_deaths.loc[age_group, :'2020W52'].T.plot(ax=axs[0, 0])
        corrected_covid_deaths.loc[age_group, :'2020W52'].T.plot(ax=axs[0, 0])
        additional_deaths_not_attributed_to_covid.loc[age_group, :'2020W52'].T.plot(ax=axs[0, 0])
        axs[0, 0].legend(['covid deaths (reported)', f'corrected covid deaths {gamma:.2f}', 'excess deaths'])
        axs[0, 0].set_title(f'2020. Age groups: {age_group} ({country} - {region})')
        axs[0, 0].set_xlabel('week')
        axs[0, 0].set_ylabel('deaths')
        axs[0, 0].grid()
        
        weekly_regional_deaths.loc[age_group, '2021W01':'2021W52'].T.plot(ax=axs[0, 1])
        corrected_covid_deaths.loc[age_group, '2021W01':'2021W52'].T.plot(ax=axs[0, 1])
        additional_deaths_not_attributed_to_covid.loc[age_group, '2021W01':'2021W52'].T.plot(ax=axs[0, 1])
        axs[0, 1].legend(['covid deaths (reported)', f'corrected covid deaths {gamma:.2f}', 'excess deaths'])
        axs[0, 1].set_title(f'2021. Age groups: {age_group} ({country} - {region})')
        axs[0, 1].set_xlabel('week')
        axs[0, 1].set_ylabel('deaths')
        axs[0, 1].grid()
        
        weekly_regional_deaths.loc[age_group, :'2020W52'].cumsum().T.plot(ax=axs[1, 0])
        corrected_covid_deaths.loc[age_group, :'2020W52'].cumsum().T.plot(ax=axs[1, 0])
        additional_deaths_not_attributed_to_covid.loc[age_group, :'2020W52'].cumsum().T.plot(ax=axs[1, 0])
        axs[1, 0].legend(['covid deaths (reported)', f'corrected covid deaths {gamma:.2f}', 'excess deaths'])
        axs[1, 0].set_title(f'2020. Age groups: {age_group} ({country} - {region})')
        axs[1, 0].set_xlabel('week')
        axs[1, 0].set_ylabel('deaths')
        axs[1, 0].grid()
        
        weekly_regional_deaths.loc[age_group, '2021W01':'2021W52'].cumsum().T.plot(ax=axs[1, 1])
        corrected_covid_deaths.loc[age_group, '2021W01':'2021W52'].cumsum().T.plot(ax=axs[1, 1])
        additional_deaths_not_attributed_to_covid.loc[age_group, '2021W01':'2021W52'].cumsum().T.plot(ax=axs[1, 1])
        axs[1, 1].legend(['covid deaths (reported)', f'corrected covid deaths {gamma:.2f}', 'excess deaths'])
        axs[1, 1].set_title(f'2021. Age groups: {age_group} ({country} - {region})')
        axs[1, 1].set_xlabel('week')
        axs[1, 1].set_ylabel('deaths')
        axs[1, 1].grid()
        plt.savefig(f'{result_dir_path}/{country}/{region}/{gamma:.2f}/Covid_deaths_gamma_{gamma:.2f}_age_group_{age_group}.png')
        plt.close('all')

In [15]:
def interpolate_timeline_to_days(corrected_covid_deaths, daily_regional_deaths, ifr, ratios=None):
    filter_cols = []
    corrected_covid_deaths_regional_daily = corrected_covid_deaths.copy()
    for week in corrected_covid_deaths.columns:
        epi = epiweeks.Week.fromstring(week)
        for day in epi.iterdates():
            filter_cols.append(day)
            corrected_covid_deaths_regional_daily[day] = corrected_covid_deaths[week] / 7
    corrected_covid_deaths_regional_daily = corrected_covid_deaths_regional_daily[filter_cols]

    # apply delay from detection to death
    if ratios is not None:
        detections_time_of_corrected_covid_deaths_regional_daily = corrected_covid_deaths_regional_daily.copy()
        for i, row in detections_time_of_corrected_covid_deaths_regional_daily.iterrows():
            detections_time_of_corrected_covid_deaths_regional_daily.loc[i] = np.correlate(row.to_numpy(), ratios, 'full')[79:-4]
        else:
            detections_time_of_corrected_covid_deaths_regional_daily = corrected_covid_deaths_regional_daily.copy()

    previously_infected = detections_time_of_corrected_covid_deaths_regional_daily.copy()
    cases_regional_filtered = daily_regional_deaths
    to_calculation = None
    for key, value in IFRs.items():
        to_calculation = list(value.keys())
    for condition in previously_infected.index:
        if condition not in to_calculation:
            previously_infected.drop(previously_infected.loc[previously_infected.index == condition].index, inplace=True)
            cases_regional_filtered.drop(cases_regional_filtered.loc[cases_regional_filtered.index == condition].index, inplace=True)
    cases_regional_filtered = cases_regional_filtered.transpose()
    previously_infected = previously_infected.transpose()

    fig, axes = plt.subplots(5, len(previously_infected.columns), figsize=(20 * len(previously_infected.columns), 35))
    for j, (k, ifr_details) in enumerate(ifr.items()):
        ax = axes[j]
        for i, col in enumerate(previously_infected.columns):
            (previously_infected[col] / ifr_details[col][1]).cumsum().plot(ax=ax[i], label=f'{col} estimate')
            (previously_infected[col] / ifr_details[col][0]).cumsum().plot(ax=ax[i], label=f'{col} upper bound (95% conf. interv.)')
            (previously_infected[col] / ifr_details[col][2]).cumsum().plot(ax=ax[i], label=f'{col} lower bound (95% conf. interv.)')
            (cases_regional_filtered[col]).cumsum().plot(ax=ax[i], label=f'{col} detected cases')
            ax[i].legend(loc='upper left')
            ax[i].grid()
            ax[i].set_ylabel('fraction of age group population')
            ax[i].set_title(f'IFR by {k} (age group {col})')
            fig.autofmt_xdate()
            ax[i].set_ylim([0, None])
    plt.savefig(f'{result_dir_path}/{country}/{region}/fraction of previously infected.png')

    fig, axes = plt.subplots(5, len(previously_infected.columns), figsize=(20 * len(previously_infected.columns), 35))
    for j, (k, ifr_details) in enumerate(ifr.items()):
        ax = axes[j]
        for i, col in enumerate(previously_infected.columns):
            (previously_infected[col] / ifr_details[col][1]).plot(ax=ax[i], label=f'{col} estimate')
            (previously_infected[col] / ifr_details[col][0]).plot(ax=ax[i], label=f'{col} upper bound (95% conf. interv.)')
            (previously_infected[col] / ifr_details[col][2]).plot(ax=ax[i], label=f'{col} lower bound (95% conf. interv.)')
            (cases_regional_filtered[col]).rolling(7).mean().plot(ax=ax[i], label=f'{col} detected cases')
            ax[i].legend(loc='upper left')
            ax[i].grid()
            ax[i].set_ylabel('Fraction of age group population')
            ax[i].set_title(f'IFR by {k} (age group {col})')
            fig.autofmt_xdate()
            ax[i].set_ylim([0, None])
    plt.savefig(f'{result_dir_path}/{country}/{region}/fraction of previously infected timeline.png')

    fig, axes = plt.subplots(5, len(previously_infected.columns), figsize=(20 * len(previously_infected.columns), 35))
    for j, (k, ifr_details) in enumerate(ifr.items()):
        ax = axes[j]
        for i, col in enumerate(previously_infected.columns):
            (previously_infected[col] / ifr_details[col][1]).cumsum().plot(ax=ax[i], label=f'{col} estimate')
            (previously_infected[col] / ifr_details[col][0]).cumsum().plot(ax=ax[i], label=f'{col} upper bound (95% conf. interv.)')
            (previously_infected[col] / ifr_details[col][2]).cumsum().plot(ax=ax[i], label=f'{col} lower bound (95% conf. interv.)')
            (cases_regional_filtered[col]).cumsum().plot(ax=ax[i], label=f'{col} detected cases')
            ax[i].legend(loc='upper left')
            ax[i].grid()
            ax[i].set_ylabel('Number of people')
            ax[i].set_title(f'IFR by {k} (age group {col})')
            fig.autofmt_xdate()
            ax[i].set_ylim([0, None])
    plt.savefig(f'{result_dir_path}/{country}/{region}/number of previously infected.png')

    fig, axes = plt.subplots(5, len(previously_infected.columns), figsize=(20 * len(previously_infected.columns), 35))
    for j, (k, ifr_details) in enumerate(ifr.items()):
        ax = axes[j]
        for i, col in enumerate(previously_infected.columns):
            (previously_infected[col] / ifr_details[col][1]).plot(ax=ax[i], label=f'{col} estimate')
            (previously_infected[col] / ifr_details[col][0]).plot(ax=ax[i], label=f'{col} upper bound (95% conf. interv.)')
            (previously_infected[col] / ifr_details[col][2]).plot(ax=ax[i], label=f'{col} lower bound (95% conf. interv.)')
            (cases_regional_filtered[col]).rolling(7).mean().plot(ax=ax[i], label=f'{col} detected cases')
            ax[i].legend(loc='upper left')
            ax[i].grid()
            ax[i].set_ylabel('Number of people')
            ax[i].set_title(f'IFR by {k} (age group {col})')
            fig.autofmt_xdate()
            ax[i].set_ylim([0, None])
    plt.savefig(f'{result_dir_path}/{country}/{region}/number of previously infected timeline.png')

    fig, axes = plt.subplots(5, len(previously_infected.columns), figsize=(20 * len(previously_infected.columns), 35))
    for j, (k, ifr_details) in enumerate(ifr.items()):
        ax = axes[j]
        for i, col in enumerate(previously_infected.columns):
            (previously_infected[col] / ifr_details[col][1] / (cases_regional_filtered[col]).rolling(7).mean()).plot(ax=ax[i], label=f'{col} estimate')
            (previously_infected[col] / ifr_details[col][0] / (cases_regional_filtered[col]).rolling(7).mean()).plot(ax=ax[i], label=f'{col} upper bound (95% conf. interv.)')
            (previously_infected[col] / ifr_details[col][2] / (cases_regional_filtered[col]).rolling(7).mean()).plot(ax=ax[i], label=f'{col} lower bound (95% conf. interv.)')
            ax[i].legend(loc='upper left')
            ax[i].grid()
            ax[i].set_ylabel('Dark figure')
            ax[i].set_title(f'IFR by {k} (age group {col})')
            fig.autofmt_xdate()
            ax[i].set_ylim([0, None])
    plt.savefig(f'{result_dir_path}/{country}/{region}/darkfigure of previously infected timeline.png')

    fig, axes = plt.subplots(5, len(previously_infected.columns), figsize=(20 * len(previously_infected.columns), 35))
    for j, (k, ifr_details) in enumerate(ifr.items()):
        ax = axes[j]
        for i, col in enumerate(previously_infected.columns):
            ((previously_infected[col] / ifr_details[col][1]).cumsum() / (cases_regional_filtered[col].cumsum())).plot(ax=ax[i], label=f'{col} estimate')
            ((previously_infected[col] / ifr_details[col][0]).cumsum() / (cases_regional_filtered[col].cumsum())).plot(ax=ax[i], label=f'{col} upper bound (95% conf. interv.)')
            ((previously_infected[col] / ifr_details[col][2]).cumsum() / (cases_regional_filtered[col].cumsum())).plot(ax=ax[i], label=f'{col} lower bound (95% conf. interv.)')
            ax[i].legend(loc='upper left')
            ax[i].grid()
            ax[i].set_ylabel('Dark figure')
            ax[i].set_title(f'IFR by {k} (age group {col})')
            fig.autofmt_xdate()
            ax[i].set_ylim([0, None])
    plt.savefig(f'{result_dir_path}/{country}/{region}/cumulative darkfigure of previously infected.png')

In [16]:
today = datetime.date.today().strftime("%d-%m-%Y")
today = '30-01-2022'
data_dir_path = f'data/World/{today}'
result_dir_path = f'result/World/{today}'
if not os.path.exists(f'{result_dir_path}'):
    os.mkdir(f'{result_dir_path}')

with open(f'{data_dir_path}/countries.json', 'r') as file:
    countries = json.load(file)
format_label = pbar.FormatCustomText('Country: %(country)s Running: %(running)s   ', dict(country='start', running='start'))
widgets = [pbar.ETA(), ' ', pbar.Timer(), ' ', pbar.Percentage(format=' (%(percentage)3d%%) '), '  ', format_label]
bar = pbar.ProgressBar(widgets=widgets, maxval=len(countries))
bar.start()

# Data from Germany
ratios = pd.read_csv('./data/Germany/positive_test_to_death_days_distribution.csv')
ratios.index = ratios['offset']
ratios = ratios['probs']
IFRs = {
    'O\'Driscoll': {
         '00-04': [0.122, 0.115, 0.128],
         '05-09': [0.122, 0.115, 0.128],
         '10-14': [0.122, 0.115, 0.128],
         '15-19': [0.122, 0.115, 0.128],
         '20-24': [0.122, 0.115, 0.128],
         '25-29': [0.122, 0.115, 0.128],
         '30-34': [0.122, 0.115, 0.128],
         '35-39': [0.122, 0.115, 0.128],
         '40-44': [0.122, 0.115, 0.128],
         '45-49': [0.122, 0.115, 0.128],
         '50-54': [0.122, 0.115, 0.128],
         '55-59': [0.122, 0.115, 0.128],
         '60-64': [0.992, 0.942, 1.045],
         '65-69': [0.992, 0.942, 1.045],
         '70-74': [0.992, 0.942, 1.045],
         '75-79': [0.992, 0.942, 1.045],
         '80-84': [7.274, 6.909, 7.656],
         '85+': [7.274, 6.909, 7.656]
    },
    'Verity': {
         '00-04': [0.349, 0.194, 0.743],
         '05-09': [0.349, 0.194, 0.743],
         '10-14': [0.349, 0.194, 0.743],
         '15-19': [0.349, 0.194, 0.743],
         '20-24': [0.349, 0.194, 0.743],
         '25-29': [0.349, 0.194, 0.743],
         '30-34': [0.349, 0.194, 0.743],
         '35-39': [0.349, 0.194, 0.743],
         '40-44': [0.349, 0.194, 0.743],
         '45-49': [0.349, 0.194, 0.743],
         '50-54': [0.349, 0.194, 0.743],
         '55-59': [0.349, 0.194, 0.743],
         '60-64': [2.913, 1.670, 5.793],
         '65-69': [2.913, 1.670, 5.793],
         '70-74': [2.913, 1.670, 5.793],
         '75-79': [2.913, 1.670, 5.793],
         '80-84': [7.800, 3.800, 13.30],
         '85+': [7.800, 3.800, 13.30]
    },
    'Perez-Saez': {
         '00-04': [0.070, 0.047, 0.097],
         '05-09': [0.070, 0.047, 0.097],
         '10-14': [0.070, 0.047, 0.097],
         '15-19': [0.070, 0.047, 0.097],
         '20-24': [0.070, 0.047, 0.097],
         '25-29': [0.070, 0.047, 0.097],
         '30-34': [0.070, 0.047, 0.097],
         '35-39': [0.070, 0.047, 0.097],
         '40-44': [0.070, 0.047, 0.097],
         '45-49': [0.070, 0.047, 0.097],
         '50-54': [0.070, 0.047, 0.097],
         '55-59': [0.070, 0.047, 0.097],
         '60-64': [3.892, 2.985, 5.145],
         '65-69': [3.892, 2.985, 5.145],
         '70-74': [3.892, 2.985, 5.145],
         '75-79': [3.892, 2.985, 5.145],
         '80-84': [5.600, 4.300, 7.400],
         '85+': [5.600, 4.300, 7.400]
    },
    'Levin': {
         '00-04': [0.226, 0.212, 0.276],
         '05-09': [0.226, 0.212, 0.276],
         '10-14': [0.226, 0.212, 0.276],
         '15-19': [0.226, 0.212, 0.276],
         '20-24': [0.226, 0.212, 0.276],
         '25-29': [0.226, 0.212, 0.276],
         '30-34': [0.226, 0.212, 0.276],
         '35-39': [0.226, 0.212, 0.276],
         '40-44': [0.226, 0.212, 0.276],
         '45-49': [0.226, 0.212, 0.276],
         '50-54': [0.226, 0.212, 0.276],
         '55-59': [0.226, 0.212, 0.276],
         '60-64': [2.491, 2.294, 3.266],
         '65-69': [2.491, 2.294, 3.266],
         '70-74': [2.491, 2.294, 3.266],
         '75-79': [2.491, 2.294, 3.266],
         '80-84': [15.61, 12.20, 19.50],
         '85+': [15.61, 12.20, 19.50]
    },
    'Driscoll (ours)': {
         '00-04': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '05-09': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '10-14': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '15-19': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '20-24': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '25-29': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '30-34': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '35-39': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '40-44': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '45-49': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '50-54': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '55-59': [0.14307525980551028, 0.16705543901878936, 0.1931643684132524],
         '60-64': [1.1965060413135071, 1.3958914443309222, 1.6147912970366503],
         '65-69': [1.1965060413135071, 1.3958914443309222, 1.6147912970366503],
         '70-74': [1.1965060413135071, 1.3958914443309222, 1.6147912970366503],
         '75-79': [1.1965060413135071, 1.3958914443309222, 1.6147912970366503],
         '80-84': [7.105, 8.292, 9.593],
         '85+': [7.105, 8.292, 9.593]
                       }
}
# IFRs = {k: {k1: sorted(np.array(v1) / 100) for k1, v1 in v.items()} for k, v in IFRs.items()}
age_group_translator = {'Y_LT5': '00-04', 'Y5-9': '05-09',
                        'Y10-14': '10-14', 'Y15-19': '15-19',
                        'Y20-24': '20-24', 'Y25-29': '25-29',
                        'Y30-34': '30-34', 'Y35-39': '35-39',
                        'Y40-44': '40-44', 'Y45-49': '45-49',
                        'Y50-54': '50-54', 'Y55-59': '55-59',
                        'Y60-64': '60-64', 'Y65-69': '65-69',
                        'Y70-74': '70-74', 'Y75-79': '75-79',
                        'Y80-84': '80-84', 'Y_GE85': '85+',
                        'Y85-89': '85-89', 'Y_GE90': '90+',}

with open(f'{result_dir_path}/simulation_complited.csv', 'w') as file:
    writer = csv.writer(file)
    writer.writerow(['completed'])
    
region_translator = {}
for country, regions in countries.items():
    for region, short in regions.items():
        c = country if region == 'All' else region
        region_translator[c] = short[:2] if c == country else short
# print(region_translator['Germany'])

for country_index, (country, regions) in enumerate(countries.items()):
    if country in ['Estonia', 'Sweden']:
        continue
    if country not in ['Germany']:
        continue
    format_label.update_mapping(country=country)
    bar.update()
    if not os.path.exists(f'{data_dir_path}/{country}/old_deaths.csv'):
        continue
    if not os.path.exists(f'{data_dir_path}/{country}/old_population.csv'):
        continue
    if not os.path.exists(f'{data_dir_path}/{country}/cases_5.csv'):
        continue
    if not os.path.exists(f'{data_dir_path}/{country}/deaths_5.csv'):
        continue
    
    if os.path.getsize(f'{data_dir_path}/{country}/old_deaths.csv') < 50 or os.path.getsize(f'{data_dir_path}/{country}/old_population.csv') < 50:
        continue
    if os.path.getsize(f'{data_dir_path}/{country}/cases_5.csv') < 50 or os.path.getsize(f'{data_dir_path}/{country}/deaths_5.csv') < 50:
        continue
    with open(f'{result_dir_path}/simulation_complited.csv', 'a') as file:
        writer = csv.writer(file)
        writer.writerow([country])
    
    # Load cases
    format_label.update_mapping(running='Loading cases' + ' ' * 30)
    bar.update()
    cases = load_cases(country)
    cases = cases.append(cases[cases['Age group'].isin(['85-89', '90-95', '95-99', '100-104'])].groupby(['date', 'location_name']).sum().reset_index())
    cases = cases.fillna({'Age group': '85+'})
    cases = cases[~cases['Age group'].isin(['85-89', '90-94', '95-99', '100-104'])].reset_index(drop=True)
    cases['location_name'] = cases['location_name'].replace(region_translator)
    cases['location_name'] = cases['location_name'].replace(region_translator)
    bar.update()
    
    # Load deaths
    format_label.update_mapping(running='Loading deaths' + ' ' * 30)
    bar.update()
    deaths = load_deaths(country)
    deaths = deaths.append(deaths[deaths['Age group'].isin(['85-89', '90-95', '95-99', '100-104'])].groupby(['date', 'location_name']).sum().reset_index())
    deaths = deaths.fillna({'Age group': '85+'})
    deaths = deaths[~deaths['Age group'].isin(['85-89', '90-94', '95-99', '100-104'])].reset_index(drop=True)
    deaths['location_name'] = deaths['location_name'].replace(region_translator)
    bar.update()
    
    # Load population
    format_label.update_mapping(running='Loading population' + ' ' * 30)
    bar.update()
    population = pd.read_csv(f'{data_dir_path}/{country}/population.csv')
    population = population.rename(columns=region_translator)
    bar.update()
    
    # Load and prepare total deaths
    format_label.update_mapping(running='Loading total deaths' + ' ' * 30)
    bar.update()
    total_deaths = pd.read_csv(f'{data_dir_path}/{country}/old_deaths.csv')
    total_deaths = total_deaths[total_deaths['Sex'] == 'T'].reset_index(drop=True)
    total_deaths = total_deaths[total_deaths['Year'].apply(lambda x: x[:4]) >= str(2015)].reset_index(drop=True)
    total_deaths = total_deaths[total_deaths['Region'].isin(list(regions.values()) + [list(regions.values())[0][:2]])].reset_index(drop=True)
    new_df = pd.DataFrame(columns=['Year', 'Region', 'Age group'] + list(np.arange(1, 54)))
    new_df = new_df.append(total_deaths[total_deaths['Sex'] == 'T'])
    new_df['Year'] = new_df['Year'].apply(lambda x: x[:4])
    new_df = new_df.drop(columns='Sex', axis=1)
    new_df = new_df.drop(columns='Value', axis=1)
    new_df = new_df.drop_duplicates()
    bar.update()
    
    for i in range(len(total_deaths)):
        region = total_deaths.iloc[i]['Region']
        age_group = total_deaths.iloc[i]['Age group']
        year = total_deaths.iloc[i]['Year'][:4]
        week = int(total_deaths.iloc[i]['Year'][6:])
        x = np.logical_and(new_df['Region'] == region, new_df['Year'] == year)
        x = np.logical_and(x, new_df['Year'] >= str(2015))
        x = np.logical_and(x, new_df['Age group'] == age_group)
        new_df.loc[x, week] = total_deaths.iloc[i]['Value']
        bar.update()
    total_deaths = new_df[new_df['Year'] >= str(2015)]
    total_deaths = total_deaths.drop(total_deaths.loc[total_deaths['Age group']=='UNK'].index)
    total_deaths = total_deaths.drop(total_deaths.loc[total_deaths['Age group']=='Y_GE75'].index)
    total_deaths = total_deaths.drop(total_deaths.loc[total_deaths['Age group']=='Y_GE80'].index).fillna(0)
    total_deaths.replace(age_group_translator, inplace=True)
    total_deaths = total_deaths.append(total_deaths[total_deaths['Age group'].isin(['85-89', '90+'])].groupby(['Year', 'Region']).sum().reset_index())
    total_deaths = total_deaths.fillna({'Age group': '85+'})
    total_deaths = total_deaths[~total_deaths['Age group'].isin(['85-89', '90+'])].reset_index(drop=True)
    bar.update()
    
    # Load and prepare prior population
    format_label.update_mapping(running='Loading prior population' + ' ' * 30)
    bar.update()
    old_population = pd.read_csv(f'{data_dir_path}/{country}/old_population.csv')
    old_population = old_population[old_population['Year'] >= 2015].reset_index().drop(columns='index', axis=1)
    prior_populations = dict()

    for year in range(2015, 2020):
        df = pd.DataFrame(columns=['Age group'] + list(old_population['Region'].unique()))
        df['Age group'] = old_population['Age group'].unique()
        for region in old_population['Region'].unique():
            condition = np.logical_and(old_population['Region'] == region, old_population['Year'] == year)
            condition = np.logical_and(condition, old_population['Sex'] == 'T')
            if len(old_population[condition]['Value']) < len(df['Age group']):
                df.loc[:, region] = 0
            else:
                df.loc[:, region] = list(old_population[condition]['Value'])
            bar.update()
        df = df.drop(df.loc[df['Age group']=='UNK'].index)
        df = df.drop(df.loc[df['Age group']=='Y_GE75'].index)
        df = df.drop(df.loc[df['Age group']=='Y_GE80'].index)
        df['Age group'].replace(age_group_translator, inplace=True)
        prior_populations[year] = df.sort_values(by='Age group').reset_index().drop('index', axis=1)
    bar.update()
    
    # Prepare regions set
    cases_regions_set = set(cases['location_name'].unique())
    deaths_regions_set = set(deaths['location_name'].unique())
    population_regions_set = set(population.columns)
    total_deaths_regions_set = set(total_deaths['Region'].unique())
    prior_populations_regions_set = set(prior_populations[2015].columns)
    regions_set = cases_regions_set
    regions_set = regions_set.intersection(deaths_regions_set)
    regions_set = regions_set.intersection(population_regions_set)
    regions_set = regions_set.intersection(total_deaths_regions_set)
    regions_set = regions_set.intersection(prior_populations_regions_set)
    
    

    for region in regions_set:
#         # Prepare dir
        format_label.update_mapping(running='Prepare dir' + ' ' * 30)
        bar.update()
        if not os.path.exists(f'{result_dir_path}/{country}'):
            os.mkdir(f'{result_dir_path}/{country}')
        region = region.replace('*', '')
        if not os.path.exists(f'{result_dir_path}/{country}/{region}'):
            os.mkdir(f'{result_dir_path}/{country}/{region}')  
            
        # Simulation
        format_label.update_mapping(running='Simulation' + ' ' * 30)
        bar.update()
        daily_regional_deaths_cumulative, weekly_regional_deaths_cumulative = prepare_weekly_covid_deaths(region)
        weekly_regional_deaths = (weekly_regional_deaths_cumulative.iloc[:, :-1] - weekly_regional_deaths_cumulative.iloc[:, :-1].shift(periods=1, axis='columns', fill_value=0)) / 7
        daily_regional_deaths = (daily_regional_deaths_cumulative.iloc[:, :-1] - daily_regional_deaths_cumulative.iloc[:, :-1].shift(periods=1, axis='columns', fill_value=0))
        weekly_regional_covid_cases = prepare_weekly_covid_cases(region)
        max_deaths = prepare_regional_deaths(region)
        
        correction_due_to_aging = calculate_correction_due_to_aging(region, max_deaths)
        max_deaths = calculate_reference_max_deaths(region, correction_due_to_aging, total_deaths)
        

        
        overall_deaths = calculate_overall_covid_deaths(region)
        
        # max_deaths_correct = max_deaths
        # max_deaths_correct.columns = [f'W{i:02d}' for i in range(1, 53 + 1)]
        # y2020 = max_deaths_correct.add_prefix('2020')
        # y2021 = max_deaths_correct.add_prefix('2021')
        # y2022 = max_deaths_correct.add_prefix('2022')
        # max_deaths_concatenated = pd.concat([y2020, y2021, y2022], axis=1)
        # max_deaths_concatenated = max_deaths_concatenated.loc[:, :overall_deaths.columns[-1]]
        # max_deaths_concatenated
        
        overall_minus_max = np.maximum(0, overall_deaths - max_deaths).loc[:, :weekly_regional_deaths.columns[-1]]
        weekly_regional_deaths = weekly_regional_deaths.append(pd.DataFrame(weekly_regional_deaths.sum(axis=0), columns=['TOTAL']).T)
        additional_deaths_not_attributed_to_covid = np.maximum(0, overall_minus_max[weekly_regional_deaths.columns] - weekly_regional_deaths).fillna(0)
        corrected_covid_deaths = (additional_deaths_not_attributed_to_covid * 0.85 + weekly_regional_deaths)
        
        # Plot results
        format_label.update_mapping(country=country, running='Ploting result' + ' ' * 30)
        
        # interpolate_timeline_to_days(corrected_covid_deaths, daily_regional_deaths, IFRs, ratios)
        # bar.update()
        # plot_weekly_deaths()
        # bar.update()
        # plot_weekly_cases()
        # bar.update()
        # plot_cumulative_deaths()
        # bar.update()
#         df = pd.DataFrame(columns=['gamma', 'factor 1', 'factor 2', 'f1 - f2'])
#         for index, gamma in enumerate(np.linspace(0, 1, 11)):
#             bar.update()
#             corrected_covid_deaths = additional_deaths_not_attributed_to_covid * gamma + weekly_regional_deaths
#             plot_corrected_covid_deaths()
#         fig, axs = plt.subplots(1, 2, figsize=(30,10))
#         for index, gamma in enumerate(np.linspace(0, 1, 11)):
#             bar.update()
#             corrected_covid_deaths = additional_deaths_not_attributed_to_covid * gamma + weekly_regional_deaths
#             factor_1 = corrected_covid_deaths.iloc[:-1].sum().sum()
#             factor_2 = corrected_covid_deaths.loc['TOTAL'].sum()
#             bar.update()
#             df = df.append(pd.DataFrame({'factor 1': factor_1, 'factor 2': factor_2, 'gamma': gamma, 'f1 - f2': factor_1 - factor_2}, index=[index]))
#             corrected_covid_deaths.iloc[:-1].sum().cumsum().plot(ax=axs[0], label=f'gamma = {gamma}')
#             corrected_covid_deaths.loc['TOTAL'].cumsum().plot(ax=axs[1], label=f'gamma = {gamma}')
            
#         axs[0].legend()
#         axs[0].grid()
#         axs[0].set_title('Factor 1')

#         axs[1].legend()
#         axs[1].grid()
#         axs[1].set_title('Factor 2')
#         plt.savefig(f'{result_dir_path}/{country}/{region}/death_factors.png')
#         df.plot(x='gamma', y=['factor 1', 'factor 2'], grid=True)
#         plt.savefig(f'{result_dir_path}/{country}/{region}/factors.png')
#         df.to_csv(f'{result_dir_path}/{country}/{region}/factors.csv')
        
        
    bar.update(country_index)
bar.finish()
print('Simulation done')

Time:  0:00:16 Elapsed Time: 0:00:16  (100%)   Country: Germany Running: Ploting result                                           


Simulation done


In [17]:
# country = 'Germany'
# region = 'DE'



# df = pd.DataFrame(columns=['gamma', 'factor 1', 'factor 2', 'f1 - f2'])
# fig, axs = plt.subplots(1, 2, figsize=(30,10))
# for index, gamma in enumerate(np.linspace(0, 1, 9)):
#     corrected_covid_deaths = additional_deaths_not_attributed_to_covid * gamma + weekly_regional_deaths
#     factor_1 = corrected_covid_deaths.iloc[:-1].sum().sum()
#     factor_2 = corrected_covid_deaths.loc['TOTAL'].sum()
#     df = df.append(pd.DataFrame({'factor 1': factor_1, 'factor 2': factor_2, 'gamma': gamma, 'f1 - f2': factor_1 - factor_2}, index=[index]))
#     corrected_covid_deaths.iloc[:-1].sum().cumsum().plot(ax=axs[0], label=f'gamma = {gamma}')
#     corrected_covid_deaths.loc['TOTAL'].cumsum().plot(ax=axs[1], label=f'gamma = {gamma}')
# axs[0].legend()
# axs[0].grid()
# axs[0].set_title('Factor 1')

# axs[1].legend()
# axs[1].grid()
# axs[1].set_title('Factor 2')
# plt.savefig(f'{result_dir_path}/{country}/{region}/death_factors.png')
# df.plot(x='gamma', y=['factor 1', 'factor 2'], grid=True)
# plt.savefig(f'{result_dir_path}/{country}/{region}/factors.png')
# df.to_csv(f'{result_dir_path}/{country}/{region}/factors.csv')

In [20]:
correction_due_to_aging

{2016: {'40-44': 0.9865733491498616,
  '45-49': 1.2923343898573691,
  '50-54': 1.069963846153846,
  '55-59': 0.8840052700922266,
  '60-64': 0.892443986961743,
  '65-69': 0.8818982084690554,
  '70-74': 0.9977860734037205,
  '75-79': 1.2276877515813687,
  '80-84': 0.7389964871194379,
  '85+': 0.8858139815186822,
  'TOTAL': 0.9954655844942459},
 2017: {'40-44': 0.9865733491498616,
  '45-49': 1.2923343898573691,
  '50-54': 1.069963846153846,
  '55-59': 0.8840052700922266,
  '60-64': 0.892443986961743,
  '65-69': 0.8818982084690554,
  '70-74': 0.9977860734037205,
  '75-79': 1.2276877515813687,
  '80-84': 0.7389964871194379,
  '85+': 0.8858139815186822,
  'TOTAL': 0.9954655844942459},
 2018: {'40-44': 0.9865733491498616,
  '45-49': 1.2923343898573691,
  '50-54': 1.069963846153846,
  '55-59': 0.8840052700922266,
  '60-64': 0.892443986961743,
  '65-69': 0.8818982084690554,
  '70-74': 0.9977860734037205,
  '75-79': 1.2276877515813687,
  '80-84': 0.7389964871194379,
  '85+': 0.8858139815186822,


In [19]:
cases

Unnamed: 0,date,location_name,Age group,value
0,2020-01-02,DE,00-04,0.0
1,2020-01-02,DE,05-09,0.0
2,2020-01-02,DE,10-14,0.0
3,2020-01-02,DE,15-19,0.0
4,2020-01-02,DE,20-24,0.0
...,...,...,...,...
216211,2022-01-19,DE_SL,85+,1955.3
216212,2022-01-19,DE_SN,85+,17796.6
216213,2022-01-19,DE_ST,85+,6183.2
216214,2022-01-19,DE_SH,85+,2896.0
