In [1]:
import os
import pathlib
import pandas as pd

from helpers.column import COLUMN_HEAD

In [2]:
def get_final_year():
    """Prompt on stdin until the user enters one of the supported final years.

    Surrounding whitespace in the answer is ignored, so ``' 2021 '`` is
    accepted as ``'2021'``. Any other answer causes the prompt to be repeated.

    Returns:
        str: the chosen year, one of ``'2020'``, ``'2021'`` or ``'2022'``.
    """
    allowed_years = ['2020', '2021', '2022']
    msg = f'Please provide final year to analyze: [allowed are: {allowed_years}] '

    # Strip before validating so a stray space or newline does not force a re-prompt.
    get_year = input(msg).strip()
    while get_year not in allowed_years:
        get_year = input(msg).strip()

    print(f'Will analyze data for {get_year}')
    return get_year

In [3]:
# Load the cleaned population table, keep only the identifying columns plus
# the '2019' column, and expose that column under COLUMN_HEAD.POPULATION.
cleaned_avg_population_file = pathlib.Path(f'{os.getcwd()}/../../../cleaned_data/Population_all_ages_2017-2019(Bulgaria).csv')
pop = (
    pd.read_csv(cleaned_avg_population_file)
    .loc[:, [COLUMN_HEAD.LOCATION, COLUMN_HEAD.SEX, COLUMN_HEAD.AGE, '2019']]
    .rename(columns={'2019': COLUMN_HEAD.POPULATION})
)

In [4]:
# Load the life table, keep the identifying columns plus 'e(x)', and expose
# that column under COLUMN_HEAD.LIFE_EXPECTANCY.
life_tables_file = pathlib.Path(f'{os.getcwd()}/../../../output_data/life_table_calculations/Bulgaria_Abidged_life_table.csv')
life_tables = (
    pd.read_csv(life_tables_file)
    .loc[:, [COLUMN_HEAD.LOCATION, COLUMN_HEAD.SEX, COLUMN_HEAD.AGE, 'e(x)']]
    .rename(columns={'e(x)': COLUMN_HEAD.LIFE_EXPECTANCY})
)

In [5]:
# Ask the user which final year to analyse; this blocks on stdin until one of
# the allowed years is entered.
final_year = get_final_year()
# The input file name embeds the chosen year after a ':' separator.
# NOTE(review): ':' is not a legal file-name character on Windows — confirm the
# target platform before running this there.
mortality_file = pathlib.Path(f'{os.getcwd()}/../../../output_data/Excess_mortality/Predicted_excess_mortality_by_location_sex_age_year:{final_year}.csv')
mortality = pd.read_csv(mortality_file)


Will analyze data for 2021


In [6]:
# Total the population within each (sex, location) pair; the grouping keys
# stay as ordinary columns because as_index=False.
pop = (
    pop
    .groupby([COLUMN_HEAD.SEX, COLUMN_HEAD.LOCATION], as_index=False)
    [COLUMN_HEAD.POPULATION]
    .sum()
)

In [7]:
# Attach life expectancy to every mortality row that has a matching
# (age, sex, location) entry in the life table. The join is an inner join
# (pandas' default), so rows without a match are dropped.
exc_mort = mortality.merge(
    right=life_tables,
    how='inner',
    on=[COLUMN_HEAD.AGE, COLUMN_HEAD.SEX, COLUMN_HEAD.LOCATION],
)


In [8]:
# PYLL mean: life expectancy times mean excess mortality, but only where the
# excess mortality is strictly positive; every other row (including rows where
# the excess mortality is missing) gets 0. Computed column-wise rather than
# with a per-row Python lambda.
exc_mort[COLUMN_HEAD.PYLL_MEAN] = (
    (exc_mort[COLUMN_HEAD.LIFE_EXPECTANCY] * exc_mort[COLUMN_HEAD.EXCESS_MORTALITY_MEAN])
    .where(exc_mort[COLUMN_HEAD.EXCESS_MORTALITY_MEAN] > 0, 0)
    .round(1)
)

# PYLL fluctuation: life expectancy times the confidence interval, kept only
# for rows whose (already rounded) PYLL mean is strictly positive; 0 otherwise.
# NOTE(review): the condition tests the rounded PYLL mean, so a row whose PYLL
# rounds down to 0.0 also gets a 0 fluctuation — confirm this is intended.
exc_mort[COLUMN_HEAD.PYLL_FLUCTUATION] = (
    (exc_mort[COLUMN_HEAD.LIFE_EXPECTANCY] * exc_mort[COLUMN_HEAD.CONFIDENCE_INTERVAL])
    .where(exc_mort[COLUMN_HEAD.PYLL_MEAN] > 0, 0)
    .round(1)
)

In [9]:
# Keep only the rows flagged as a significant increase.
exc_mort = exc_mort[exc_mort[COLUMN_HEAD.IS_SIGNIFICANT] == 'Significant Increase']

In [10]:
# Every measure is summed within each (sex, location, significance) group.
agg_params = dict.fromkeys(
    [
        COLUMN_HEAD.EXCESS_MORTALITY_MEAN,
        COLUMN_HEAD.CONFIDENCE_INTERVAL,
        COLUMN_HEAD.PYLL_MEAN,
        COLUMN_HEAD.PYLL_FLUCTUATION,
    ],
    'sum',
)
exc_mort = (
    exc_mort
    .groupby([COLUMN_HEAD.SEX, COLUMN_HEAD.LOCATION, COLUMN_HEAD.IS_SIGNIFICANT], as_index=False)
    .agg(agg_params)
)

In [11]:
# Ratio of summed PYLL to summed excess mortality, per group, computed
# column-wise instead of with a per-row Python lambda.
exc_mort[COLUMN_HEAD.PYLL_AVG_MEAN] = (
    exc_mort[COLUMN_HEAD.PYLL_MEAN] / exc_mort[COLUMN_HEAD.EXCESS_MORTALITY_MEAN]
).round(2)

# Fluctuation of that ratio: absolute difference between the ratio evaluated
# at (mean + fluctuation) for both numerator and denominator and the rounded
# ratio stored above.
exc_mort[COLUMN_HEAD.PYLL_AVG_FLUC] = abs(
    (exc_mort[COLUMN_HEAD.PYLL_MEAN] + exc_mort[COLUMN_HEAD.PYLL_FLUCTUATION])
    /
    (exc_mort[COLUMN_HEAD.EXCESS_MORTALITY_MEAN] + exc_mort[COLUMN_HEAD.CONFIDENCE_INTERVAL])
    - exc_mort[COLUMN_HEAD.PYLL_AVG_MEAN]
).round(2)

In [12]:
# Bring in the population total for each (sex, location) pair. The join is an
# inner join (pandas' default), so groups without a population row are dropped.
exc_mort = exc_mort.merge(
    right=pop,
    how='inner',
    on=[COLUMN_HEAD.SEX, COLUMN_HEAD.LOCATION],
)

In [13]:
# PYLL expressed per 100,000 population (PYLL / population * 10**5), computed
# column-wise instead of with a per-row Python lambda.
exc_mort[COLUMN_HEAD.PYLL_STD_MEAN] = (
    (exc_mort[COLUMN_HEAD.PYLL_MEAN] / exc_mort[COLUMN_HEAD.POPULATION])
    * 10 ** 5
).round(1)

# Same scaling applied to the PYLL fluctuation.
# NOTE(review): this is rounded to 3 decimals while the matching mean above is
# rounded to 1 — the precision is kept as-is here; confirm whether it is intended.
exc_mort[COLUMN_HEAD.PYLL_STD_FLUC] = (
    (exc_mort[COLUMN_HEAD.PYLL_FLUCTUATION] / exc_mort[COLUMN_HEAD.POPULATION])
    * 10 ** 5
).round(3)

In [14]:
def _mean_with_fluctuation(mean, fluctuation):
    """Render two Series element-wise as 'mean (±fluctuation)' strings."""
    return mean.map(str) + ' (±' + fluctuation.map(str) + ')'


# The summed PYLL columns are rounded to one decimal before formatting; the
# other two pairs are formatted exactly as stored.
exc_mort[COLUMN_HEAD.PYLL_MEAN_DECORATED] = _mean_with_fluctuation(
    exc_mort[COLUMN_HEAD.PYLL_MEAN].round(1),
    exc_mort[COLUMN_HEAD.PYLL_FLUCTUATION].round(1),
)

exc_mort[COLUMN_HEAD.PYLL_AVG_MEAN_DECORATED] = _mean_with_fluctuation(
    exc_mort[COLUMN_HEAD.PYLL_AVG_MEAN],
    exc_mort[COLUMN_HEAD.PYLL_AVG_FLUC],
)

exc_mort[COLUMN_HEAD.PYLL_STD_MEAN_DECORATED] = _mean_with_fluctuation(
    exc_mort[COLUMN_HEAD.PYLL_STD_MEAN],
    exc_mort[COLUMN_HEAD.PYLL_STD_FLUC],
)

In [15]:
# Write the final table to the Excess_mortality output folder, one file per
# analysed year; the DataFrame index is not written.
# NOTE(review): the file name contains ':', which is not a legal file-name
# character on Windows — confirm the target platform.
file_name = f'PYLL_for_year:{final_year}.csv'
# The output location is resolved relative to the current working directory.
path = pathlib.Path(f'{os.getcwd()}/../../../output_data/Excess_mortality/{file_name}')
exc_mort.to_csv(path, index=False)