In [1]:
import os
import pathlib
import pandas as pd

from helpers.column import COLUMN_HEAD
from helpers.generate_mean_working_years import gen_working_years
from helpers.age_bindings import AGE_BINDINGS

In [2]:
def get_final_year():
    """Prompt on stdin until the user enters one of the supported final years.

    Leading and trailing whitespace in the typed answer is ignored, so
    ``' 2021 '`` is accepted as ``'2021'``. Any other answer causes the same
    prompt to be shown again.

    Returns:
        str: The chosen year, one of ``'2020'``, ``'2021'`` or ``'2022'``.
    """
    allowed_years = ['2020', '2021', '2022']
    msg = f'Please provide final year to analyze: [allowed are: {allowed_years}] '

    # Strip the answer so a stray space does not trigger a silent re-prompt.
    get_year = input(msg).strip()
    while get_year not in allowed_years:
        get_year = input(msg).strip()

    print(f'Will analyze data for {get_year}')
    return get_year

In [3]:
# Five-year age bands from 15-19 up to 60-64, looked up by attribute name on
# AGE_BINDINGS. The mortality table is later restricted to these bands.
filter_ages = [
    getattr(AGE_BINDINGS, f'AGE_{start}_{start + 4}')
    for start in range(15, 65, 5)
]

In [4]:
# Tabulate whatever gen_working_years() returns; this frame is later joined to
# the mortality data on the age column.
life_exp = pd.DataFrame(data=gen_working_years())

In [5]:
# Load the cleaned population table, keep only the location / sex / age columns
# plus the 2019 figures, and expose the 2019 column under the population name.
cleaned_avg_population_file = pathlib.Path(f'{os.getcwd()}/../../../cleaned_data/Population_all_ages_2017-2019(Bulgaria).csv')
pop = (
    pd.read_csv(cleaned_avg_population_file)
    .loc[:, [COLUMN_HEAD.LOCATION, COLUMN_HEAD.SEX, COLUMN_HEAD.AGE, '2019']]
    .rename(columns={'2019': COLUMN_HEAD.POPULATION})
)

In [6]:
# Ask which year to analyse, then read that year's predicted excess mortality
# and keep only the rows whose age band is listed in filter_ages.
final_year = get_final_year()
mortality_file = pathlib.Path(f'{os.getcwd()}/../../../output_data/Excess_mortality/Predicted_excess_mortality_by_location_sex_age_year:{final_year}.csv')
mortality = pd.read_csv(mortality_file).loc[
    lambda frame: frame[COLUMN_HEAD.AGE].isin(filter_ages)
]


Will analyze data for 2022


In [7]:
# Collapse the age dimension: one population total per (sex, location) pair.
# NOTE(review): no age filter is applied to pop before this sum, so the total
# appears to cover every age band in the file, not only filter_ages - confirm
# this is the intended denominator for the per-100,000 rates computed later.
pop = (
    pop
    .groupby(by=[COLUMN_HEAD.SEX, COLUMN_HEAD.LOCATION], as_index=False)
    .agg({COLUMN_HEAD.POPULATION: 'sum'})
)

In [8]:
exc_mort = mortality.merge(life_exp, on=[COLUMN_HEAD.AGE])


In [9]:
# Working years of life lost (WYLL) per row: mean working years left times mean
# excess mortality. Computed column-wise rather than with a row-wise apply so
# the cell also works when exc_mort has no rows.
# Rows whose excess mortality is not strictly positive (NaN included) get 0.
exc_mort[COLUMN_HEAD.WYLL_MEAN] = (
    (exc_mort[COLUMN_HEAD.WORKING_YEARS_LEFT_MEAN] * exc_mort[COLUMN_HEAD.EXCESS_MORTALITY_MEAN])
    .where(exc_mort[COLUMN_HEAD.EXCESS_MORTALITY_MEAN] > 0, 0)
    .round(1)
)

# Fluctuation of WYLL: mean working years left times the confidence-interval
# column. It is zeroed wherever the already rounded WYLL mean is not positive,
# so this assignment must stay after the one above.
exc_mort[COLUMN_HEAD.WYLL_FLUCTUATION] = (
    (exc_mort[COLUMN_HEAD.WORKING_YEARS_LEFT_MEAN] * exc_mort[COLUMN_HEAD.CONFIDENCE_INTERVAL])
    .where(exc_mort[COLUMN_HEAD.WYLL_MEAN] > 0, 0)
    .round(1)
)

In [10]:
# Keep only the rows flagged as a significant increase in mortality.
exc_mort = exc_mort[exc_mort[COLUMN_HEAD.IS_SIGNIFICANT].eq('Significant Increase')]

In [11]:
# Every numeric column carried forward is totalled within its group.
# NOTE(review): the confidence-interval column is summed in the same way as the
# means - confirm that adding these values across age bands is intended.
agg_params = dict.fromkeys(
    [
        COLUMN_HEAD.EXCESS_MORTALITY_MEAN,
        COLUMN_HEAD.CONFIDENCE_INTERVAL,
        COLUMN_HEAD.WYLL_MEAN,
        COLUMN_HEAD.WYLL_FLUCTUATION,
    ],
    'sum',
)

# One output row per (sex, location, significance label) combination.
exc_mort = (
    exc_mort
    .groupby(
        by=[COLUMN_HEAD.SEX, COLUMN_HEAD.LOCATION, COLUMN_HEAD.IS_SIGNIFICANT],
        as_index=False,
    )
    .agg(agg_params)
)

In [12]:
# Average working years lost per excess death: total WYLL divided by total
# excess mortality. Column arithmetic is used instead of a row-wise apply so
# the cell still produces a (possibly empty) column when the significance
# filter left no rows.
# NOTE: a zero denominator yields inf/NaN in the affected row.
exc_mort[COLUMN_HEAD.WYLL_AVG_MEAN] = (
    exc_mort[COLUMN_HEAD.WYLL_MEAN] / exc_mort[COLUMN_HEAD.EXCESS_MORTALITY_MEAN]
).round(2)

# Fluctuation of that average: absolute gap between the ratio obtained after
# adding the fluctuation / confidence-interval columns to numerator and
# denominator, and the already rounded average computed above.
exc_mort[COLUMN_HEAD.WYLL_AVG_FLUC] = (
    (exc_mort[COLUMN_HEAD.WYLL_MEAN] + exc_mort[COLUMN_HEAD.WYLL_FLUCTUATION])
    / (exc_mort[COLUMN_HEAD.EXCESS_MORTALITY_MEAN] + exc_mort[COLUMN_HEAD.CONFIDENCE_INTERVAL])
    - exc_mort[COLUMN_HEAD.WYLL_AVG_MEAN]
).abs().round(2)


In [13]:
# Bring in the population total for each (sex, location) pair; the inner join
# drops pairs that are absent from either table.
exc_mort = pd.merge(
    exc_mort,
    pop,
    how='inner',
    on=[COLUMN_HEAD.SEX, COLUMN_HEAD.LOCATION],
)

In [14]:
# Standardised WYLL: working years lost per 100,000 population. Computed with
# column arithmetic instead of a row-wise apply so the cell also works when
# exc_mort has no rows.
exc_mort[COLUMN_HEAD.WYLL_STD_MEAN] = (
    (exc_mort[COLUMN_HEAD.WYLL_MEAN] / exc_mort[COLUMN_HEAD.POPULATION])
    * 10 ** 5
).round(1)

# Same scaling for the fluctuation. It is rounded to three decimals whereas the
# mean above is rounded to one; the original precision is kept unchanged.
exc_mort[COLUMN_HEAD.WYLL_STD_FLUC] = (
    (exc_mort[COLUMN_HEAD.WYLL_FLUCTUATION] / exc_mort[COLUMN_HEAD.POPULATION])
    * 10 ** 5
).round(3)

In [15]:
def _with_spread(center, spread):
    """Combine two Series element-wise into 'center (±spread)' strings."""
    return center.map(str) + ' (±' + spread.map(str) + ')'


# Human-readable "value (±fluctuation)" columns for the three WYLL measures.
# Only the total WYLL pair is rounded again here (to one decimal).
exc_mort[COLUMN_HEAD.WYLL_MEAN_DECORATED] = _with_spread(
    exc_mort[COLUMN_HEAD.WYLL_MEAN].round(1),
    exc_mort[COLUMN_HEAD.WYLL_FLUCTUATION].round(1),
)
exc_mort[COLUMN_HEAD.WYLL_AVG_MEAN_DECORATED] = _with_spread(
    exc_mort[COLUMN_HEAD.WYLL_AVG_MEAN],
    exc_mort[COLUMN_HEAD.WYLL_AVG_FLUC],
)
exc_mort[COLUMN_HEAD.WYLL_STD_MEAN_DECORATED] = _with_spread(
    exc_mort[COLUMN_HEAD.WYLL_STD_MEAN],
    exc_mort[COLUMN_HEAD.WYLL_STD_FLUC],
)

In [16]:
# Write the final table next to the other excess-mortality outputs, one file
# per analysed year, without the DataFrame index.
file_name = f'WYLL_for_year:{final_year}.csv'
path = pathlib.Path(
    f'{os.getcwd()}/../../../output_data/Excess_mortality/{file_name}'
)
exc_mort.to_csv(path_or_buf=path, index=False)