In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from generate_daily_temp import daily_temp_interp, create_noise, daily_temp_era5
from relative_risk_calculation import shift_rr, tz_tmrel_combinations, create_population_df, rr_paf_rr
from read_files import get_annual_pop, read_climate_data, read_IMAGEregions_and_TempZone, get_erf_dataframe, get_tmrel_map

In [5]:
# --- Run configuration ---
# Project root folder: passed to the data loaders and used to build the output paths.
wdir = 'X:\\user\\liprandicn\\Health Impacts Model'

# Socio-economic scenario. Options listed by the author: 'SSP1_M', 'SSP2_CP', 'SSP3_H', 'SSP5_H'
ssp = 'SSP2_CP'

# Years to simulate, 2010 through 2100 (both ends included)
years = range(2010, 2100 + 1)

# Value handed to create_noise(). Options listed by the author: 1, 5, 10
std_value = 5

# Climate categories to process. Full list given by the author: 'C1' ... 'C8'
ccategories = ['C8']

In [None]:
# Climate inputs (nc files) used to generate the artificial daily temperatures
gcm_diff, gcm_start, gcm_end, cc_path_mean = read_climate_data()

# IMAGE region classification and temperature zones (nc files)
image_regions, temperature_zones = read_IMAGEregions_and_TempZone()

# Exposure-response function (ERF) files for the relevant diseases, requesting the mean
# (no random draw), arranged in a dataframe together with the disease list and the
# `min_dict` / `max_dict` lookups that are later passed to create_population_df()
df_erf, diseases, min_dict, max_dict = get_erf_dataframe(
    wdir,
    all=False,
    mean=True,
    random_draw=False,
    draw=None,
)

# Annual population (nc file) of the selected scenario
pop_ssp = get_annual_pop(ssp)

# Optimal temperatures (TMREL) for 2020, the default year; mean requested, no random draw
tmrel = get_tmrel_map(
    wdir,
    2020,
    mean=True,
    random_draw=False,
    draw=None,
)

# Relative risks shifted by the TMREL
df_erf_tmrel = shift_rr(
    df_erf,
    tz_tmrel_combinations(pop_ssp, tmrel, temperature_zones),
    diseases,
)

# Day-to-day temperature variability; two arrays are returned, the second one
# presumably for leap years (see create_noise in generate_daily_temp)
noise, noise_leap = create_noise(std_value)

In [None]:
# IMAGE region numbers: used both as the row index of the results table and to mask the region grid
region_ids = range(1, 27)

# Year span for the output file name, derived from `years` so the name always matches the run
# (evaluates to '2010-2100' for the default configuration)
year_span = f'{years[0]}-{years[-1]}'

for ccategory in ccategories:

    # Results table for this climate category: one row per IMAGE region,
    # one column per (year, disease, temperature component)
    rr_year = pd.DataFrame(
        index=region_ids,
        columns=pd.MultiIndex.from_product([years, diseases, ['cold', 'hot', 'all']]),
    )

    for year in years:

        # Artificial daily temperatures for this category and year
        daily_temp, num_days = daily_temp_interp(ccategory, year, cc_path_mean, gcm_diff, gcm_start, gcm_end, noise, noise_leap)
        # Alternative input (ERA5 daily temperatures), kept for reference:
        # daily_temp, num_days = daily_temp_era5(year, pop_ssp)

        # Population grid of the corresponding year
        pop_ssp_year = pop_ssp.sel(time=f'{year}').mean('time').GPOP.values

        for region in region_ids:

            # Populated pixels that belong to this IMAGE region
            image_region_mask = (pop_ssp_year > 0.) & (image_regions == region)

            df_pop = create_population_df(image_region_mask, pop_ssp_year, temperature_zones,
                                          daily_temp, tmrel, num_days, min_dict, max_dict)

            # Attach the relative risks to the population data. The left join keeps every
            # population row even when no ERF entry matches it.
            df_all = pd.merge(df_pop, df_erf_tmrel, on=['temperature_zone', 'daily_temperature', 'tmrel'], how='left')

            # Split into cold (below TMREL) and hot (above TMREL) rows; rows exactly at the
            # TMREL belong to neither subset and only count towards 'all'
            df_cold = df_all[df_all['daily_temperature'] < df_all['tmrel']].copy()
            df_hot = df_all[df_all['daily_temperature'] > df_all['tmrel']].copy()

            # rr_paf_rr's return value is not used, so it presumably writes into `rr_year`
            # in place -- confirm in relative_risk_calculation
            for temp_type, df_subset in (('hot', df_hot), ('cold', df_cold), ('all', df_all)):
                rr_paf_rr(df_subset, rr_year, diseases, year, region, temp_type)

        print(f'Year {year} done')

    # Save the results of this climate category
    rr_year.to_csv(f'{wdir}\\output\\rr_m1-1_all-diseases_{ccategory}_{year_span}_{std_value}std.csv')
    # Output name used with the ERA5 alternative above (a later cell reads a file with this name pattern):
    # rr_year.to_csv(f'{wdir}\\output\\rr_m1-1_all-diseases_{year}_era5.csv')

In [9]:
year = 2019

# NOTE(review): this rebinds `image_regions`, which the data-loading cell assigned from
# read_IMAGEregions_and_TempZone() and which the yearly loop compares against region numbers.
# Re-running that loop after this cell would pick up this table instead.
image_regions = pd.read_csv(f'{wdir}\\SocioeconomicData\\IMAGE_regions.csv', index_col=0, header=0)

# Map each GBD location name to its IMAGE region: the 'GBD region' column holds the
# ', '-separated location names of every 'Region', and the mapping is then inverted
image_regions_dic = image_regions.set_index('Region')['GBD region'].apply(lambda x: x.split(', ')).to_dict()
location_to_image_region = {loc: key for key, locations in image_regions_dic.items() for loc in locations}

### Load GBD mortality data, keep the selected year and the "All ages" group, and tag each
### row with its IMAGE region. Filtering and adding the column in one chain returns a new
### frame, which avoids pandas' SettingWithCopyWarning from assigning onto a filtered slice.
gbd_mortality = (
    pd.read_csv(f'{wdir}\\GBD_Data\\Mortality_Data\\IHME-GBD_2021_DATA-0dc55228.csv')
    .loc[lambda d: (d['year'] == year) & (d['age_name'] == 'All ages')]
    .assign(image_region=lambda d: d['location_name'].map(location_to_image_region))
)

# Sum deaths per IMAGE region and cause. Locations missing from the mapping get NaN as region
# and are left out by groupby's default handling of NaN keys -- verify that this is intended.
gbd_mortality_image_region = gbd_mortality.groupby(["image_region", "cause_name"], as_index=False).agg({"val": "sum"})

# Wide table: one row per IMAGE region, one column per cause; missing combinations become 0
gbd_mortality_image_region = (
    gbd_mortality_image_region
    .pivot(index="image_region", columns="cause_name", values="val")
    .fillna(0)
    .rename_axis(None, axis=1)
)

In [None]:
# Relative risks saved for `year`; the file has three header rows, which become a
# three-level column index (year, disease, temperature component)
rr_csv_path = f'{wdir}\\output\\rr_m1-1_all-diseases_{year}_era5.csv'

# Label the rows with the IMAGE region names. `image_regions` must be the table read from
# IMAGE_regions.csv here (it needs a 'Region' column), not the gridded region data.
rr_year = pd.read_csv(rr_csv_path, header=[0, 1, 2]).set_index(image_regions['Region'])

In [11]:
# Short disease code -> full cause name. The full names are used further down to select
# columns of the GBD mortality table, and the insertion order fixes the plotting order.
diseases_full_names = {
    'ckd': 'Chronic kidney disease',
    'cvd_cmp': 'Cardiomyopathy and myocarditis',
    'cvd_htn': 'Hypertensive heart disease',
    'cvd_ihd': 'Ischemic heart disease',
    'cvd_stroke': 'Stroke',
    'diabetes': 'Diabetes mellitus',
    'inj_animal': 'Animal contact',
    'inj_disaster': 'Exposure to forces of nature',
    'inj_drowning': 'Drowning',
    'inj_homicide': 'Interpersonal violence',
    'inj_mech': 'Exposure to mechanical forces',
    'inj_othunintent': 'Other unintentional injuries',
    'inj_suicide': 'Self-harm',
    'inj_trans_other': 'Other transport injuries',
    'inj_trans_road': 'Road injuries',
    'resp_copd': 'Chronic obstructive pulmonary disease',
    'lri': 'Lower respiratory infections',
}

In [None]:
# Temperature components for which results are derived
methods = ['all', 'cold', 'hot']

# Relative risks for `year`, one frame per component (rows: IMAGE regions, columns: diseases).
# The year is looked up as a string because the column labels were read from the CSV header.
dfs = {
    f'rr_{method}': rr_year.xs(method, axis=1, level=2).xs(f'{year}', axis=1, level=0)
    for method in methods
}

for method in methods:
    rr = dfs[f'rr_{method}']

    # NaN-filled frames with the target layout; only the diseases listed in
    # diseases_full_names are filled in, every other column stays NaN
    paf = pd.DataFrame().reindex_like(rr)
    mortality = pd.DataFrame().reindex_like(gbd_mortality_image_region)

    for disease, cause_name in diseases_full_names.items():
        # Population attributable fraction computed from the relative risk: PAF = 1 - 1/RR
        paf[disease] = 1 - (1 / rr[disease])
        # Attributable deaths = PAF x GBD deaths of the matching cause (aligned on region)
        mortality[cause_name] = paf[disease] * gbd_mortality_image_region[cause_name]

    dfs[f'paf_{method}'] = paf
    dfs[f'mortality_{method}'] = mortality

# Totals are computed before formatting: reusing the same quote character inside an f-string
# replacement field is a SyntaxError on Python versions older than 3.12
totals = {method: int(dfs[f'mortality_{method}'].sum(axis=0).sum()) for method in methods}
print(f"Global Attributable Burden: ALL = {totals['all']}; COLD = {totals['cold']}; HOT = {totals['hot']}")

In [None]:
import matplotlib.cm as cm

# Temperature component to plot: 'all', 'cold' or 'hot'
method = 'hot'

# Full cause names of the modelled diseases, in the order of diseases_full_names
cause_names = list(diseases_full_names.values())

# One colour per disease, sampled from 'tab20'. plt.get_cmap is used because
# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
num_colors = len(cause_names)
cmap = plt.get_cmap('tab20', num_colors)
disease_colors = {cause: cmap(i) for i, cause in enumerate(cause_names)}

# Attributable deaths per region, restricted to the modelled diseases
df = dfs[f'mortality_{method}'][cause_names]

# Stacked bar plot: each disease is drawn on top of the running total of the previous ones
fig, ax = plt.subplots(figsize=(12, 8))
cumval = 0
for col in df.columns:
    ax.bar(df.index, df[col], bottom=cumval, color=disease_colors.get(col), label=col)
    cumval = cumval + df[col]

ax.tick_params(axis='x', labelrotation=90)
ax.set_ylabel('Total number of deaths')
ax.set_title(f'Total attributable deaths in {year} - {method} deaths')
ax.legend()
plt.show()