In [1]:
import copy
import numpy as np
import os
from pathlib import Path
import pandas as pd
import sys

from climada.hazard import Hazard
from climada.entity.exposures import Exposures
from climada.engine import ImpactCalc

# Switch the working directory to the project checkout before importing the
# project-local `exposure` and `vulnerability` modules — presumably so that they
# resolve from the current directory; TODO confirm whether these modules also
# depend on paths relative to this directory.
os.chdir('/cluster/project/climate/meilers/scripts/displacement/global-displacement-risk')
import exposure
import vulnerability

# Constants
# Root folder of the flood hazard rasters; the country name and the scenario
# are appended as subfolders when the hazard files are looked up.
PATH_FL_HAZARD = Path('/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA')
# Root folder for the flood risk results; one subfolder per country ISO code is used.
PATH_RESULTS = Path('/cluster/work/climate/evelynm/IDMC_UNU/results/risk_fl')
# Damage thresholds per scenario label: entries of the building impact matrix
# strictly above the threshold are flagged as displacement.
DMG_THRESHS = {'low' : 0.3, 'med' : 0.55, 'high': 0.7}

# Functions
def agg_sparse_rps(sparse_bool, exp_gdf, rps, scen_name, group_admin1=True):
    """
    Aggregate a sparse boolean impact matrix (displacement True/False) obtained from various tiles and RP maps
    into the full exposure format per RP.

    Row ``i`` of ``sparse_bool`` is paired with ``rps[i]``. For every return period the
    boolean row is multiplied with ``exp_gdf['valhum']`` and stored in the column
    ``imp_rp_{rp}_{scen_name}``.

    Parameters
    -----------
    sparse_bool: sparse.csr_matrix
        imp_mat with types bool (contrasted against dmg threshold); needs one column
        per row of exp_gdf and at least one row per entry of rps
    exp_gdf : gpd.GeoDataFrame
        exposure geodataframe to append impacts to. It is modified in place: one
        column per return period is added (or overwritten if it already exists).
        Must contain the column 'valhum' and, if group_admin1 is True, 'admin1'.
    rps : np.array
        unique return periods to group by (1/frequ of hazard events), in the same
        order as the rows of sparse_bool
    scen_name : str
        scenario name to differentiate various impact matrices by(suggestion {impfsource}_{thresh} or {thresh})
    group_admin1 : bool
        whether to group results by admin1 (default: True), else full exp_gdf returned

    Returns
    -------
    DataFrame with columns imp_rp_{rp}_{scen_name}, either full exposure or grouped by admin1

    Raises
    ------
    AssertionError
        If the number of matrix columns differs from the number of exposure rows.
    """
    full_bool = sparse_bool.toarray()

    # Ensure the boolean matrix and the exposure dataframe have compatible shapes
    assert full_bool.shape[1] == len(exp_gdf), "Shape mismatch between impact matrix and exposure data"

    imp_cols = [f'imp_rp_{rp}_{scen_name}' for rp in rps]

    # Calculate impacts for each return period and save to exposure GeoDataFrame
    for idx, imp_col in enumerate(imp_cols):
        exp_gdf[imp_col] = full_bool[idx, :].astype(int) * exp_gdf['valhum']

    # Group exposure GeoDataFrame by admin1 and sum over impacts if required.
    # The impact columns are selected BEFORE summing: summing the whole frame would
    # also try to add up geometry and other non-numeric columns, which raises a
    # TypeError on pandas >= 2.0 (numeric_only defaults to False) and is wasted work.
    if group_admin1:
        return exp_gdf.groupby('admin1')[imp_cols].sum()

    return exp_gdf[imp_cols]

def compute_aeds(df_imps, rps, scen_name):
    """
    Add the average annually expected displacement (AED) of one scenario.

    The AED is accumulated manually as the sum over all return periods of
    displacement(rp) / rp.

    Parameters
    ----------
    df_imps : pd.DataFrame
        Impact table holding one column imp_rp_{rp}_{scen_name} per return period.
        It is modified in place.
    rps : list or np.array
        Return periods entering the sum.
    scen_name : str
        Scenario name used in the impact column names and in the new AED column.

    Returns
    -------
    df_imps : pd.DataFrame
        The same DataFrame object, extended by the column aed_{scen_name}.
    """
    aed_col = f'aed_{scen_name}'
    df_imps[aed_col] = 0

    # Lazily produce one contribution per return period, in the order given by rps.
    contributions = (df_imps[f'imp_rp_{rp}_{scen_name}'] / rp for rp in rps)
    for contribution in contributions:
        df_imps[aed_col] = df_imps[aed_col] + contribution

    return df_imps

def compute_admin0(df_imps):
    """
    Append a total row labelled 'admin0' that sums every column over all rows.

    Parameters
    ----------
    df_imps : pd.DataFrame
        DataFrame containing impact data; it is not modified.

    Returns
    -------
    pd.DataFrame
        New DataFrame with the original rows followed by the 'admin0' row.
    """
    # Column-wise totals, turned into a single-row frame whose index label is 'admin0'.
    totals_row = df_imps.sum(axis=0).rename('admin0').to_frame().T
    return pd.concat([df_imps, totals_row], ignore_index=False)
    
def compute_impstats(list_dfimps, rps):
    """
    Compute min, median and max impact across scenarios, per RP and for the AED.

    The scenario DataFrames are joined column-wise; columns are assigned to a return
    period when their name contains 'rp_{rp}_' and to the AED when it contains 'aed'.
    Note: Only makes sense if various impact sources in scenarios.

    Parameters
    ----------
    list_dfimps : list of pd.DataFrame
        Impact DataFrames of the different scenarios.
    rps : list or np.array
        Return periods to consider.

    Returns
    -------
    imp_stats : pd.DataFrame
        Columns rp_{rp}_min / rp_{rp}_med / rp_{rp}_max for every return period,
        followed by aed_min / aed_med / aed_max.
    """
    combined = pd.concat(list_dfimps, axis=1)
    all_cols = list(combined.columns)
    imp_stats = pd.DataFrame()

    for rp in rps:
        # All scenario columns that belong to this return period.
        per_rp = combined[[name for name in all_cols if f'rp_{rp}_' in name]]
        imp_stats[f'rp_{rp}_min'] = per_rp.min(axis=1)
        imp_stats[f'rp_{rp}_med'] = per_rp.median(axis=1)
        imp_stats[f'rp_{rp}_max'] = per_rp.max(axis=1)

    # Same statistics over the AED columns of all scenarios.
    per_aed = combined[[name for name in all_cols if 'aed' in name]]
    imp_stats['aed_min'] = per_aed.min(axis=1)
    imp_stats['aed_med'] = per_aed.median(axis=1)
    imp_stats['aed_max'] = per_aed.max(axis=1)

    return imp_stats

# ISO3 country code -> country name; the name is used as a subfolder when
# locating the hazard rasters.
country = dict(ETH='Ethiopia', SOM='Somalia', SDN='Sudan')

In [2]:
def sort_columns_by_rp(df, rps, suffixes):
    """
    Return df restricted to its impact and AED columns, ordered by return period.

    The impact columns imp_rp_{rp}_{suffix} come first, by ascending return period
    and, within one return period, in the order of suffixes. The aed_{suffix}
    columns follow in the order of suffixes. Column names that are not present in
    df are skipped; all other columns of df are dropped.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing impact and/or AED columns.
    rps : list or np.array
        Return periods as they appear in the column names.
    suffixes : list of str
        Scenario suffixes as they appear in the column names.

    Returns
    -------
    pd.DataFrame
        Selection of df with the columns in the order described above.
    """
    candidates = [f"imp_rp_{rp}_{suffix}" for rp in np.sort(rps) for suffix in suffixes]
    candidates += [f"aed_{suffix}" for suffix in suffixes]
    return df[[name for name in candidates if name in df.columns]]

In [3]:
cntry_iso = 'SOM'
rcp = 'HISTORICAL'
path_save = PATH_RESULTS / cntry_iso

# Create the country result folder. parents=True also creates a missing results
# root, and exist_ok=True makes re-running the cell safe without a separate
# existence check.
path_save.mkdir(parents=True, exist_ok=True)

# load bem, make exp
gdf_bem_subcomps = exposure.gdf_from_bem_subcomps(cntry_iso, opt='full')
gdf_bem_subcomps = gdf_bem_subcomps[gdf_bem_subcomps.valhum>1] # filter out rows with basically no population
gdf_bem_subcomps = exposure.assign_admin1_attr(gdf_bem_subcomps, exposure.path_admin1_attrs, source='gadm')

# Build the exposure from a copy so the filtered BEM frame itself stays untouched.
exp = Exposures(gdf_bem_subcomps.copy())
exp.value_unit = 'building_unit'
# Coordinates are taken from the point geometry of every row.
exp.gdf['longitude'] = exp.gdf.geometry.x
exp.gdf['latitude'] = exp.gdf.geometry.y
# Every row gets a value of 1 (in 'building_unit'); the number of people stays in 'valhum'.
exp.gdf['value'] = 1

In [4]:
# load hazard
HAZ_FOLDER = PATH_FL_HAZARD / country[cntry_iso] / rcp
# NOTE: glob yields the files in directory order, not sorted by return period.
haz_files = list(map(str, HAZ_FOLDER.glob('*.tif')))
# The last four characters of each file stem hold the return period (e.g. 'Hazmap__T0025' -> 25).
rp = np.array([int(Path(tif_path).stem[-4:]) for tif_path in haz_files])

In [5]:
# Display the hazard folder resolved for the selected country and scenario.
HAZ_FOLDER

PosixPath('/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA/Somalia/HISTORICAL')

In [6]:
# Display the raster files found in the hazard folder (in glob order, not sorted by return period).
haz_files

['/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA/Somalia/HISTORICAL/Hazmap__T0025.tif',
 '/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA/Somalia/HISTORICAL/Hazmap__T0002.tif',
 '/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA/Somalia/HISTORICAL/Hazmap__T0250.tif',
 '/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA/Somalia/HISTORICAL/Hazmap__T0010.tif',
 '/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA/Somalia/HISTORICAL/Hazmap__T0005.tif',
 '/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA/Somalia/HISTORICAL/Hazmap__T0050.tif',
 '/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA/Somalia/HISTORICAL/Hazmap__T1000.tif',
 '/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA/Somalia/HISTORICAL/Hazmap__T0500.tif',
 '/cluster/work/climate/evelynm/IDMC_UNU/hazard/flood_HM_CIMA/Somalia/HISTORICAL/Hazmap__T0100.tif']

In [7]:
# Build one flood hazard from all raster files: one event per file, with the
# event frequency set to the reciprocal of the file's return period.
haz = Hazard.from_raster(
    haz_type='FL',
    files_intensity=haz_files,
    src_crs='WGS84',
    attrs={
        'unit': 'm',
        'event_id': np.arange(len(haz_files)),
        'frequency': 1/rp,
    },
)

In [8]:
# Recover the return periods as the reciprocal of the event frequencies stored on
# the hazard. The result is a float array, so the return periods show up as e.g.
# '25.0' in the impact column names built from it.
rps = 1/haz.frequency
rps

array([  25.,    2.,  250.,   10.,    5.,   50., 1000.,  500.,  100.])

In [9]:
# compute physical impact and save for future postproc
# One entry per impact function source: (mapping applied to 'se_seismo' to get the
# impact function id, impact function set used for the calculation).
impf_scenarios = {
    # scenario 1: capra/cima impfs
    'cima': (vulnerability.DICT_PAGER_FLIMPF_CIMA, vulnerability.IMPF_SET_FL_CIMA),
    # scenario 2: ivm impfs
    'ivm': (vulnerability.DICT_PAGER_FLIMPF_IVM, vulnerability.IMPF_SET_FL_IVM),
}

dict_imp_bldg = {}
for impf_source, (impf_id_map, impf_set) in impf_scenarios.items():
    # The 'impf_FL' column is overwritten for every scenario before its impact run.
    exp.gdf['impf_FL'] = exp.gdf['se_seismo'].map(impf_id_map)
    dict_imp_bldg[impf_source] = ImpactCalc(exp, impf_set, haz).impact(save_mat=True)

# displacement postprocessing (thresholds)
dict_bools_displ = {impf_source: {} for impf_source in ('cima', 'ivm')}

In [10]:
suffixes = ['low', 'med', 'high']

# Flag displacement: impact matrix entries strictly above the damage threshold.
for source, imp_bldg in dict_imp_bldg.items():
    for thresh, dmg_limit in DMG_THRESHS.items():
        dict_bools_displ[source][thresh] = imp_bldg.imp_mat > dmg_limit

# Impact df per scenario & RP + AED
dict_df_imps_admin1 = {'cima': {}, 'ivm': {}}

for source, dict_threshs in dict_bools_displ.items():
    for thresh, sparse_bool in dict_threshs.items():
        # Compute impacts per admin1, then the AED column, then the admin0 total row
        df_imps = agg_sparse_rps(sparse_bool, exp.gdf, rps, thresh, group_admin1=True)
        df_imps = compute_aeds(df_imps, rps, thresh)
        df_imps = compute_admin0(df_imps)

        # Sort columns by RP
        dict_df_imps_admin1[source][thresh] = sort_columns_by_rp(df_imps, rps, suffixes)

In [11]:
# Inspect one scenario: 'ivm' impact functions combined with the 'med' damage threshold.
res_df = dict_df_imps_admin1['ivm']['med']
res_df

Unnamed: 0,imp_rp_2.0_med,imp_rp_5.0_med,imp_rp_10.0_med,imp_rp_25.0_med,imp_rp_50.0_med,imp_rp_100.0_med,imp_rp_250.0_med,imp_rp_500.0_med,imp_rp_1000.0_med,aed_med
925.0,0.0,2423.795,4334.416,9214.566,9245.712,12270.56,12328.83,12340.28,12358.52,1680.75754
1500.0,0.0,3352.189,3352.189,3352.189,3352.189,3352.189,3352.189,3352.189,3352.189,1263.77532
2836.0,0.0,240.3509,246.7542,246.7542,363.3745,374.7301,376.4441,700.6955,776.1198,97.313834
2837.0,0.0,7617.649,7702.039,29168.11,30710.82,31597.74,32116.68,56518.77,60353.01,4692.509242
2838.0,0.0,5772.433,5772.433,207463.5,252169.7,252169.7,377455.3,377455.3,377455.3,20237.546863
2839.0,0.0,8565.24,8565.24,10644.77,21104.14,21104.14,21105.73,21113.17,21113.17,3776.249328
2840.0,0.0,10973.35,11352.45,11638.5,12594.72,13472.53,13806.93,15950.37,17150.64,4286.353489
2841.0,0.0,64303.72,73436.06,83379.75,95300.91,105166.6,108413.5,124755.5,130411.7,27310.80124
2842.0,0.0,53698.98,66739.76,123582.8,133189.4,151309.1,169872.7,187129.6,215723.1,27803.438704
2843.0,0.0,227990.0,242549.3,321713.4,476831.3,480183.9,484055.5,490475.4,495552.1,100472.654392


In [12]:
# Format every value as a rounded string with thousands separators, for display only.
# Series.map is applied column by column instead of DataFrame.applymap, which is
# deprecated since pandas 2.1; the per-column form works on old and new pandas alike.
df_formatted = res_df.apply(lambda col: col.map(lambda x: f"{x:,.0f}"))
df_formatted

Unnamed: 0,imp_rp_2.0_med,imp_rp_5.0_med,imp_rp_10.0_med,imp_rp_25.0_med,imp_rp_50.0_med,imp_rp_100.0_med,imp_rp_250.0_med,imp_rp_500.0_med,imp_rp_1000.0_med,aed_med
925.0,0,2424,4334,9215,9246,12271,12329,12340,12359,1681
1500.0,0,3352,3352,3352,3352,3352,3352,3352,3352,1264
2836.0,0,240,247,247,363,375,376,701,776,97
2837.0,0,7618,7702,29168,30711,31598,32117,56519,60353,4693
2838.0,0,5772,5772,207463,252170,252170,377455,377455,377455,20238
2839.0,0,8565,8565,10645,21104,21104,21106,21113,21113,3776
2840.0,0,10973,11352,11639,12595,13473,13807,15950,17151,4286
2841.0,0,64304,73436,83380,95301,105167,108413,124756,130412,27311
2842.0,0,53699,66740,123583,133189,151309,169873,187130,215723,27803
2843.0,0,227990,242549,321713,476831,480184,484055,490475,495552,100473


In [13]:
#save all necessary outputs
# One CSV per impact function source, holding all threshold scenarios side by side.
csv_targets = {
    'cima': path_save / f'{cntry_iso}_{rcp}_cima.csv',
    'ivm': path_save / f'{cntry_iso}_{rcp}_ivm.csv',
}
for source, csv_path in csv_targets.items():
    pd.concat(dict_df_imps_admin1[source].values(), axis=1).to_csv(csv_path)