# Tropical cyclone hazard for displacement risk modelling

This tutorial shows how to use the TC hazard event sets for historical and future periods.

Technical specifications.

Resolution: 150 arcsec 

**Emission scenarios considered:** SSP245, SSP370, SSP585.  
**GCMs considered:** cesm2, cnrm6, ecearth6, fgoals, ipsl6, miroc6, mpi6, mri6, ukmo6.  
**Periods considered:** historical 1980-2018 (ERA-5) and 1995-2014 (20thcal GCM output); future 2041-2060 and 2081-2100.  

In [1]:
import os
import pandas as pd
import numpy as np

os.chdir('/Users/simonameiler/Documents/WCR/Displacement/global-displacement-risk')
#os.chdir('/cluster/project/climate/meilers/scripts/displacement/global-displacement-risk') # change back to root folder, not "~/doc"
import exposure, vulnerability

### Country to basin assignment - step 1

For tropical cyclones, the hazard data is split into four broad basins (AP, IO, SH, WP). We first need to assign each country to its basin. We do this in a three-step process:

1. check if there's a match with existing region/country dictionaries  
2. if not, keep the country in the 'ROW' category and first load its exposure data; then use the country's lat/lon bounds to try to assign it to a basin  
3. if the country spans multiple basins, keep the 'ROW' category and load the global TC data

Step 1 follows next. Step 2 and 3 further below.

In [2]:
from climada.entity.impact_funcs.trop_cyclone import ImpfSetTropCyclone
# Element [3] of the returned tuple is used as a mapping region key -> countries.
# NOTE(review): despite the `iso3n` in the variable name, the 'LKA' lookup further
# down only works if these are ISO3 alpha codes -- confirm against the CLIMADA docs.
iso3n_per_region = ImpfSetTropCyclone.get_countries_per_region()[3]

In [3]:
# Work on a shallow copy so the dictionary returned by CLIMADA stays untouched
region_ids_TC_bsn = dict(iso3n_per_region)

# Hazard basin -> impact-function regions that are folded into it.
# The order matters only for the key order of the resulting dictionary.
basin_merge_plan = (
    ('AP', ('NA1', 'NA2')),                # both NA regions
    ('IO', ('NI',)),                       # NI is simply renamed
    ('SH', ('OC', 'SI')),                  # OC and SI
    ('WP', ('WP1', 'WP2', 'WP3', 'WP4')),  # all four WP sub-regions
)

for basin_key, source_keys in basin_merge_plan:
    first_key, *other_keys = source_keys
    # Remove each source region and concatenate its countries left to right
    merged_countries = region_ids_TC_bsn.pop(first_key)
    for source_key in other_keys:
        merged_countries = merged_countries + region_ids_TC_bsn.pop(source_key)
    region_ids_TC_bsn[basin_key] = merged_countries

In [4]:
def get_region(country, regions=None):
    """Return the key of the first region whose country collection contains ``country``.

    Parameters
    ----------
    country : str
        Country code in the same form as stored in the region mapping (e.g. 'LKA').
    regions : dict, optional
        Mapping region key -> collection of country codes. When omitted, the
        notebook-level ``region_ids_TC_bsn`` dictionary is used.

    Returns
    -------
    str or None
        The first matching key in the mapping's iteration order, or ``None``
        when no region lists the country.
    """
    if regions is None:
        # Looked up at call time so the function follows later edits of the global
        regions = region_ids_TC_bsn
    return next(
        (region for region, countries in regions.items() if country in countries),
        None,
    )

cntry_iso = 'LKA'
reg = get_region(cntry_iso)

reg

'IO'

## Load exposure from BEM

In [5]:
from climada.entity.exposures import Exposures

In [None]:
# Load exposure
# Full set of BEM sub-components for the selected country
gdf_bem_subcomps = exposure.gdf_from_bem_subcomps(cntry_iso, opt='full')
gdf_bem_subcomps = gdf_bem_subcomps[gdf_bem_subcomps.valhum>1] # filter out rows with basically no population
# Look up the TC impact function id for every row from its `se_seismo` class;
# a class missing from the dictionary raises KeyError
gdf_bem_subcomps["impf_TC"] = gdf_bem_subcomps.apply(lambda row: vulnerability.DICT_PAGER_TCIMPF_CAPRA[row.se_seismo], axis=1)
# Attach admin1 attributes (GADM source); presumably this adds the `admin1` column
# used further below -- TODO confirm in exposure.assign_admin1_attr
gdf_bem_subcomps = exposure.assign_admin1_attr(gdf_bem_subcomps, exposure.path_admin1_attrs, source='gadm')

In [None]:
# Build the exposure object from a copy of the BEM points
exp = Exposures(gdf_bem_subcomps.copy())
exp.value_unit = 'Pop. count'

# The population column `valhum` becomes the exposure value
exp.gdf.rename(columns={'valhum': 'value'}, inplace=True)

# Coordinates are read from the point geometry
exp.gdf['longitude'] = exp.gdf.geometry.x
exp.gdf['latitude'] = exp.gdf.geometry.y

# Keep only points that have a valid latitude
has_latitude = ~np.isnan(exp.gdf.latitude)
exp.gdf = exp.gdf[has_latitude]

exp.gdf.head()

In [None]:
# Thousands-separated total of the exposure value column, without decimals
print(f'Total population {cntry_iso}: {exp.gdf.value.sum():,.0f}')

**Make a new exposure object for each admin1 region**

In [None]:
# Sorted list of the distinct admin1 identifiers present in the exposure
ad1 = np.unique(exp.gdf.admin1).tolist()

In [None]:
import copy as cp

# 'admin0' holds the national exposure; every admin1 region gets its own entry
exp_dict = {'admin0': exp}
for admin1 in ad1:
    # Deep copy first, then swap in the rows belonging to this admin1 region,
    # so that the national exposure object is never modified
    exp_admin1 = cp.deepcopy(exp)
    exp_admin1.gdf = exp.gdf[exp.gdf.admin1 == admin1]
    exp_dict[f'admin1_{int(admin1)}'] = exp_admin1

#### Get lat/lon min/max from exposure

In [None]:
# Bounding box of all exposure points
lat_min, lat_max = exp.gdf['latitude'].min(), exp.gdf['latitude'].max()
lon_min, lon_max = exp.gdf['longitude'].min(), exp.gdf['longitude'].max()

In [None]:
lat_min, lat_max, lon_min, lon_max

In [None]:
# Basin extents as [lon_min, lon_max, lat_min, lat_max]
BASIN_BOUNDS = {
    'AP': [-180.0, 10.0, 0.0, 85.0],    # North Atlantic/Eastern Pacific Basin
    'IO': [10.0, 100.0, 0.0, 85.0],     # Indian Ocean Basin
    'SH': [-180.0, 180.0, -85.0, 0.0],  # Southern Hemisphere Basin
    'WP': [100.0, 180.0, 0.0, 85.0],    # Western Pacific Basin
}


def assign_basin(lon_min, lon_max, lat_min, lat_max):
    """Assign a bounding box to a basin of ``BASIN_BOUNDS``.

    Basins are visited in dictionary order. The first basin that fully
    contains the box is returned immediately. Otherwise the result depends on
    how many basins the box overlaps (touching edges count as overlap):
    exactly one gives that basin, more than one gives 'ROW', none gives
    'Unknown Basin'.

    Parameters
    ----------
    lon_min, lon_max, lat_min, lat_max : float
        Bounding box of the country.

    Returns
    -------
    str
        A key of ``BASIN_BOUNDS``, 'ROW' or 'Unknown Basin'.
    """
    touched = []

    for name, (west, east, south, north) in BASIN_BOUNDS.items():
        lies_inside = (west <= lon_min and lon_max <= east and
                       south <= lat_min and lat_max <= north)
        if lies_inside:
            return name

        intersects = (lon_min <= east and west <= lon_max and
                      lat_min <= north and south <= lat_max)
        if intersects:
            touched.append(name)

    if not touched:
        return 'Unknown Basin'
    if len(touched) == 1:
        return touched[0]
    return 'ROW'

In [None]:
# Basin derived from the exposure bounding box; note the argument order (lon first, then lat)
assigned_basin = assign_basin(lon_min, lon_max, lat_min, lat_max)
print(f"The country is assigned to the {assigned_basin} basin.")

In [None]:
# Steps 2 and 3 of the basin assignment: fall back to the bounding-box result only
# when step 1 found no specific basin (no dictionary match, or the catch-all 'ROW').
# A basin found in step 1 is kept. If the box spans several basins, assigned_basin
# is itself 'ROW' and the global data is loaded.
if reg is None or reg == 'ROW':
    reg = assigned_basin

In [None]:
reg

## Load hazard

### a) Event sets historical

In [None]:
from climada.util.constants import SYSTEM_DIR
from climada.hazard import TropCyclone, Hazard
hazard_dir = SYSTEM_DIR/"hazard"/"present"

In [None]:
# 'ROW' needs the event sets of all four basins merged; any other value of
# `reg` names the single basin file to read
if reg == 'ROW':
    haz_list = [
        TropCyclone.from_hdf5(hazard_dir / f'TC_{bsn}_0150as_MIT_H08.hdf5')
        for bsn in ['AP', 'IO', 'SH', 'WP']
    ]
    tc_haz = Hazard.concat(haz_list)
else:
    tc_haz = TropCyclone.from_hdf5(hazard_dir / f'TC_{reg}_0150as_MIT_H08.hdf5')

In [None]:
# Restrict the hazard to the bounding box of the exposure
tc_haz_sel = tc_haz.select(extent=(lon_min, lon_max, lat_min, lat_max))

In [None]:
tc_haz_sel.plot_intensity(event=0)

### b) Event sets future

In [None]:
# GCM identifiers as they appear in the future hazard file names
models = ['cesm2', 'cnrm6', 'ecearth6', 'fgoals', 'ipsl6', 'miroc6', 'mpi6', 'mri6', 'ukmo6']
# Emission scenario; it is prefixed to the run tag in the file names of the future runs
rcp = 'ssp370'
# Run tags found in the hazard file names ('20thcal' is the GCM historical run)
scenario = ['20thcal', 'cal', '_2cal']
# Label used for each run tag when building the keys of the result dictionaries
yr_dict = {'20thcal': 'hist',
           'cal': 2050,
          '_2cal': 2100}

In [None]:
hazard_dir = SYSTEM_DIR/"hazard"/"future"

# Result keys: '<gcm>_hist' for the GCM historical run,
# '<gcm>_<rcp>_<year>' for the future runs
fut_haz_dict = {}

for gcm in models:
    for scen in scenario:
        is_gcm_hist = scen == '20thcal'
        # Historical files carry the bare run tag, future files prefix it with the scenario
        run_tag = scen if is_gcm_hist else f"{rcp}{scen}"
        if is_gcm_hist:
            result_key = f"{gcm}_{yr_dict[scen]}"
        else:
            result_key = f"{gcm}_{rcp}_{yr_dict[scen]}"

        if reg != 'ROW':
            # A single basin file is enough
            haz = TropCyclone.from_hdf5(
                hazard_dir / f"TC_{reg}_0150as_MIT_{gcm}_{run_tag}_H08.hdf5")
        else:
            # Merge the event sets of all four basins
            haz = Hazard.concat([
                TropCyclone.from_hdf5(
                    hazard_dir / f"TC_{bsn}_0150as_MIT_{gcm}_{run_tag}_H08.hdf5")
                for bsn in ['AP', 'IO', 'SH', 'WP']
            ])

        # Crop to the exposure bounding box before storing
        fut_haz_dict[result_key] = haz.select(extent=(lon_min, lon_max, lat_min, lat_max))

## Impact functions

In [None]:
impf_set_tc = vulnerability.IMPF_SET_TC_CAPRA

In [None]:
from climada.entity import ImpactFunc, ImpactFuncSet
impf_set_tc_step = ImpactFuncSet()

In [None]:
# The threshold of building damage after which all people are displaced. Below, no-one is displaced.
building_thresh = 0.55 # 55% iDMC v1; CIMA: 30% for Somalia to 60% for other countries. 

for imp_id in impf_set_tc.get_ids(haz_type='TC'):
    # Fetch the function once, scoped to the 'TC' hazard type the ids were listed for
    impf = impf_set_tc.get_func(haz_type='TC', fun_id=imp_id)
    # Invert the damage curve: intensity at which the mean damage degree reaches
    # building_thresh. np.interp clamps to the end points of the curve.
    # NOTE(review): assumes impf.mdd is non-decreasing, as np.interp requires -- confirm.
    thresh = np.interp(building_thresh, impf.mdd, impf.intensity)
    impf_set_tc_step.append(
        ImpactFunc.from_step_impf(
            intensity=(0, thresh, thresh * 10),
            haz_type='TC',
            impf_id=imp_id,
            intensity_unit='m/s',
        )
    )

## Impacts

### a) Historical

In [None]:
from climada.engine import ImpactCalc

# Impact of the historical event set on the national exposure, using the step functions
impcalc = ImpactCalc(exposures=exp, impfset=impf_set_tc_step, hazard=tc_haz_sel)
impact = impcalc.impact()

In [None]:
# Thousands-separated aggregate annual average impact, without decimals
print(f'Annual average displacement: {impact.aai_agg:,.0f}')

In [None]:
# The curve is evaluated at return periods 1..250 in steps of one year, so the
# value for return period rp sits at position rp - 1
rp_indices = [rp - 1 for rp in (10, 25, 50, 100, 250)]
freq_curve = impact.calc_freq_curve(return_per=np.arange(1, 251, 1))
pm_data_hist = [freq_curve.impact[position] for position in rp_indices]
freq_curve.plot()

In [None]:
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

# Single map axis in plate carrée projection for the historical impact
fig, ax = plt.subplots(figsize=(8, 6), subplot_kw={'projection': ccrs.PlateCarree()})

# Use the plot method and pass the GeoAxes
impact.plot_hexbin_eai_exposure(
    ignore_zero=True,
    pop_name=False,
    axis=ax
)

plt.show()

### b) Future

In [None]:
# One impact object per future hazard set, keyed like fut_haz_dict
impact_dict = {
    fut: ImpactCalc(exp, impf_set_tc_step, haz).impact()
    for fut, haz in fut_haz_dict.items()
}

In [None]:
# Per run: aggregate annual average impact, and the impacts at the return
# periods selected by rp_indices
aai_agg_dict, pmd_dict = {}, {}
for fut, imp in impact_dict.items():
    freq_curve = imp.calc_freq_curve(return_per=np.arange(1, 251, 1))
    aai_agg_dict[fut] = imp.aai_agg
    pmd_dict[fut] = [freq_curve.impact[idx] for idx in rp_indices]

In [None]:
# Put the historical (ERA-5) result first so it becomes the first table row
aai_agg_dict = {'ERA-5_hist': impact.aai_agg, **aai_agg_dict}

In [None]:
# Same ordering as aai_agg_dict: the historical (ERA-5) entry comes first
pmd_dict = {'ERA-5_hist': pm_data_hist, **pmd_dict}

In [None]:
# Build one table row per run. Keys look like '<model>_<scenario>_<period>'
# for future runs and '<model>_hist' for historical runs.
rp_labels = [10, 25, 50, 100, 250]
rp_columns = [f'RP_{rp}' for rp in rp_labels]

table_rows = []
for key, value in aai_agg_dict.items():
    parts = key.split('_')
    # Distinct local names: the notebook-level `scenario` list is still needed
    # by the hazard-loading cell and must not be overwritten here
    if len(parts) == 3:
        model_name, scen_name, period_name = parts
    elif len(parts) == 2:
        model_name, scen_name = parts
        period_name = ""
    else:
        continue

    row = {'Model': model_name, 'Scenario': scen_name, 'Period': period_name, 'AAD': value}
    # Take the return-period values from the same key, so rows and RP columns
    # stay aligned even when a malformed key is skipped above
    row.update(zip(rp_columns, pmd_dict[key]))
    table_rows.append(row)

df = pd.DataFrame(table_rows, columns=['Model', 'Scenario', 'Period', 'AAD'] + rp_columns)

df

In [None]:
# Drop the ERA-5 row so only GCM-driven runs enter the multi-model statistics
filtered_df = df[df['Model'] != 'ERA-5']

# Multi-model median of AAD, RP_10 ... RP_250 for each combination of scenario and period.
# numeric_only=True keeps the string 'Model' column out of the aggregation;
# without it pandas >= 2.0 raises a TypeError.
df_abs = filtered_df.groupby(['Scenario', 'Period']).median(numeric_only=True).reset_index()
df_abs

In [None]:
# take multi-model median of the delta values...

In [None]:
# Per-model change: future run minus the historical run of the same model
delta_df_list = []

# Historical GCM rows (ERA-5 excluded)
hist_df = df[(df['Model'] != 'ERA-5') & (df['Scenario'] == 'hist')]

# Numeric columns after the three label columns are the ones differenced
value_columns = [col for col in df.columns[3:] if pd.api.types.is_numeric_dtype(df[col])]

for model in df['Model'].unique():
    model_df = df[df['Model'] == model]

    # A model without a historical run has no baseline to difference against
    hist_rows = model_df[model_df['Scenario'] == 'hist']
    if hist_rows.empty:
        continue
    hist_values = hist_rows.iloc[0]

    for period in ['2050', '2100']:
        scenario_df = model_df[model_df['Period'] == period]
        if scenario_df.empty:
            continue

        # Start from the future row so the label columns are carried over
        future_values = scenario_df.iloc[0]
        delta_values = future_values.copy()
        for col in value_columns:
            delta_values[col] = future_values[col] - hist_values[col]
        delta_df_list.append(delta_values)

# One row per (model, period) pair
delta_df = pd.DataFrame(delta_df_list)

delta_df

In [None]:
# Multi-model median of the per-model deltas for each period.
# delta_df already holds "future minus historical" for every model, so the
# historical median must not be subtracted a second time; this matches the
# admin1 loop further below. numeric_only=True keeps the string columns out
# of the median, which pandas >= 2.0 would otherwise reject.
diff_df = delta_df.groupby('Period').median(numeric_only=True).reset_index()
diff_df

In [None]:
# Historical (ERA-5) row with the label columns removed, leaving only the statistics
era5_df = df[df['Model'] == 'ERA-5'].drop(columns=['Model', 'Scenario', 'Period'])
era5_df

In [None]:
# Future estimate = ERA-5 baseline + multi-model median change
df_fut = diff_df.copy()
stat_columns = ['AAD', 'RP_10', 'RP_25', 'RP_50', 'RP_100', 'RP_250']
# First (only) ERA-5 row as a flat array; it is added to every period row
era5_baseline = era5_df[stat_columns].values[0]
df_fut[stat_columns] = df_fut[stat_columns] + era5_baseline

df_fut

In [None]:
# Label the baseline row and the future rows, then stack them into one table
era5_df['Scenario'] = rcp
era5_df['Period'] = 'hist'
df_fut['Scenario'] = rcp

columns_order = ['Period', 'Scenario', 'AAD', 'RP_10', 'RP_25', 'RP_50', 'RP_100', 'RP_250']
res_df = pd.concat([era5_df, df_fut], ignore_index=True)[columns_order]

In [None]:
#res_df = pd.concat([era5_df, df_fut], ignore_index=True)
res_df

## Including admin1 regions

In [43]:
# Return periods reported in the summary and the statistic columns derived from them
RP_LABELS = [10, 25, 50, 100, 250]
STAT_COLUMNS = ['AAD'] + [f'RP_{rp}' for rp in RP_LABELS]


def _impact_stats(exp_obj, haz):
    """Return [AAD, RP_10, RP_25, RP_50, RP_100, RP_250] for one exposure/hazard pair."""
    imp = ImpactCalc(exp_obj, impf_set_tc_step, haz).impact()
    curve = imp.calc_freq_curve(return_per=np.arange(1, 251, 1))
    # Return periods run 1..250 in steps of one, so period rp sits at index rp - 1
    return [imp.aai_agg] + [curve.impact[rp - 1] for rp in RP_LABELS]


def _split_run_key(run_key):
    """Split '<model>_<scenario>_<period>' or '<model>_hist' into a 3-tuple, else None."""
    parts = run_key.split('_')
    if len(parts) == 3:
        return tuple(parts)
    if len(parts) == 2:
        return parts[0], parts[1], ""
    return None


def _summarise_exposure(label, exp_obj):
    """Return the hist/2050/2100 summary table for one exposure object.

    Future values are the ERA-5 result plus the multi-model median of the
    per-model change (future run minus the same model's historical run).
    """
    # Historical baseline from the ERA-5 driven event set
    era5_stats = np.array(_impact_stats(exp_obj, tc_haz_sel))

    # One row of statistics per GCM run
    gcm_rows = []
    for run_key, haz in fut_haz_dict.items():
        parsed = _split_run_key(run_key)
        if parsed is None:
            continue
        model_name, scen_name, period_name = parsed
        run_row = {'Model': model_name, 'Scenario': scen_name, 'Period': period_name}
        run_row.update(zip(STAT_COLUMNS, _impact_stats(exp_obj, haz)))
        gcm_rows.append(run_row)
    gcm_df = pd.DataFrame(gcm_rows)

    # Per-model deltas; models without a historical run are skipped
    delta_rows = []
    for _, model_df in gcm_df.groupby('Model', sort=False):
        hist_rows = model_df[model_df['Scenario'] == 'hist']
        if hist_rows.empty:
            continue
        hist_values = hist_rows.iloc[0]
        for period in ('2050', '2100'):
            period_rows = model_df[model_df['Period'] == period]
            if period_rows.empty:
                continue
            fut_values = period_rows.iloc[0]
            delta_row = {'Period': period}
            delta_row.update(
                {col: fut_values[col] - hist_values[col] for col in STAT_COLUMNS})
            delta_rows.append(delta_row)
    delta_df = pd.DataFrame(delta_rows)

    # Median over models. Only the statistic columns are aggregated, so no
    # string column reaches median() (pandas >= 2.0 raises on those).
    diff_df = delta_df.groupby('Period')[STAT_COLUMNS].median().reset_index()

    # Add the median change to the ERA-5 baseline
    df_fut = diff_df.copy()
    df_fut[STAT_COLUMNS] = df_fut[STAT_COLUMNS] + era5_stats

    era5_df = pd.DataFrame([era5_stats], columns=STAT_COLUMNS)
    era5_df['Period'] = 'hist'

    # The identifier column keeps track of which exposure object a row belongs to
    for frame in (era5_df, df_fut):
        frame['Scenario'] = rcp
        frame['Exposure'] = label

    summary = pd.concat([era5_df, df_fut], ignore_index=True)
    return summary[['Exposure', 'Period', 'Scenario'] + STAT_COLUMNS]


# One summary table per exposure object (admin0 plus every admin1 region).
# The helpers keep their intermediates local, so the national-level results of
# the cells above (impact, df, ...) are not overwritten.
all_dfs = [_summarise_exposure(key, exp_obj) for key, exp_obj in exp_dict.items()]

# Concatenate all DataFrames into a single DataFrame
final_combined_df = pd.concat(all_dfs, ignore_index=True)

In [44]:
final_combined_df

Unnamed: 0,Exposure,Period,Scenario,AAD,RP_10,RP_25,RP_50,RP_100,RP_250
0,admin0,hist,ssp370,6687.096842,0.0,0.0,0.0,47447.426898,285125.1
1,admin0,2050,ssp370,47073.544079,0.0,1477.730378,70954.745202,784674.426321,3454099.0
2,admin0,2100,ssp370,33187.70806,0.0,2356.785701,75464.613795,595989.859171,3478266.0
3,admin1_3260,hist,ssp370,0.0,0.0,0.0,0.0,0.0,0.0
4,admin1_3260,2050,ssp370,0.0,0.0,0.0,0.0,0.0,0.0
5,admin1_3260,2100,ssp370,17.407242,0.0,0.0,0.0,0.0,4074.601
6,admin1_3261,hist,ssp370,2435.145525,0.0,0.0,0.0,0.0,50855.96
7,admin1_3261,2050,ssp370,13854.612129,0.0,0.0,0.0,40706.683866,414432.5
8,admin1_3261,2100,ssp370,5237.253954,0.0,0.0,0.0,0.0,226499.1
9,admin1_3262,hist,ssp370,0.351178,0.0,0.0,0.0,0.0,0.0


In [45]:
final_combined_df[final_combined_df.Exposure != 'admin0'].groupby("Period").sum()

Unnamed: 0_level_0,AAD,RP_10,RP_25,RP_50,RP_100,RP_250
Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2050,29861.49009,0.0,456.090972,30126.517889,357041.473472,1564957.0
2100,32131.783457,0.0,166.916045,55829.597474,376814.985293,1668000.0
hist,6687.096842,0.0,0.0,0.0,35334.879021,211191.1
