In [1]:
import os,sys
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import pandas as pd
from osgeo import ogr,gdal
import xarray as xr
import rasterio
import numpy as np
import pyproj
from pygeos import from_wkb,from_wkt
import pygeos
from tqdm import tqdm
from shapely.wkb import loads
from pathlib import Path
import glob
from shapely.geometry import mapping
pd.options.mode.chained_assignment = None
from rasterio.mask import mask
import rioxarray
import matplotlib.pyplot as plt
from scipy import integrate
from collections.abc import Iterable
import openpyxl
from openpyxl import Workbook

import warnings
warnings.filterwarnings("ignore")

from scipy import integrate
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Tell GDAL's OSM driver which osmconf.ini to use; the path is relative
# ('..'), so it is resolved against the current working directory.
gdal.SetConfigOption("OSM_CONFIG_FILE", os.path.join('..',"osmconf.ini"))

# change paths to make it work on your own machine
# Root folder of the input data; the paths below are built from it.
data_path = os.path.join('C:\\','Data','pg_risk_analysis')
# Folder with the STORM NetCDF files (read by load_storm_data).
tc_path = os.path.join(data_path,'tc_netcdf')
# Folder with the flood rasters; clip_flood_data reads from its 'global'
# subfolder and writes to its 'country' subfolder.
fl_path = os.path.join(data_path,'GLOFRIS')
# Not used in the cells shown here -- presumably the per-country .osm.pbf extracts.
osm_data_path = os.path.join('C:\\','Data','country_osm')
# Not used in the cells shown here -- presumably additional power-grid input data.
pg_data_path = os.path.join(data_path,'pg_data')
# Excel workbook with the vulnerability curves and maximum damages.
vul_curve_path = os.path.join(data_path,'vulnerability_curves','input_vulnerability_data.xlsx')
# Not used in the cells shown here -- presumably where results are written.
output_path = os.path.join('C:\\','projects','pg_risk_analysis_output','output')
# Natural Earth country shapefile, located next to (not inside) data_path.
ne_path = os.path.join(data_path,'..',"natural_earth","ne_10m_admin_0_countries.shp")

In [3]:
def flatten(xs):
    """
    Lazily yield the leaves of an arbitrarily nested iterable, depth-first.

    Strings and bytes are treated as leaves and are never split into
    characters; every other iterable element is descended into.
    """
    for item in xs:
        is_leaf = isinstance(item, (str, bytes)) or not isinstance(item, Iterable)
        if is_leaf:
            yield item
            continue
        # nested container: emit its leaves in order
        for leaf in flatten(item):
            yield leaf

def query_b(geoType,keyCol,**valConstraint):
    """
    This function builds an SQL query from the values passed to the retrieve() function.
    Arguments:
         *geoType* : Type of geometry (osm layer) to search for.
         *keyCol* : A list of keys/columns that should be selected from the layer.
             'osm_id' is always selected in front of them.
         ***valConstraint* : Constraints per column. Each keyword is a column name and
             its value is a constraint suffix or a list of suffixes, e.g.
             voltage=['>20'] or power=["='line'"]. A single string is accepted
             as shorthand for a one-element list.
    Returns:
        *string* : a SQL query string. All constraints are combined with AND, followed
        by a final clause requiring the first key/column to be NOT NULL.
    """
    # NOTE: the query is assembled by string concatenation; column names and
    # constraint suffixes must come from trusted code, never from user input.
    query = "SELECT " + ",".join(["osm_id", *keyCol]) + " FROM " + geoType + " WHERE "

    clauses = []
    for column, constraints in valConstraint.items():
        # A bare string would otherwise be iterated character by character.
        if isinstance(constraints, str):
            constraints = [constraints]
        clauses.extend(column + constraint for constraint in constraints)

    # Always ensures the first key/col provided is not Null.
    clauses.append(str(keyCol[0]) + " IS NOT NULL")

    # Joining with AND keeps the single-constraint output unchanged while making
    # multi-constraint queries valid SQL (previously clauses were glued together).
    return query + " AND ".join(clauses)


def retrieve(osm_path,geoType,keyCol,**valConstraint):
    """
    Function to extract specified geometry and keys/values from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region
            for which we want to do the analysis.
        *geoType* : Type of Geometry to retrieve. e.g. lines, multipolygons, etc.
        *keyCol* : These keys will be returned as columns in the dataframe.
        ***valConstraint: A dictionary specifiying the value constraints.
        A key can have multiple values (as a list) for more than one constraint for key/value.
    Returns:
        *DataFrame* : a pandas DataFrame with the columns 'osm_id', the requested
        keys and 'geometry' (pygeos geometries parsed from WKT). When nothing is
        found, or the file cannot be opened, an empty DataFrame with the same
        columns is returned so callers can keep using the column names.
    """
    # cl = columns
    cl = ['osm_id', *keyCol]
    features = []

    driver = ogr.GetDriverByName('OSM')
    # Check the data source BEFORE querying it: Open() returns None on failure.
    data = driver.Open(osm_path) if driver is not None else None
    sql_lyr = None
    if data is not None:
        sql_lyr = data.ExecuteSQL(query_b(geoType,keyCol,**valConstraint))

    if sql_lyr is None:
        print("ERROR: Nonetype error when requesting SQL. Check required.")
    else:
        print('query is finished, lets start the loop')
        skipped = 0
        try:
            for feature in tqdm(sql_lyr,desc='extract'):
                if feature.GetField(keyCol[0]) is None:
                    continue
                ogr_geom = feature.geometry()
                if ogr_geom is None:
                    # feature without geometry: nothing to overlay later on
                    skipped += 1
                    continue
                geom = from_wkt(ogr_geom.ExportToWkt())
                if geom is None:
                    continue
                # one row of the dataframe: requested fields followed by the geometry
                features.append([feature.GetField(i) for i in cl] + [geom])
        finally:
            # Result sets of ExecuteSQL must be handed back to the data source.
            data.ReleaseResultSet(sql_lyr)
        if skipped:
            print("WARNING: skipped {} OSM feature(s) without geometry".format(skipped))

    cl.append('geometry')
    if len(features) > 0:
        return pd.DataFrame(features,columns=cl)

    print("WARNING: No features or No Memory. returning empty GeoDataFrame")
    return pd.DataFrame(columns=cl)

def power_polyline(osm_path):
    """
    Extract the power linestrings of a region from OpenStreetMap.

    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region
        for which we want to do the analysis.
    Returns:
        *DataFrame* : the result of retrieve() on the 'lines' layer for the keys
        'power' and 'voltage', with 'power' renamed to 'asset' and a fresh
        0..n-1 index.
    """
    power_lines = retrieve(osm_path,'lines',['power','voltage'])
    power_lines = power_lines.reset_index(drop=True)

    # downstream code refers to the infrastructure type as 'asset'
    power_lines = power_lines.rename(columns={'power': 'asset'})

    return power_lines.reset_index(drop=True)


def power_polygon(osm_path):
    """
    Function to extract power substation and power plant polygons from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region
        for which we want to do the analysis.
    Returns:
        *DataFrame* : a pandas DataFrame with the columns 'osm_id', 'asset',
        'plant_source' and 'geometry', restricted to rows whose asset is
        'substation' or 'plant'. Empty (same columns) when nothing is found.
    """
    df = retrieve(osm_path,'multipolygons',['power','plant_source'])
    if 'power' not in df.columns:
        # retrieve() found nothing and returned a frame without the key columns
        return pd.DataFrame(columns=['osm_id','asset','plant_source','geometry'])

    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})

    # Normalise hstore-style tag strings to the plain asset name.
    # df.loc[mask, col] is used instead of chained df[col].loc[mask], which
    # silently stops writing through once pandas copy-on-write is active.
    # NOTE(review): the 'power' column presumably already holds the bare tag
    # value, in which case these patterns never match -- verify against data.
    for tag_pattern, asset_name in (('"power"=>"substation"', 'substation'),
                                    ('"power"=>"plant"', 'plant')):
        matches = df['asset'].str.contains(tag_pattern, case=False, na=False)
        df.loc[matches, 'asset'] = asset_name

    df = df.loc[df['asset'].isin(['substation', 'plant'])]

    return df.reset_index(drop=True)


def power_point(osm_path):
    """
    Function to extract energy points (power towers and poles) from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region
        for which we want to do the analysis.
    Returns:
        *DataFrame* : a pandas DataFrame with the columns 'osm_id', 'asset' and
        'geometry', restricted to rows whose asset is 'power_tower' or
        'power_pole'. Empty (same columns) when nothing is found.
    """
    df = retrieve(osm_path,'points',['other_tags'])
    if 'other_tags' not in df.columns:
        # retrieve() found nothing and returned a frame without the key columns
        return pd.DataFrame(columns=['osm_id','asset','geometry'])

    # keep rows containing power data
    df = df.loc[df['other_tags'].str.contains('power', na=False)]
    df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})

    # Collapse the raw tag string to a plain asset name.
    # df.loc[mask, col] is used instead of chained df[col].loc[mask], which
    # silently stops writing through once pandas copy-on-write is active.
    for tag_pattern, asset_name in (('"power"=>"tower"', 'power_tower'),
                                    ('"power"=>"pole"', 'power_pole')):
        matches = df['asset'].str.contains(tag_pattern, case=False, na=False)
        df.loc[matches, 'asset'] = asset_name

    df = df.loc[df['asset'].isin(['power_tower', 'power_pole'])]

    return df.reset_index(drop=True)

In [4]:
def reproject(df_ds, current_crs="epsg:4326", approximate_crs="epsg:3857"):
    """
    Reproject the 'geometry' column of a DataFrame from one CRS to another.

    Parameters
    ----------
    df_ds : pandas.DataFrame
        Frame whose 'geometry' column holds pygeos geometries.
    current_crs : str, optional
        CRS the geometries are currently in, by default "epsg:4326".
    approximate_crs : str, optional
        CRS to reproject to, by default "epsg:3857".

    Returns
    -------
    The result of pygeos.set_coordinates: the geometries with transformed
    coordinates, in the same order as the input column. The input frame is
    not modified (a copy of the column is passed on).
    """
    source_geoms = df_ds['geometry']

    # flat (n, 2) table of every vertex of every geometry
    xy = pygeos.get_coordinates(source_geoms)

    # always_xy=True: coordinates are passed as (x/lon, y/lat) regardless of CRS axis order
    crs_transformer = pyproj.Transformer.from_crs(current_crs, approximate_crs, always_xy=True)
    transformed = crs_transformer.transform(xy[:, 0], xy[:, 1])

    # stack the transformed x and y back into an (n, 2) table and write it into a copy
    transformed_xy = np.array(transformed).T
    return pygeos.set_coordinates(source_geoms.copy(), transformed_xy)

def buffer_assets(assets, buffer_size=100):
    """
    Add a 'buffered' column holding each asset geometry grown by buffer_size.

    Args:
        assets (DataFrame): frame with a 'geometry' column of pygeos geometries.
            It is modified in place.
        buffer_size (int, optional): buffer distance, in the units of the
            geometries' coordinates. Defaults to 100.

    Returns:
        DataFrame: the same `assets` object, now carrying the 'buffered' column.
    """
    asset_geoms = assets.geometry.values
    assets['buffered'] = pygeos.buffer(asset_geoms, buffer_size)
    return assets

def load_curves_maxdam(country_code,vul_curve_path,hazard_type):
    """Load vulnerability curves and maximum damages for a specific country and hazard type.

    Args:
        country_code (str): Country code for the desired country.
        vul_curve_path (str): Path to the input vulnerability curves file.
        hazard_type (str): Type of hazard ('tc' for tropical cyclone, 'fl' for flooding).

    Returns:
        tuple: A tuple containing two pandas DataFrames:
               - curves: Vulnerability curves, interpolated to fill gaps, with the
                 same column labels as the maximum-damage table.
               - maxdam: Maximum damages (transposed), with 'MaxDam', 'LowerDam'
                 and 'UpperDam' scaled by the country's GDP ratio.

    Raises:
        ValueError: if hazard_type is not 'tc' or 'fl', if the country has no
            GDP ratio, or (for 'tc') if it has no design wind speed. All of
            these are checked before the Excel file is opened.
    """

    # dictionary of GDP per capita ratio for each country
    gdp_ratio = {
        "BRN": {"ratio_usa": 0.5201},
        "KHM": {"ratio_usa": 0.0240},
        "CHN": {"ratio_usa": 0.1772},
        "IDN": {"ratio_usa": 0.0647},
        "JPN": {"ratio_usa": 0.5912},
        "LAO": {"ratio_usa": 0.0434},
        "MYS": {"ratio_usa": 0.1775},
        "MNG": {"ratio_usa": 0.0703},
        "MMR": {"ratio_usa": 0.0276},
        "PRK": {"ratio_usa": 0.0106},
        "PHL": {"ratio_usa": 0.0547},
        "SGP": {"ratio_usa": 1.0091},
        "KOR": {"ratio_usa": 0.5367},
        "TWN": {"ratio_usa": 0.4888},
        "THA": {"ratio_usa": 0.1034},
        "VNM": {"ratio_usa": 0.0573},
        "HKG": {"ratio_usa": 0.7091},
        "MAC": {"ratio_usa": 0.5913}}

    # dictionary of design wind speeds for each country
    design_wind_speed = {
        "BRN": {"dws": 32},
        "KHM": {"dws": 32},
        "CHN": {"dws": 52},
        "IDN": {"dws": 32},
        "JPN": {"dws": 52},
        "LAO": {"dws": 32},
        "MYS": {"dws": 32},
        "MNG": {"dws": 0},
        "MMR": {"dws": 39},
        "PRK": {"dws": 39},
        "PHL": {"dws": 52},
        "SGP": {"dws": 32},
        "KOR": {"dws": 52},
        "TWN": {"dws": 60},
        "THA": {"dws": 39},
        "VNM": {"dws": 44}}

    sheet_names = {'tc': 'wind_curves', 'fl': 'flooding_curves'}

    # ---- validate everything up front, before any file I/O -----------------
    if hazard_type not in sheet_names:
        raise ValueError(f"Unknown hazard_type {hazard_type!r}; expected 'tc' or 'fl'")
    sheet_name = sheet_names[hazard_type]

    ratio_usa = gdp_ratio.get(country_code, {}).get("ratio_usa", None)
    if ratio_usa is None:
        raise ValueError(f"No ratio_usa found for {country_code}")

    dws = None
    if hazard_type == 'tc':
        dws = design_wind_speed.get(country_code, {}).get("dws", None)
        if dws is None:
            # e.g. HKG and MAC have a GDP ratio but no design wind speed
            raise ValueError(f"No design wind speed found for {country_code}")

    # ---- load curves ---------------------------------------------------------
    if hazard_type == 'tc':
        # load curves and maximum damages as separate inputs
        curves = pd.read_excel(vul_curve_path,sheet_name=sheet_name,skiprows=11)

        # shift design wind speed of all curves to 60 m/s
        scaling_factor = dws / 60

        # NOTE(review): this multiplies EVERY numeric column -- the
        # 'Wind speed (m/s)' axis and the curve values alike -- by dws/60.
        # If only the wind-speed axis was meant to be rescaled, restrict this
        # to that column. Left as is so results stay identical. Also note that
        # a dws of 0 (MNG) turns every numeric column into zeros.
        curves = curves.apply(lambda x: x * scaling_factor if pd.api.types.is_numeric_dtype(x) else x)
        curves = curves.set_index('Wind speed (m/s)')
    else:
        # load curves and maximum damages as separate inputs
        curves = pd.read_excel(vul_curve_path,sheet_name=sheet_name,skiprows=11,index_col=[0])

    # the first 8 rows under the two-row header hold the maximum-damage table
    maxdam = pd.read_excel(vul_curve_path,sheet_name=sheet_name,index_col=[0],header=[0,1]).iloc[:8]
    curves.columns = maxdam.columns

    #interpolate the curves to fill missing values
    curves = curves.interpolate()

    #transpose maxdam so its easier work with the dataframe
    maxdam = maxdam.T

    print(f"The ratio_usa for {country_code} is {ratio_usa}")

    # scale the damage values by the country's GDP ratio
    for dam_column in ('MaxDam', 'LowerDam', 'UpperDam'):
        maxdam[dam_column] = maxdam[dam_column] * ratio_usa

    return curves,maxdam


def overlay_hazard_assets(df_ds, assets):
    """
    Find which hazard geometries intersect which assets, using a spatial index.

    Args:
        df_ds (DataFrame): hazard dataset with a 'geometry' column of pygeos geometries.
        assets (DataFrame): assets with a 'geometry' column and, for non-polygon
            assets, a 'buffered' column (see buffer_assets).

    Returns:
        ndarray: the result of STRtree.query_bulk with predicate 'intersects':
            an array of shape (2, n) whose first row holds positions in the
            queried asset geometries and whose second row holds positions in
            the hazard geometries.

    Notes:
        The geometry type of the FIRST asset decides for the whole frame:
        polygons / multipolygons (pygeos type ids 3 and 6) are queried with
        their own geometry, everything else with the 'buffered' geometry.
    """
    hazard_tree = pygeos.STRtree(df_ds.geometry.values)

    if len(assets) > 0:
        first_type = pygeos.get_type_id(assets.iloc[0].geometry)
        use_buffered = first_type not in (3, 6)
    else:
        # Nothing to inspect. Query with 'buffered' as before when it exists;
        # fall back to 'geometry' so an empty, never-buffered frame no longer
        # fails on the missing column.
        use_buffered = 'buffered' in assets.columns

    query_geoms = assets.buffered if use_buffered else assets.geometry
    return hazard_tree.query_bulk(query_geoms,predicate='intersects')
    
    
def get_damage_per_asset_per_rp(asset,df_ds,assets,curves,maxdam,return_period,country):
    """
    Calculates the damage per asset per return period based on asset type, hazard curves and maximum damage

    Args:
        asset (tuple): (asset position, frame) pair; asset[0] is the position of the asset in
            `assets`, asset[1] has a 'hazard_point' column with candidate row positions in `df_ds`
        df_ds (pandas.DataFrame): hazard data with a 'geometry' column and one column per return period
        assets (pandas.DataFrame): assets with 'asset' (type) and 'geometry' columns
        curves: vulnerability curves; curves[asset_type] is a Series (one curve) or a DataFrame
            (one column per curve) indexed by hazard intensity
        maxdam (pandas.DataFrame): damage table indexed by asset type with the columns
            'MaxDam', 'LowerDam' and 'UpperDam'
        return_period (str): name of the hazard column in `df_ds` to evaluate
        country (str): not used in the calculation

    Returns:
        list:
          - no exactly-intersecting hazard geometry: a single flat record
            [return_period, asset position, first curve name, 0, 0, 0]
            (also when the asset has several curves);
          - asset with one curve: a single flat record
            [return_period, asset position, curve name, damage, lower damage, upper damage];
          - asset with several curves: a list of such records, one per curve.

    Notes:
        Damage is weighted by the overlap length for linestrings (pygeos type id 1) and by the
        overlap area for polygons / multipolygons (type ids 3 and 6). All other geometry types
        are not weighted.
    """
    asset_position = asset[0]
    asset_record = assets.iloc[asset_position]
    asset_type = asset_record.asset
    asset_geom = asset_record.geometry

    # find the exact hazard overlays:
    # the candidates come from a bulk index query; keep those that really intersect
    get_hazard_points = df_ds.iloc[asset[1]['hazard_point'].values].reset_index()
    get_hazard_points = get_hazard_points.loc[pygeos.intersects(get_hazard_points.geometry.values,asset_geom)]

    damage_table = maxdam.loc[asset_type]
    maxdam_asset = damage_table.MaxDam
    lowerdam_asset = damage_table.LowerDam
    upperdam_asset = damage_table.UpperDam

    if asset_type in ['plant','substation','generator']:
        # if plant,substation are points (zero area), keep the absolute values;
        # otherwise express the damage per unit of asset area
        asset_area = pygeos.area(asset_geom)
        if asset_area != 0:
            maxdam_asset = maxdam_asset/asset_area
            lowerdam_asset = lowerdam_asset/asset_area
            upperdam_asset = upperdam_asset/asset_area

    asset_curves = curves[asset_type]
    hazard_intensity = asset_curves.index.values

    if isinstance(asset_curves,pd.core.series.Series):
        fragility_values = asset_curves.values.flatten()
        only_one = True
        curve_name = asset_curves.name
    elif len(asset_curves.columns) == 1:
        fragility_values = asset_curves.values.flatten()
        only_one = True
        curve_name = asset_curves.columns[0]
    else:
        fragility_values = asset_curves.values
        # Index all three damage series by POSITION in the loop below.
        # Previously only MaxDam was converted, so LowerDam/UpperDam relied on
        # the deprecated positional fallback of label-indexed Series.
        maxdam_asset = maxdam_asset.values
        lowerdam_asset = lowerdam_asset.values
        upperdam_asset = upperdam_asset.values
        only_one = False
        curve_name = asset_curves.columns[0]

    if len(get_hazard_points) == 0:
        return [return_period,asset_position,curve_name,0,0,0]

    geom_type = pygeos.get_type_id(asset_geom)
    is_line = geom_type == 1
    is_polygon = (geom_type == 3) | (geom_type == 6)

    if is_line:
        get_hazard_points['overlay_meters'] = pygeos.length(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
    elif is_polygon:
        get_hazard_points['overlay_m2'] = pygeos.area(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))

    hazard_values = get_hazard_points[return_period].values

    def damages_for_curve(fragility, dam_values):
        """Return one summed damage per entry of dam_values for a single curve."""
        if is_line:
            # interpolated damage ratio per hazard cell, weighted by overlap length
            weighted_ratio = np.interp(hazard_values,hazard_intensity,fragility)*get_hazard_points.overlay_meters
            return [np.sum(weighted_ratio*dam) for dam in dam_values]
        if is_polygon:
            # row-wise evaluation kept as in the original formulation
            return [get_hazard_points.apply(lambda x: np.interp(x[return_period],hazard_intensity,
                                                                fragility)*dam*x.overlay_m2,axis=1).sum()
                    for dam in dam_values]
        damage_ratio = np.interp(hazard_values,hazard_intensity,fragility)
        return [np.sum(damage_ratio*dam) for dam in dam_values]

    if only_one:
        # run the calculation as normal when the asset just has a single curve
        return [return_period,asset_position,curve_name] + damages_for_curve(
            fragility_values,(maxdam_asset,lowerdam_asset,upperdam_asset))

    # run the calculation when the asset has multiple curves
    collect_all = []
    for iter_,curve_id in enumerate(asset_curves.columns):
        collect_all.append([return_period,asset_position,curve_id] + damages_for_curve(
            fragility_values.T[iter_],
            (maxdam_asset[iter_],lowerdam_asset[iter_],upperdam_asset[iter_])))
    return collect_all

In [5]:
def load_storm_data(climate_model,basin,bbox):
    """
    Load storm data from a NetCDF file and process it to return a pandas DataFrame.

    The file is looked up in the module-level `tc_path` folder under the name
    'STORM_FIXED_RETURN_PERIODS{climate_model}_{basin}.nc'.

    Parameters:
    - climate_model (str): climate-model suffix used in the file name and in the output column names
      ('' for the file without a model suffix)
    - basin (str): name of the basin
    - bbox (tuple): bounding box coordinates in the format (minx, miny, maxx, maxy)

    Returns:
    - df_ds (pd.DataFrame): one row per grid cell, with the columns '1_{rp}{climate_model}' for the
      return periods 1, 2, 5, 10, 25, 50, 100, 250, 500 and 1000 (3-s gust wind speeds) and a
      'geometry' column holding a square cell reprojected with reproject(); missing values are 0
    """

    filename = os.path.join(tc_path, f'STORM_FIXED_RETURN_PERIODS{climate_model}_{basin}.nc')
    
    # load data from NetCDF file
    with xr.open_dataset(filename) as ds:
        
        # tag the dataset with EPSG:4326 (no coordinates are transformed here) and clip it to bbox
        ds.rio.write_crs(4326, inplace=True)
        ds = ds.rio.clip_box(minx=bbox[0], miny=bbox[1], maxx=bbox[2], maxy=bbox[3])
        
        #convert 10-min sustained wind speed to 3-s gust wind speed
        ds['mean_3s'] = ds['mean']/0.88*1.11

        # one row per (lat, lon); index level 2 is moved into the columns
        # (presumably the return period 'rp', given the renaming further down)
        df_ds = ds['mean_3s'].to_dataframe().unstack(level=2).reset_index()

        # create geometry values and drop lat lon columns
        df_ds['geometry'] = [pygeos.points(x) for x in list(zip(df_ds['lon'], df_ds['lat']))]
        df_ds = df_ds.drop(['lat', 'lon'], axis=1, level=0)
        
        # interpolate wind speeds of 1,2,5,25,and 250-yr return period
        ## rename columns to return periods (must be integer for interpolating)
        # keep the geometry aside while the numeric columns are interpolated
        df_ds_geometry = pd.DataFrame()
        df_ds_geometry['geometry'] = df_ds['geometry']
        df_ds = df_ds.drop(['geometry'], axis=1, level=0)
        df_ds = df_ds['mean_3s']
        df_ds.columns = [int(x) for x in ds['mean_3s']['rp']]
        # add empty columns for the return periods that are filled by interpolation
        df_ds[1] = np.nan
        df_ds[2] = np.nan
        df_ds[5] = np.nan
        df_ds[25] = np.nan
        df_ds[250] = np.nan
        # sort the columns by return period, then fill the NaN columns along each row;
        # limit_direction='both' lets NaNs at either end of a row be filled as well
        df_ds = df_ds.reindex(sorted(df_ds.columns), axis=1)
        df_ds = df_ds.interpolate(method='pchip', axis=1, limit_direction='both')
        df_ds['geometry'] = df_ds_geometry['geometry']
        # keep only the return periods used downstream
        df_ds = df_ds[[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 'geometry']]
        
        # rename columns to return periods
        df_ds.columns = ['1_{}{}'.format(int(x), climate_model) for x in [1, 2, 5, 10, 25, 50, 100, 250, 500, 1000]] +['geometry']
        # turn every grid point into a square cell with a side of 0.1
        # (in lon/lat units, since the geometry is only reprojected afterwards)
        df_ds['geometry'] = pygeos.buffer(df_ds.geometry, radius=0.1/2, cap_style='square').values
        
        # reproject the geometry column using reproject()'s default CRS pair
        df_ds['geometry'] = reproject(df_ds)
            
        # drop all non values to reduce size
        #df_ds = df_ds.loc[~df_ds['1_10000{}'.format(climate_model)].isna()].reset_index(drop=True)
        # instead of dropping rows, missing values are replaced by 0
        df_ds = df_ds.fillna(0)

    return df_ds

def open_storm_data(country_code):
    """
    Load the STORM data of every relevant basin for a country and stack it per climate model.

    The bounding box used for clipping is the country's Natural Earth geometry,
    buffered by 1 (in the units of the shapefile's coordinates).

    Args:
    - country_code (str): a 3-letter ISO code of the country of interest

    Returns:
    - df_ds (dict): maps each climate-model suffix ('' included) to one DataFrame
      holding the rows of all basins of the country, with a fresh 0..n-1 index
    """

    # file-name suffixes of the available climate models ('' = file without a model suffix)
    model_suffixes = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']

    # basins to load for each country
    basins_by_country = {
        "BRN": ["WP"],
        "KHM": ["WP"],
        "CHN": ["WP", "NI"],
        "IDN": ["SI", "SP", "NI", "WP"],
        "JPN": ["WP"],
        "LAO": ["WP"],
        "MYS": ["WP", "NI"],
        "MNG": ["WP", "NI"],
        "MMR": ["NI", "WP"],
        "PRK": ["WP"],
        "PHL": ["WP"],
        "SGP": ["WP"],
        "KOR": ["WP"],
        "TWN": ["WP"],
        "THA": ["WP", "NI"],
        "VNM": ["WP"]
    }

    # basins are always loaded (and therefore stacked) in this fixed order
    load_order = ("WP", "SP", "NI", "SI")

    # bounding box of the buffered country outline, used to clip the storm data
    countries = gpd.read_file(os.path.join(data_path,'..',"natural_earth","ne_10m_admin_0_countries.shp"))
    country_outline = countries.loc[countries['ISO_A3']==country_code].geometry
    bbox = country_outline.buffer(1).values[0].bounds

    storm_by_model = {}
    for suffix in model_suffixes:
        basin_frames = [load_storm_data(suffix,basin,bbox)
                        for basin in load_order
                        if basin in basins_by_country[country_code]]

        # the concat keys are discarded again by the reset_index right below
        stacked = pd.concat(basin_frames, keys=basins_by_country[country_code])
        storm_by_model[suffix] = stacked.reset_index(drop=True)

    return storm_by_model

In [6]:
# Load the STORM wind data for the Philippines: a dict keyed by climate-model suffix
df_tc = open_storm_data('PHL')
# Display the frame stored under the empty suffix
df_tc['']

Unnamed: 0,1_1,1_2,1_5,1_10,1_25,1_50,1_100,1_250,1_500,1_1000,geometry
0,21.059509,21.062296,21.070679,21.084724,21.127390,21.200173,21.351295,21.834707,22.644577,23.747100,"POLYGON ((12913060.932 568480.588, 12913060.93..."
1,24.652051,24.643577,24.618305,24.576689,24.455587,24.266004,23.931081,23.246079,22.942404,23.698079,"POLYGON ((12924192.881 568480.588, 12924192.88..."
2,28.971906,28.944054,28.860980,28.724116,28.325357,27.699389,26.586265,24.236320,22.810727,23.568668,"POLYGON ((12935324.83 568480.588, 12935324.83 ..."
3,20.745839,20.749080,20.758798,20.774989,20.823505,20.904165,21.064632,21.537098,22.280710,23.487972,"POLYGON ((12946456.779 568480.588, 12946456.77..."
4,32.995612,32.956003,32.837771,32.642688,32.072085,31.169016,29.535995,25.888949,23.097848,23.450698,"POLYGON ((12957588.728 568480.588, 12957588.72..."
...,...,...,...,...,...,...,...,...,...,...,...
20291,45.053105,45.599223,47.249789,49.913732,55.108316,57.796014,60.283663,62.399877,63.633549,64.834125,"POLYGON ((14170971.178 2535554.62, 14170971.17..."
20292,45.142617,45.684890,47.330674,49.999396,55.105428,58.178080,60.419198,62.713213,63.973463,64.905099,"POLYGON ((14182103.127 2535554.62, 14182103.12..."
20293,45.124814,45.671325,47.324048,49.993222,55.157299,58.405548,60.729709,62.975060,64.463318,65.869845,"POLYGON ((14193235.076 2535554.62, 14193235.07..."
20294,45.303961,45.839151,47.442258,50.004214,55.127359,58.350225,60.568494,63.581671,65.095541,66.598622,"POLYGON ((14204367.025 2535554.62, 14204367.02..."


In [7]:
def clip_flood_data(country_code):
    """
    Clip global flood data for a specific country.

    For every return period and scenario the raster
    '<fl_path>/global/inuncoast_<scenario>_nosub_<period>_rp<rp>_0.tif' is masked
    with the country's Natural Earth geometry and written to
    '<fl_path>/country/<country_code>_<scenario>_nosub_<period>_rp<rp>_0.tif'.
    The 'country' folder is created when it does not exist yet.

    Args:
        country_code (str): Country code for the desired country.

    Returns:
        None
    """

    # load country geometry file and create geometry to clip
    ne_countries = gpd.read_file(ne_path)
    geometry = ne_countries.loc[ne_countries['ISO_A3']==country_code].geometry.values[0]
    geoms = [mapping(geometry)]

    rps = ['0001','0002','0005','0010','0025','0050','0100','0250','0500','1000']
    # scenario -> period token used in the raster file names
    climate_models = {'historical': 'hist', 'rcp8p5': '2030'}

    output_dir = os.path.join(fl_path,'country')
    os.makedirs(output_dir, exist_ok=True)

    for rp in rps:
        for climate_model, period in climate_models.items():
            input_name = 'inuncoast_{}_nosub_{}_rp{}_0.tif'.format(climate_model,period,rp)
            input_file = os.path.join(fl_path,'global',input_name)

            # Output name: the input name with the leading 'inuncoast' replaced by
            # the country code. It is derived from the file name only, so
            # underscores in the directory part of the path cannot shift it.
            output_name = '_'.join([country_code] + input_name.split('_')[1:])
            file_path = os.path.join(output_dir,output_name)

            # load raster file and save clipped version
            with rasterio.open(input_file) as src:
                out_image, out_transform = mask(src, geoms, crop=True)
                out_meta = src.meta

                # the clipped raster has a new extent, so size and transform change
                out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})

                with rasterio.open(file_path, "w", **out_meta) as dest:
                    dest.write(out_image)

def load_flood_data(country_code,climate_model):
    """
    Load the country-level coastal flood rasters of one climate scenario.

    One clipped raster per return period is read from ``<fl_path>/country``,
    converted to a table with one row per non-NaN cell, and the ten tables are
    merged on their shared columns. Rows whose ``rp1000`` value is not
    positive are dropped.

    Args:
        country_code (str): Country code used as prefix of the clipped raster files.
        climate_model (str): Climate scenario ('historical' or 'rcp8p5').

    Returns:
        pandas.DataFrame: One row per retained cell, with a ``geometry`` column
        (square cell footprint after ``reproject``) and one ``rp<period>``
        column per return period holding the raster value multiplied by 100.

    Raises:
        ValueError: If ``climate_model`` is not 'historical' or 'rcp8p5'.
    """

    rps = ['0001','0002','0005','0010','0025','0050','0100','0250','0500','1000']

    # scenario -> (progress message, period tag in the file name, cell size in degrees)
    # NOTE(review): the two scenarios use different cell sizes (0.0089932 vs 0.00833);
    # both values are kept exactly as they were. The original code already questioned
    # the historical value ("the original value here is 0.00833???") - confirm which is intended.
    scenario_settings = {
        'historical': ('Loading historical coastal flood data ...', 'hist', 0.0089932),
        'rcp8p5': ('Loading future coastal flood data ...', '2030', 0.00833),
    }

    if climate_model not in scenario_settings:
        # previously this fell through to an UnboundLocalError at the return statement
        raise ValueError("Unknown climate_model '{}'; expected one of {}".format(
            climate_model, sorted(scenario_settings)))

    message, period_tag, cell_size = scenario_settings[climate_model]
    print(message)

    collect_df_ds = []
    for rp in rps:
        file_path = os.path.join(fl_path,'country','{}_{}_nosub_{}_rp{}_0.tif'.format(
            country_code,climate_model,period_tag,rp))
        with xr.open_dataset(file_path) as ds: #, engine="rasterio"
            df_ds = ds.to_dataframe().reset_index()
            df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
            df_ds = df_ds.rename(columns={'band_data': 'rp'+rp}) #rename to return period

            # move from meters to centimeters
            df_ds['rp'+rp] = (df_ds['rp'+rp]*100)
            df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
            df_ds = df_ds.dropna()
            df_ds = df_ds.reset_index(drop=True)

            # turn each cell centre into a square footprint, then reproject
            df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=cell_size/2,cap_style='square').values
            df_ds['geometry'] = reproject(df_ds)
            collect_df_ds.append(df_ds)

    # merge the per-return-period tables one after another on their shared columns
    df_all = collect_df_ds[0]
    for df_rp in collect_df_ds[1:]:
        df_all = df_all.merge(df_rp)

    # keep only cells that have a positive value for the largest return period
    df_all = df_all.loc[df_all['rp1000']>0].reset_index(drop=True)
    return df_all

def open_flood_data(country_code):
    """
    Load the flood data of every climate scenario for one country.

    Args:
        country_code (str): Country code for the desired country.

    Returns:
        dict: Maps each scenario name ('historical', 'rcp8p5') to the
        table returned by ``load_flood_data`` for that scenario.
    """

    # scenarios are loaded in this fixed order: historical first, then rcp8p5
    return {scenario: load_flood_data(country_code,scenario)
            for scenario in ('historical','rcp8p5')}

In [8]:
# Load the country-level flood tables for country code 'PHL' (one per climate
# scenario) and display the historical one as the cell output.
df_fl = open_flood_data('PHL')
df_fl['historical']

Loading historical coastal flood data ...
Loading future coastal flood data ...


Unnamed: 0,rp0001,geometry,rp0002,rp0005,rp0010,rp0025,rp0050,rp0100,rp0250,rp0500,rp1000
0,0.000000,"POLYGON ((13020706.501 892564.07, 13020706.501...",0.000000,0.000000,0.000000,4.009700,8.493829,12.944865,18.805336,23.230480,27.652431
1,0.000000,"POLYGON ((13020706.501 891627.319, 13020706.50...",0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.116062,4.538012
2,46.241997,"POLYGON ((13020706.501 890690.587, 13020706.50...",49.178028,56.403519,61.187408,67.231873,71.715996,76.167038,82.027504,86.452652,90.874603
3,44.351685,"POLYGON ((13020706.501 889753.874, 13020706.50...",47.287716,54.513203,59.297096,65.341553,69.825684,74.276718,80.137192,84.562340,88.984283
4,0.000000,"POLYGON ((13020706.501 887880.505, 13020706.50...",0.000000,0.000000,4.245019,10.289478,14.773607,19.224644,25.085115,29.510260,33.932209
...,...,...,...,...,...,...,...,...,...,...,...
4447,52.176750,"POLYGON ((14068965.039 926300.011, 14068965.03...",53.190361,55.684914,57.336510,59.423317,60.971439,62.508095,64.531410,66.059151,67.585770
4448,0.000000,"POLYGON ((14068965.039 925362.558, 14068965.03...",0.000000,0.000000,0.000000,0.000000,0.000000,0.981641,3.004956,4.532695,6.059313
4449,7.122862,"POLYGON ((14089373.613 827036.51, 14089373.613...",8.115935,10.559845,12.177944,14.222383,15.739083,17.244577,19.226789,20.723534,22.219181
4450,11.667931,"POLYGON ((14090301.275 860725.127, 14090301.27...",12.681543,15.176094,16.827690,18.914497,20.462620,21.999275,24.022591,25.550329,27.076948


# OSM data processing

In [9]:
def extract_osm_infrastructure(country_code,osm_data_path):
    """
    Read the power-infrastructure layers of one country from its OSM extract.

    The file ``<osm_data_path>/<country_code>.osm.pbf`` is queried three times
    (lines, polygons, points). Every layer is reprojected; lines and points
    are additionally filtered by asset type and passed through
    ``buffer_assets`` with ``buffer_size=100``.

    Args:
        country_code (str): Country code for the desired country.
        osm_data_path (str): Directory holding the ``.osm.pbf`` extracts.

    Returns:
        tuple: ``(osm_lines, osm_polygons, osm_points)``:
            - osm_lines: buffered cables, minor cables, lines and minor lines.
            - osm_polygons: all polygons returned by ``power_polygon``.
            - osm_points: buffered power towers and power poles.
    """

    # all three layers come from the same country extract
    pbf_file = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    line_assets = ['cable','minor_cable','line','minor_line']
    point_assets = ['power_tower','power_pole']

    # lines
    lines = power_polyline(pbf_file)
    lines['geometry'] = reproject(lines)
    selected_lines = lines.loc[lines.asset.isin(line_assets)]
    lines = buffer_assets(selected_lines,buffer_size=100).reset_index(drop=True)

    # polygons
    polygons = power_polygon(pbf_file)
    polygons['geometry'] = reproject(polygons)

    # points
    points = power_point(pbf_file)
    points['geometry'] = reproject(points)
    selected_points = points.loc[points.asset.isin(point_assets)]
    points = buffer_assets(selected_points,buffer_size=100).reset_index(drop=True)

    return lines,polygons,points


In [10]:
def assess_damage_osm(country_code,osm_power_infra,hazard_type): #NEW VERSION
    """
    Assess the damage to OSM (OpenStreetMap) infrastructure for a specific country and hazard type.

    For every climate model of the chosen hazard, each asset layer (lines,
    polygons, points) is overlaid with the hazard data, damage is computed per
    overlapping asset and return period, and the per-asset results are summed
    per return period, curve and asset type.

    Args:
        country_code (str): Country code for the desired country.
        osm_power_infra (tuple): A tuple containing three pandas DataFrames:
            - osm_lines: OSM infrastructure lines data.
            - osm_polygons: OSM infrastructure polygons data.
            - osm_points: OSM infrastructure points data.
        hazard_type (str): Type of hazard ('tc' for tropical cyclone or 'fl' for coastal flooding).

    Returns:
        tuple: A tuple containing three dicts, each keyed by climate model and
        holding one pandas DataFrame per key (empty if nothing overlaps):
            - damaged_lines: Damage assessment results for OSM infrastructure lines.
            - damaged_poly: Damage assessment results for OSM infrastructure polygons.
            - damaged_points: Damage assessment results for OSM infrastructure points.
        Non-empty frames have the columns 'rp', 'curve', 'asset_type',
        'meandam', 'lowerdam' and 'upperdam'.
    """
    
    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(country_code,vul_curve_path,hazard_type)
    
    # read infrastructure data:
    osm_lines,osm_poly,osm_points = osm_power_infra
    
    #calculate damaged lines/polygons/points in loop by climate_model
    damaged_lines = {}
    damaged_poly = {}
    damaged_points = {}

    if hazard_type=='tc':
        # read wind data
        # the climate model strings double as suffixes of the return-period names built below
        # ('' presumably denotes the baseline storm set - TODO confirm)
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        df_ds = open_storm_data(country_code)

        # remove assets that will not have any damage
        osm_lines = osm_lines.loc[osm_lines.asset != 'cable'].reset_index(drop=True)
        # minor lines are treated as regular lines for wind
        osm_lines['asset'] = osm_lines['asset'].replace(['minor_line'], 'line')
        osm_poly = osm_poly.loc[osm_poly.asset != 'plant'].reset_index(drop=True)            
    
    elif hazard_type=='fl':
        # read flood data
        climate_models = ['historical','rcp8p5']
        df_ds = open_flood_data(country_code)
        
    # NOTE(review): any other hazard_type leaves climate_models and df_ds undefined,
    # so the loop below fails with a NameError.
    for climate_model in climate_models:
        # names of the hazard columns, one per return period
        if hazard_type=='tc':
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]
        elif hazard_type == 'fl':
            return_periods = ['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000']     
    
        # assess damage for lines
        # the transposed overlay result gives one (asset, hazard_point) pair per row
        overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_lines).T,
                                     columns=['asset','hazard_point'])

        if len(overlay_lines) == 0:
            damaged_lines[climate_model] = pd.DataFrame()

        else:
            collect_line_damages = []
            # each `asset` is a (asset id, rows of overlay_lines for that id) pair from groupby
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_line_damages.append(get_damage_per_asset_per_rp(asset,
                                                                            df_ds[climate_model],
                                                                            osm_lines,
                                                                            curves,
                                                                            maxdam,
                                                                            return_period,
                                                                            country_code))

            # lookup from row index in osm_lines to its asset type
            get_asset_type_line = dict(zip(osm_lines.index,osm_lines.asset))
            
            if hazard_type == 'tc':
                # each collected entry is expected to be a list of 6-value records - TODO confirm against get_damage_per_asset_per_rp
                results = pd.DataFrame([item for sublist in collect_line_damages
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

                # sum the damage over all assets per return period, curve and asset type
                damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

            elif hazard_type == 'fl':
                #results = pd.DataFrame(collect_line_damages,columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                # flatten all nesting and regroup the values into rows of six;
                # the values presumably end up as strings in the array, hence the casts below
                results = pd.DataFrame(np.array(list(flatten(collect_line_damages))).reshape(
                    int(len(list(flatten(collect_line_damages)))/6), 6),
                                       columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                results['asset'] = results['asset'].astype(int)
                results[['meandam','lowerdam','upperdam']] = results[['meandam','lowerdam','upperdam']].astype(float)
                
                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

                #sum damage of line, cable, and minor_line
                results['curve'] = results['curve'].replace(['cable', 'minor_line'], 'line')
                results['asset_type'] = results['asset_type'].replace(['cable', 'minor_line'], 'line')

                damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()
                
        # assess damage for polygons
        # the overlay is skipped entirely when there are no polygons
        if len(osm_poly) > 0:
            overlay_poly = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_poly).T,
                                    columns=['asset','hazard_point'])
        else:
            overlay_poly = pd.DataFrame()

        if len(overlay_poly) == 0:
            damaged_poly[climate_model] = pd.DataFrame()

        else:
            collect_poly_damages = []
            for asset in tqdm(overlay_poly.groupby('asset'),total=len(overlay_poly.asset.unique()),
                              desc='polygon damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_poly_damages.append(get_damage_per_asset_per_rp(asset,
                                                                            df_ds[climate_model],
                                                                            osm_poly,
                                                                            curves,
                                                                            maxdam,
                                                                            return_period,
                                                                            country_code))

            # lookup from row index in osm_poly to its asset type
            get_asset_type_poly = dict(zip(osm_poly.index,osm_poly.asset))
            
            # polygons use the same record handling for both hazard types
            results = pd.DataFrame([item for sublist in collect_poly_damages 
                                    for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

            results['asset_type'] = results.asset.apply(lambda x : get_asset_type_poly[x])    

            damaged_poly[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()
                

        #assess damage for points
        overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_points).T,
                                      columns=['asset','hazard_point'])

        if len(overlay_points) == 0:
            damaged_points[climate_model] = pd.DataFrame()

        else:
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    
                    # check_error = get_damage_per_asset_per_rp(asset,df_ds[climate_model],osm_points,curves,maxdam,return_period,country_code)
                    # with open(os.path.join(output_path,'get_damage_per_asset_per_rp_{}_{}.pkl'.format(country_code,climate_model)), 'wb') as f:
                    #     pickle.dump(check_error,f)

                    collect_point_damages.append(get_damage_per_asset_per_rp(asset,
                                                                            df_ds[climate_model],
                                                                            osm_points,
                                                                            curves,
                                                                            maxdam,
                                                                            return_period,
                                                                            country_code))

            # lookup from row index in osm_points to its asset type
            get_asset_type_point = dict(zip(osm_points.index,osm_points.asset))
            
            if hazard_type == 'tc':
                #catch and remove integers in the sublists of collect_point_damages
                collect_point_damages = [[item for item in sublist if not isinstance(item, int)] for sublist in collect_point_damages]
                # keep only complete 6-value records
                collect_point_damages = [[item for item in sublist if len(item) == 6] for sublist in collect_point_damages]

                # with open(os.path.join(output_path,'collect_point_damages_{}_{}.pkl'.format(country_code,climate_model)), 'wb') as f:
                #     pickle.dump(collect_point_damages,f)

                results = pd.DataFrame([item for sublist in collect_point_damages
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

                # drop rows whose asset column holds the stray values '_' or '3'
                # NOTE(review): presumably malformed records that passed the length filter
                # above; the root cause is not visible here - TODO confirm
                results.drop(results.index[(results.iloc[:, 1] == '_')], inplace=True)
                results.drop(results.index[(results.iloc[:, 1] == '3')], inplace=True)

                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_point[x])
                
                damaged_points[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()
            
            elif hazard_type == 'fl':
                # same flatten-and-regroup handling as for flood damage to lines
                results = pd.DataFrame(np.array(list(flatten(collect_point_damages))).reshape(
                    int(len(list(flatten(collect_point_damages)))/6), 6),
                                       columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                results['asset'] = results['asset'].astype(int)
                results[['meandam','lowerdam','upperdam']] = results[['meandam','lowerdam','upperdam']].astype(float)
                
                #return collect_point_damages,get_asset_type_point
                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_point[x])    

                damaged_points[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

    return damaged_lines,damaged_poly,damaged_points

In [11]:
# Extract the OSM power-infrastructure layers (lines, polygons, points) for country code 'PHL'.
osm_power_infra = extract_osm_infrastructure('PHL',osm_data_path)

query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████████████| 8145/8145 [00:34<00:00, 234.82it/s]


query is finished, lets start the loop


extract: 100%|███████████████████████████████████████████████████████████████████████| 365/365 [02:49<00:00,  2.15it/s]


query is finished, lets start the loop


extract: 100%|███████████████████████████████████████████████████████████████| 440360/440360 [00:46<00:00, 9468.84it/s]


In [12]:
# Coastal-flood ('fl') damage assessment for 'PHL' on the OSM layers held in osm_power_infra.
phl_osm_damage = assess_damage_osm('PHL',osm_power_infra,'fl')

The ratio_usa for PHL is 0.0547
Loading historical coastal flood data ...
Loading future coastal flood data ...


polyline damage calculation for PHL fl (historical): 100%|███████████████████████████| 146/146 [00:19<00:00,  7.38it/s]
polygon damage calculation for PHL fl (historical): 100%|████████████████████████████████| 7/7 [00:00<00:00,  7.15it/s]
point damage calculation for PHL fl (historical): 100%|██████████████████████████████| 877/877 [00:42<00:00, 20.53it/s]
polyline damage calculation for PHL fl (rcp8p5): 100%|███████████████████████████████| 133/133 [00:18<00:00,  7.12it/s]
polygon damage calculation for PHL fl (rcp8p5): 100%|████████████████████████████████████| 7/7 [00:00<00:00,  7.12it/s]
point damage calculation for PHL fl (rcp8p5): 100%|██████████████████████████████████| 877/877 [00:41<00:00, 21.14it/s]


In [13]:
def country_analysis_osm(country_code,hazard_type):
    """
    Perform country analysis for OSM (OpenStreetMap) infrastructure and assess damage.

    Args:
        country_code (str): Country code for the desired country.
        hazard_type (str): Type of hazard ('tc' for tropical cyclone or 'fl' for coastal flooding).

    Returns:
        dict: A dictionary containing risk assessments for different OSM infrastructure types.
    """
    
        # extract infrastructure data from OSM
    osm_power_infra = extract_osm_infrastructure(country_code,osm_data_path)
    
    # assess damage to hazard_type
    osm_damage_infra = assess_damage_osm(country_code,osm_power_infra,hazard_type)
    
    line_risk = {}
    plant_risk = {}
    substation_risk = {}
    tower_risk = {}
    pole_risk = {}

    if hazard_type=='tc':
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']

        for i in range(len(osm_damage_infra)):
            for climate_model in climate_models:
                df = osm_damage_infra[i][climate_model]

                if len(df) == 0:
                    print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))

                else:
                    with pd.ExcelWriter(os.path.join(output_path,'damage','{}_osm_{}{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                        df.to_excel(writer)

                    df['rp'] = df['rp'].replace(['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),
                                                '1_10{}'.format(climate_model),'1_25{}'.format(climate_model),'1_50{}'.format(climate_model),
                                                '1_100{}'.format(climate_model),'1_250{}'.format(climate_model),'1_500{}'.format(climate_model),
                                                '1_1000{}'.format(climate_model)],
                                                [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001])

                    curve_code_substation = ['W2_1_1','W2_1_2','W2_1_3','W2_1_4','W2_1_5','W2_1_6','W2_2_1','W2_2_2','W2_2_3','W2_2_4','W2_2_5','W2_2_6',
                                             'W2_3_1','W2_3_2','W2_3_3','W2_3_4','W2_3_5','W2_3_6','W2_4_1','W2_4_2','W2_4_3','W2_4_4','W2_4_5','W2_4_6',
                                             'W2_5_1','W2_5_2','W2_5_3','W2_5_4','W2_5_5','W2_5_6','W2_6_1','W2_6_2','W2_6_3','W2_6_4','W2_6_5','W2_6_6',
                                             'W2_7_1','W2_7_2','W2_7_3','W2_7_4','W2_7_5','W2_7_6']


                    curve_code_tower = ['W3_1','W3_2','W3_3','W3_4','W3_5','W3_6','W3_7','W3_8','W3_9','W3_10','W3_11','W3_12','W3_13','W3_14','W3_15',
                                        'W3_16','W3_17','W3_18','W3_19','W3_20','W3_21','W3_22','W3_23','W3_24','W3_25','W3_26','W3_27','W3_28']

                    curve_code_pole = ['W4_1','W4_2','W4_3','W4_4','W4_5','W4_6','W4_7','W4_8','W4_9','W4_10','W4_11','W4_12',
                                       'W4_13','W4_14','W4_15','W4_16','W4_17','W4_18','W4_19','W4_20','W4_21','W4_22','W4_23',
                                       'W4_24','W4_25','W4_26','W4_27','W4_28','W4_29','W4_30','W4_31','W4_32','W4_33','W4_34',
                                       'W4_35','W4_36','W4_37','W4_38','W4_39','W4_40','W4_41','W4_42','W4_43','W4_44','W4_45',
                                       'W4_46','W4_47','W4_48','W4_49','W4_50','W4_51','W4_52','W4_53','W4_54','W4_55','W4_56']

                    curve_code_line = ['W5_1_1','W5_1_2','W5_1_3','W5_1_4','W5_1_5','W5_1_6','W5_1_7','W5_1_8','W5_1_9','W5_1_10','W5_1_11','W5_1_12',
                                       'W5_2_1','W5_2_2','W5_2_3','W5_2_4','W5_2_5','W5_2_6','W5_2_7','W5_2_8','W5_2_9','W5_2_10','W5_2_11','W5_2_12',
                                       'W5_3_1','W5_3_2','W5_3_3','W5_3_4','W5_3_5','W5_3_6','W5_3_7','W5_3_8','W5_3_9','W5_3_10','W5_3_11','W5_3_12',
                                       'W5_4_1','W5_4_2','W5_4_3','W5_4_4','W5_4_5','W5_4_6','W5_4_7','W5_4_8','W5_4_9','W5_4_10','W5_4_11','W5_4_12',
                                       'W5_5_1','W5_5_2','W5_5_3','W5_5_4','W5_5_5','W5_5_6','W5_5_7','W5_5_8','W5_5_9','W5_5_10','W5_5_11','W5_5_12',
                                       'W5_6_1','W5_6_2','W5_6_3','W5_6_4','W5_6_5','W5_6_6','W5_6_7','W5_6_8','W5_6_9','W5_6_10','W5_6_11','W5_6_12',
                                       'W5_7_1','W5_7_2','W5_7_3','W5_7_4','W5_7_5','W5_7_6','W5_7_7','W5_7_8','W5_7_9','W5_7_10','W5_7_11','W5_7_12']

                    #assess risk for power lines
                    if i == 0:
                        for curve_code in curve_code_line:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of power lines ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                line_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                    #assess risk for power substations                
                    elif i == 1:                        
                        for curve_code in curve_code_substation:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of substations ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                substation_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                    #assess risk for power towers and power poles
                    elif i == 2:
                        for curve_code in curve_code_tower:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of power towers ...")

                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                tower_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                        for curve_code in curve_code_pole:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of power poles ...")

                            else:                    
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                pole_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

    elif hazard_type=='fl':
        climate_models = ['historical','rcp8p5']
    
        for i in range(len(osm_damage_infra)):
            for climate_model in climate_models:
                df = osm_damage_infra[i][climate_model]
                    
                if len(df) == 0:
                    print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))

                else:
                    with pd.ExcelWriter(os.path.join(output_path,'damage','{}_osm_{}_{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                        df.to_excel(writer)

                    df['rp'] = df['rp'].replace(['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000'],
                                                [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001])
                    
                    curve_code_plant = ['F1_1_1','F1_1_2','F1_1_3']
                    curve_code_substation = ['F2_1_1','F2_1_2','F2_1_3']
                    curve_code_tower = ['F3_1_1','F3_1_2']
                    curve_code_pole = ['F4_1_1','F4_1_2','F4_1_3','F4_1_4']
                    curve_code_line = ['F5_1_1','F5_1_2','F5_1_3','F5_1_4','F5_1_5','F5_1_6','F5_1_7','F5_1_8',
                                      'F5_1_9','F5_1_10','F5_1_11','F5_1_12']
                    curve_code_minor_line = ['F5_2']
                    curve_code_cable = ['F5_3_1','F5_3_2']

                    #assess risk for power lines
                    if i == 0:
                        for curve_code in curve_code_line:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of power lines ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                line_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                    #assess risk for power plants and substations                
                    elif i == 1:
                        for curve_code in curve_code_plant:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of plants ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                plant_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                        for curve_code in curve_code_substation:    
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of substations ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                substation_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                    }

                    #assess risk for power towers and power poles
                    elif i == 2:
                        for curve_code in curve_code_tower:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of power towers ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                tower_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }
                            
                        for curve_code in curve_code_pole:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of power poles ...")
                            
                            else:                    
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                pole_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }
                                
    return pd.DataFrame(line_risk),pd.DataFrame(plant_risk),pd.DataFrame(substation_risk),pd.DataFrame(tower_risk),pd.DataFrame(pole_risk)

In [14]:
# Run the OSM-based risk analysis for country code 'PHL' and hazard type 'fl'.
# country_analysis_osm returns a tuple of five risk DataFrames
# (lines, plants, substations, towers, poles); the bare name on the last
# line makes the notebook display that tuple.
phl_osm_risk = country_analysis_osm('PHL','fl')
phl_osm_risk

query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████████████| 8145/8145 [00:33<00:00, 241.72it/s]


query is finished, lets start the loop


extract: 100%|███████████████████████████████████████████████████████████████████████| 365/365 [02:52<00:00,  2.11it/s]


query is finished, lets start the loop


extract: 100%|███████████████████████████████████████████████████████████████| 440360/440360 [00:46<00:00, 9455.59it/s]


The ratio_usa for PHL is 0.0547
Loading historical coastal flood data ...
Loading future coastal flood data ...


polyline damage calculation for PHL fl (historical): 100%|███████████████████████████| 146/146 [00:19<00:00,  7.42it/s]
polygon damage calculation for PHL fl (historical): 100%|████████████████████████████████| 7/7 [00:01<00:00,  6.74it/s]
point damage calculation for PHL fl (historical): 100%|██████████████████████████████| 877/877 [00:41<00:00, 21.19it/s]
polyline damage calculation for PHL fl (rcp8p5): 100%|███████████████████████████████| 133/133 [00:19<00:00,  6.96it/s]
polygon damage calculation for PHL fl (rcp8p5): 100%|████████████████████████████████████| 7/7 [00:00<00:00,  7.27it/s]
point damage calculation for PHL fl (rcp8p5): 100%|██████████████████████████████████| 877/877 [00:41<00:00, 20.97it/s]


(             historical                                                      \
                  F5_1_1       F5_1_2       F5_1_3       F5_1_4       F5_1_5   
 mean_risk   1560.590879  1716.649967  2028.768142  2543.763132  2798.142234   
 lower_risk  1170.443159  1287.487475  1521.576107  1907.822349  2098.606675   
 upper_risk  1950.738598  2145.812458  2535.960178  3179.703915  3497.677792   
 
                                                                              \
                  F5_1_6       F5_1_7       F5_1_8       F5_1_9      F5_1_10   
 mean_risk   3306.891142  1045.595889  1150.152689  1359.275585  1704.325109   
 lower_risk  2480.168357   784.196917   862.614517  1019.456689  1278.243832   
 upper_risk  4133.613928  1306.994861  1437.690861  1699.094481  2130.406387   
 
             ...       rcp8p5                                        \
             ...       F5_1_3      F5_1_4       F5_1_5       F5_1_6   
 mean_risk   ...  2418.692525  3032.66832  3335.938477

# Government data processing

In [15]:
def extract_pg_infrastructure(country_code):
    """
    Extract the infrastructure data from the government power grid data (GOV)
        for a specific country.

    The data is read from ``<country_code>_line.gpkg`` and
    ``<country_code>_point.gpkg`` inside ``pg_data_path``. Each layer is read
    exactly once, its geometries are converted to pygeos objects, reprojected
    with ``reproject`` and the selected assets are passed through
    ``buffer_assets`` with ``buffer_size=100``.

    Args:
        country_code (str): Country code for the desired country.

    Returns:
        tuple: A tuple containing two pandas DataFrames:
            - pg_lines: GOV lines data (assets of type 'line').
            - pg_points: GOV points data (assets of type 'plant',
              'substation', 'power_tower' or 'power_pole').

    Raises:
        FileNotFoundError: If the line or the point GeoPackage of the country
            does not exist in ``pg_data_path``.
    """

    pg_lines = _load_pg_layer(country_code,'line',['line'])
    pg_points = _load_pg_layer(country_code,'point',['plant','substation','power_tower','power_pole'])

    return pg_lines,pg_points


def _load_pg_layer(country_code,pg_type,asset_types):
    """Read one GOV layer ('line' or 'point'), reproject it and return the buffered assets of the given types."""
    file_path = os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))

    # fail with an explicit message instead of an unbound-variable error later on
    if not os.path.isfile(file_path):
        raise FileNotFoundError("No GOV {} data found for {}: {}".format(pg_type,country_code,file_path))

    pg_data_country = gpd.read_file(file_path)
    pg_data_country = pd.DataFrame(pg_data_country.copy())
    pg_data_country.geometry = pygeos.from_shapely(pg_data_country.geometry)
    pg_data_country['geometry'] = reproject(pg_data_country)

    selected_assets = pg_data_country.loc[pg_data_country.asset.isin(asset_types)]
    return buffer_assets(selected_assets,buffer_size=100).reset_index(drop=True)

In [16]:
def assess_damage_pg(country_code,pg_infra,hazard_type):
    """
    Assess the damage to government power grid data (GOV) based on a hazard type.

    For every climate model of the hazard, the hazard data is overlaid with the
    line and point assets, the damage per asset and return period is collected
    with ``get_damage_per_asset_per_rp`` and the result is summed per
    return period, vulnerability curve and asset type.

    Args:
        country_code (str): Country code for the desired country.
        pg_infra (tuple): Tuple containing two pandas DataFrames:
            - pg_lines: GOV lines data.
            - pg_points: GOV points data.
        hazard_type (str): Type of hazard ('tc' for tropical cyclone, 'fl' for coastal flood).

    Returns:
        tuple: A tuple containing two dictionaries keyed by climate model:
            - damaged_lines: Damage assessment results for GOV lines.
            - damaged_points: Damage assessment results for GOV points.
          Each value is a DataFrame with the columns 'rp', 'curve',
          'asset_type', 'meandam', 'lowerdam' and 'upperdam', or an empty
          DataFrame when no asset overlaps the hazard data.

    Raises:
        ValueError: If ``hazard_type`` is neither 'tc' nor 'fl'.
    """

    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(country_code,vul_curve_path,hazard_type)

    # read infrastructure data:
    pg_lines,pg_points = pg_infra

    if hazard_type=='tc':
        # read wind data
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        df_ds = open_storm_data(country_code)

        # remove assets that will not have any damage
        pg_points = pg_points.loc[pg_points.asset != 'plant'].reset_index(drop=True)

    elif hazard_type == 'fl':
        # read flood data
        climate_models = ['historical','rcp8p5']
        df_ds = open_flood_data(country_code)

    else:
        raise ValueError("hazard_type must be 'tc' or 'fl', got {!r}".format(hazard_type))

    return_period_years = [1,2,5,10,25,50,100,250,500,1000]

    #calculate damaged lines/points in loop by climate_model
    damaged_lines = {}
    damaged_points = {}

    for climate_model in climate_models:
        if hazard_type=='tc':
            # e.g. '1_25_CMCC-CM2-VHR4'
            return_periods = ['1_{}{}'.format(years,climate_model) for years in return_period_years]
        else:
            # e.g. 'rp0025'
            return_periods = ['rp{:04d}'.format(years) for years in return_period_years]

        # assess damage for lines
        damaged_lines[climate_model] = _assess_asset_damage(country_code,hazard_type,climate_model,
                                                            df_ds[climate_model],pg_lines,curves,maxdam,
                                                            return_periods,'polyline')

        # assess damage for points
        damaged_points[climate_model] = _assess_asset_damage(country_code,hazard_type,climate_model,
                                                             df_ds[climate_model],pg_points,curves,maxdam,
                                                             return_periods,'point')

    return damaged_lines,damaged_points


def _assess_asset_damage(country_code,hazard_type,climate_model,hazard_data,assets,curves,maxdam,return_periods,geometry_label):
    """Return the damage of one asset table for one climate model, summed per return period, curve and asset type."""
    overlay = pd.DataFrame(overlay_hazard_assets(hazard_data,assets).T,
                           columns=['asset','hazard_point'])

    # nothing is exposed to the hazard
    if len(overlay) == 0:
        return pd.DataFrame()

    collect_damages = []
    for asset in tqdm(overlay.groupby('asset'),total=len(overlay.asset.unique()),
                      desc='{} damage calculation for {} {} ({})'.format(geometry_label,country_code,hazard_type,climate_model)):
        for return_period in return_periods:
            collect_damages.append(get_damage_per_asset_per_rp(asset,
                                                               hazard_data,
                                                               assets,
                                                               curves,
                                                               maxdam,
                                                               return_period,
                                                               country_code))

    result_columns = ['rp','asset','curve','meandam','lowerdam','upperdam']
    damage_columns = ['meandam','lowerdam','upperdam']

    if hazard_type=='tc':
        results = pd.DataFrame([item for sublist in collect_damages
                                for item in sublist],columns=result_columns)
    else:
        # flatten once and rebuild the rows of six values; going through a single
        # numpy array turns every value into a string, so the numeric columns are cast back
        flat_damages = list(flatten(collect_damages))
        results = pd.DataFrame(np.array(flat_damages).reshape(int(len(flat_damages)/6), 6),
                               columns=result_columns)
        results['asset'] = results['asset'].astype(int)
        results[damage_columns] = results[damage_columns].astype(float)

    get_asset_type = dict(zip(assets.index,assets.asset))
    results['asset_type'] = results.asset.apply(lambda x : get_asset_type[x])

    # aggregate for both hazard types (the flood results of the lines used to be dropped here)
    return results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()


In [24]:
def country_analysis_pg(country_code,hazard_type):
    """
    Perform risk analysis on government power grid data (GOV) for a specific
    country and hazard type.

    The damage per climate model is written to
    ``<output_path>/damage/<country>_pg_<hazard><climate_model>_damage_<i>.xlsx``
    (i = 0 for lines, 1 for points). The damages per return period are then
    integrated over the exceedance probabilities, separately for every
    vulnerability curve.

    Args:
        country_code (str): Country code for the desired country.
        hazard_type (str): Type of hazard ('tc' for tropical cyclone, 'fl' for coastal flood).

    Returns:
        tuple: A tuple containing three pandas DataFrames with the risk of
            power grid lines, plants, and substations. The columns are
            (climate_model, curve_code) pairs and the rows are 'mean_risk',
            'lower_risk' and 'upper_risk'. A DataFrame is empty when no risk
            was found. Plant curves are only evaluated for 'fl'.

    Raises:
        ValueError: If ``hazard_type`` is neither 'tc' nor 'fl'.
    """

    return_period_years = [1,2,5,10,25,50,100,250,500,1000]
    exceedance_probabilities = [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001]

    if hazard_type=='tc':
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        # no plant curves are evaluated for tropical cyclones
        curve_code_plant = []
        # W2_1_1 ... W2_7_6
        curve_code_substation = ['W2_{}_{}'.format(group,variant) for group in range(1,8) for variant in range(1,7)]
        # W5_1_1 ... W5_7_12
        curve_code_line = ['W5_{}_{}'.format(group,variant) for group in range(1,8) for variant in range(1,13)]

    elif hazard_type=='fl':
        climate_models = ['historical','rcp8p5']
        curve_code_plant = ['F1_1_1','F1_1_2','F1_1_3']
        curve_code_substation = ['F2_1_1','F2_1_2','F2_1_3']
        curve_code_line = ['F5_1_1','F5_1_2','F5_1_3','F5_1_4','F5_1_5','F5_1_6','F5_1_7','F5_1_8',
                           'F5_1_9','F5_1_10','F5_1_11','F5_1_12']

    else:
        raise ValueError("hazard_type must be 'tc' or 'fl', got {!r}".format(hazard_type))

    # extract infrastructure data from gov data
    pg_power_infra = extract_pg_infrastructure(country_code)

    # assess damage to hazard_type
    pg_damage_infra = assess_damage_pg(country_code,pg_power_infra,hazard_type)

    # the risk dictionaries are only ever filled, never replaced, so the results
    # of one climate model cannot wipe out those of another
    line_risk = {}
    plant_risk = {}
    substation_risk = {}

    for i in range(len(pg_damage_infra)):
        for climate_model in climate_models:
            if len(pg_damage_infra[i]) == 0:
                print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))
                continue

            df = pg_damage_infra[i][climate_model]

            # NOTE(review): for 'fl' there is no separator between hazard type and climate
            # model in this file name (e.g. 'flhistorical'); kept as is so existing outputs keep their names
            with pd.ExcelWriter(os.path.join(output_path,'damage','{}_pg_{}{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                df.to_excel(writer)

            # no damage for this climate model: nothing to integrate
            if len(df) == 0:
                continue

            # translate the return period labels into annual exceedance probabilities
            if hazard_type == 'tc':
                return_period_labels = ['1_{}{}'.format(years,climate_model) for years in return_period_years]
            else:
                return_period_labels = ['rp{:04d}'.format(years) for years in return_period_years]
            df['rp'] = df['rp'].replace(return_period_labels,exceedance_probabilities)

            #assess risk for power lines
            if i == 0:
                _integrate_curve_risks(df,curve_code_line,climate_model,line_risk,'power lines')

            #assess risk for power plants and substations
            elif i == 1:
                _integrate_curve_risks(df,curve_code_plant,climate_model,plant_risk,'plants')
                _integrate_curve_risks(df,curve_code_substation,climate_model,substation_risk,'substations')

    return pd.DataFrame(line_risk),pd.DataFrame(plant_risk),pd.DataFrame(substation_risk)


def _integrate_curve_risks(df,curve_codes,climate_model,risk_store,asset_label):
    """Integrate damage over exceedance probability for each curve and store the result in risk_store[climate_model,curve_code]."""
    # integrate.simps is not available in recent SciPy releases; simpson is its replacement
    simpson = getattr(integrate,'simpson',None) or integrate.simps

    for curve_code in curve_codes:
        # order every curve from the most to the least frequent event before integrating
        loss_list = df.loc[df['curve'] == curve_code].sort_values(by='rp',ascending=False)
        if len(loss_list) == 0:
            print("No risk of {} ...".format(asset_label))
            continue

        # reversed so that the probabilities passed as x are increasing
        probabilities = loss_list.rp.values.tolist()[::-1]

        risk_store[climate_model,curve_code] = {
            'mean_risk': simpson(y=loss_list.meandam.values.tolist()[::-1], x=probabilities),
            'lower_risk': simpson(y=loss_list.lowerdam.values.tolist()[::-1], x=probabilities),
            'upper_risk': simpson(y=loss_list.upperdam.values.tolist()[::-1], x=probabilities)
        }

In [25]:
# Run the government-data (GOV) risk analysis for country code 'PRK' and hazard type 'tc'.
# NOTE(review): the result is stored as `phl_pg_risk` although the country code
# passed in is 'PRK' — confirm which of the two is intended.
phl_pg_risk = country_analysis_pg('PRK','tc')

The ratio_usa for PRK is 0.0106


polyline damage calculation for PRK tc (): 100%|███████████████████████████████████████| 48/48 [01:12<00:00,  1.51s/it]
point damage calculation for PRK tc (): 100%|██████████████████████████████████████████| 28/28 [00:07<00:00,  3.76it/s]
polyline damage calculation for PRK tc (_CMCC-CM2-VHR4): 100%|█████████████████████████| 48/48 [01:10<00:00,  1.48s/it]
point damage calculation for PRK tc (_CMCC-CM2-VHR4): 100%|████████████████████████████| 28/28 [00:07<00:00,  3.81it/s]
polyline damage calculation for PRK tc (_CNRM-CM6-1-HR): 100%|█████████████████████████| 48/48 [01:11<00:00,  1.48s/it]
point damage calculation for PRK tc (_CNRM-CM6-1-HR): 100%|████████████████████████████| 28/28 [00:07<00:00,  3.83it/s]
polyline damage calculation for PRK tc (_EC-Earth3P-HR): 100%|█████████████████████████| 48/48 [01:11<00:00,  1.49s/it]
point damage calculation for PRK tc (_EC-Earth3P-HR): 100%|████████████████████████████| 28/28 [00:07<00:00,  3.80it/s]
polyline damage calculation for PRK tc (

# Save results

In [26]:
def risk_output(country_code,hazard_type,infra_type):
    """
    Generate risk output based on country, hazard type, and infrastructure type.

    Runs the risk analysis and writes one Excel workbook per climate model to
    ``<output_path>/risk``. Every risk table that has results for the climate
    model gets its own sheet. When no table has results, a workbook with a
    single 'line_risk' sheet listing only the labels 'mean_risk', 'lower_risk'
    and 'upper_risk' is written instead.

    Args:
        country_code (str): Country code for the desired country.
        hazard_type (str): Type of hazard ('tc' for tropical cyclone, 'fl' for coastal flood).
        infra_type (str): Type of infrastructure ('osm' for OpenStreetMap, 'gov' for government data).

    Returns:
        None

    Raises:
        ValueError: If ``hazard_type`` or ``infra_type`` is not one of the supported values.
    """

    if hazard_type == 'tc':
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']

    elif hazard_type == 'fl':
        climate_models = ['historical','rcp8p5']

    else:
        raise ValueError("hazard_type must be 'tc' or 'fl', got {!r}".format(hazard_type))

    if infra_type == 'osm':
        line_risk,plant_risk,substation_risk,tower_risk,pole_risk = country_analysis_osm(country_code,hazard_type)
        risk_tables = [('line_risk',line_risk),('plant_risk',plant_risk),('substation_risk',substation_risk),
                       ('tower_risk',tower_risk),('pole_risk',pole_risk)]

    elif infra_type == 'gov':
        line_risk,plant_risk,substation_risk = country_analysis_pg(country_code,hazard_type)
        risk_tables = [('line_risk',line_risk),('plant_risk',plant_risk),('substation_risk',substation_risk)]

    else:
        raise ValueError("infra_type must be 'osm' or 'gov', got {!r}".format(infra_type))

    for climate_model in climate_models:
        # tc models already start with '_' and the baseline ('') is labelled 'present';
        # fl models need the separator added
        if hazard_type == 'tc':
            model_suffix = '_present' if climate_model == '' else climate_model
        else:
            model_suffix = '_{}'.format(climate_model)

        risk_file = os.path.join(output_path,'risk','{}_{}_{}{}_risk'.format(country_code,infra_type,hazard_type,model_suffix)+'.xlsx')

        # only keep tables that hold results for this climate model; a table can be
        # non-empty and still lack the columns of one particular model
        sheets = [(sheet_name,risk_table[climate_model])
                  for sheet_name,risk_table in risk_tables
                  if len(risk_table) != 0 and climate_model in risk_table.columns.get_level_values(0)]

        # the context manager saves and closes the workbook (ExcelWriter.save() is not
        # available in recent pandas releases) and only one writer is opened per file
        with pd.ExcelWriter(risk_file,engine='openpyxl') as writer:
            if sheets:
                # write each dataframe to a different sheet
                for sheet_name,risk_sheet in sheets:
                    risk_sheet.to_excel(writer, sheet_name=sheet_name)

            else:
                # placeholder so that every climate model still gets an output file
                placeholder = pd.DataFrame({0: ['mean_risk', 'lower_risk', 'upper_risk']})
                placeholder.to_excel(writer,sheet_name='line_risk', index=False)