In [92]:
import os,sys
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
import pandas as pd
from osgeo import ogr,gdal
import xarray as xr
import rasterio
import numpy as np
import pyproj
from pygeos import from_wkb,from_wkt
import pygeos
from tqdm import tqdm
from shapely.wkb import loads
from pathlib import Path
import glob
from shapely.geometry import mapping
pd.options.mode.chained_assignment = None
from rasterio.mask import mask
import rioxarray
import matplotlib.pyplot as plt
from scipy import integrate
from collections.abc import Iterable
import openpyxl
from openpyxl import Workbook

import warnings
warnings.filterwarnings("ignore")

from scipy import integrate
from sklearn.preprocessing import MinMaxScaler

In [2]:
gdal.SetConfigOption("OSM_CONFIG_FILE", os.path.join('..',"osmconf.ini"))

# change paths to make it work on your own machine
data_path = os.path.join('C:\\','Data','pg_risk_analysis')
tc_path = os.path.join(data_path,'tc_netcdf')
fl_path = os.path.join(data_path,'GLOFRIS')
osm_data_path = os.path.join('C:\\','Data','country_osm')
pg_data_path = os.path.join(data_path,'pg_data')
vul_curve_path = os.path.join(data_path,'vulnerability_curves','input_vulnerability_data.xlsx')
output_path = os.path.join('C:\\','projects','pg_risk_analysis_output','output')
ne_path = os.path.join(data_path,'..',"natural_earth","ne_10m_admin_0_countries.shp")

In [3]:
def flatten(xs):
    for x in xs:
        if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
            yield from flatten(x)
        else:
            yield x

def query_b(geoType,keyCol,**valConstraint):
    """
    This function builds an SQL query from the values passed to the retrieve() function.
    Arguments:
         *geoType* : Type of geometry (osm layer) to search for.
         *keyCol* : A list of keys/columns that should be selected from the layer.
         ***valConstraint* : A dictionary of constraints for the values. e.g. WHERE 'value'>20 or 'value'='constraint'
    Returns:
        *string: : a SQL query string.
    """
    query = "SELECT " + "osm_id"
    for a in keyCol: query+= ","+ a  
    query += " FROM " + geoType + " WHERE "
    # If there are values in the dictionary, add constraint clauses
    if valConstraint: 
        for a in [*valConstraint]:
            # For each value of the key, add the constraint
            for b in valConstraint[a]: query += a + b
        query+= " AND "
    # Always ensures the first key/col provided is not Null.
    query+= ""+str(keyCol[0]) +" IS NOT NULL" 
    return query 


def retrieve(osm_path,geoType,keyCol,**valConstraint):
    """
    Function to extract specified geometry and keys/values from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.     
        *geoType* : Type of Geometry to retrieve. e.g. lines, multipolygons, etc.
        *keyCol* : These keys will be returned as columns in the dataframe.
        ***valConstraint: A dictionary specifiying the value constraints.  
        A key can have multiple values (as a list) for more than one constraint for key/value.  
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with all columns, geometries, and constraints specified.    
    """
    driver=ogr.GetDriverByName('OSM')
    data = driver.Open(osm_path)
    query = query_b(geoType,keyCol,**valConstraint)
    sql_lyr = data.ExecuteSQL(query)
    features =[]
    # cl = columns 
    cl = ['osm_id'] 
    for a in keyCol: cl.append(a)
    if data is not None:
        print('query is finished, lets start the loop')
        for feature in tqdm(sql_lyr,desc='extract'):
            #try:
            if feature.GetField(keyCol[0]) is not None:
                geom1 = (feature.geometry().ExportToWkt())
                #print(geom1)
                geom = from_wkt(feature.geometry().ExportToWkt()) 
                if geom is None:
                    continue
                # field will become a row in the dataframe.
                field = []
                for i in cl: field.append(feature.GetField(i))
                field.append(geom)   
                features.append(field)
            #except:
            #    print("WARNING: skipped OSM feature")   
    else:
        print("ERROR: Nonetype error when requesting SQL. Check required.")    
    cl.append('geometry')                   
    if len(features) > 0:
        return pd.DataFrame(features,columns=cl)
    else:
        print("WARNING: No features or No Memory. returning empty GeoDataFrame") 
        return pd.DataFrame(columns=['osm_id','geometry'])

def power_polyline(osm_path):
    """
    Function to extract all energy linestrings from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """
    df = retrieve(osm_path,'lines',['power','voltage'])
    
    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})
    
    #print(df) #check infra keys
    
    return df.reset_index(drop=True)

# def power_polygon(osm_path): # check with joel, something was wrong here with extracting substations
#     """
#     Function to extract energy polygons from OpenStreetMap  
#     Arguments:
#         *osm_path* : file path to the .osm.pbf file of the region 
#         for which we want to do the analysis.        
#     Returns:
#         *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
#     """
#     df = retrieve(osm_path,'multipolygons',['other_tags']) 
    
#     df = df.loc[(df.other_tags.str.contains('power'))]   #keep rows containing power data         
#     df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})     
    
#     df['asset'].loc[df['asset'].str.contains('"power"=>"substation"', case=False)]  = 'substation' #specify row
#     df['asset'].loc[df['asset'].str.contains('"power"=>"plant"', case=False)] = 'plant' #specify row
    
#     df = df.loc[(df.asset == 'substation') | (df.asset == 'plant')]
            
#     return df.reset_index(drop=True) 

def electricity(osm_path):
    """
    Function to extract building polygons from OpenStreetMap    
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with all unique building polygons.    
    """
    df = retrieve(osm_path,'multipolygons',['power'])
    
    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})
    
    #df = df[df.asset!='generator']
    df['asset'].loc[df['asset'].str.contains('"power"=>"substation"', case=False)]  = 'substation' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"plant"', case=False)] = 'plant' #specify row
        
    df = df.loc[(df.asset == 'substation') | (df.asset == 'plant')]
    
    #print(df['asset'].unique())
    
    return df.reset_index(drop=True)

# def retrieve_poly_subs(osm_path, w_list, b_list):
#     """
#     Function to extract electricity substation polygons from OpenStreetMap
#     Arguments:
#         *osm_path* : file path to the .osm.pbf file of the region
#         for which we want to do the analysis.
#         *w_list* :  white list of keywords to search in the other_tags columns
#         *b_list* :  black list of keywords of rows that should not be selected
#     Returns:
#         *GeoDataFrame* : a geopandas GeoDataFrame with specified unique substation.
#     """
#     df = retrieve(osm_path,'multipolygons',['other_tags'])
#     df = df[df.other_tags.str.contains('substation', case=False, na=False)]
#     #df = df.loc[(df.other_tags.str.contains('substation'))]
#     df = df[~df.other_tags.str.contains('|'.join(b_list))]
#     #df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})
#     df['asset']  = 'substation' #specify row
#     #df = df.loc[(df.asset == 'substation')] #specify row
#     return df.reset_index(drop=True)

def power_point(osm_path):
    """
    Function to extract energy points from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """   
    df = retrieve(osm_path,'points',['other_tags']) 
    df = df.loc[(df.other_tags.str.contains('power'))]  #keep rows containing power data       
    df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})     
        
    df['asset'].loc[df['asset'].str.contains('"power"=>"tower"', case=False)]  = 'power_tower' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"pole"', case=False)] = 'power_pole' #specify row
    #df['asset'].loc[df['asset'].str.contains('"utility"=>"power"', case=False)] = 'power_tower' #specify row
    
    df = df.loc[(df.asset == 'power_tower') | (df.asset == 'power_pole')]
            
    return df.reset_index(drop=True)

In [47]:
def reproject(df_ds, current_crs="epsg:4326", approximate_crs="epsg:3857"):

    # Extract the input geometries as a numpy array of coordinates
    geometries = df_ds['geometry']
    coords = pygeos.get_coordinates(geometries)

    # Transform the coordinates using pyproj
    transformer = pyproj.Transformer.from_crs(current_crs, approximate_crs, always_xy=True)
    new_coords = transformer.transform(coords[:, 0], coords[:, 1])

    # Create a new GeoSeries with the reprojected coordinates
    return pygeos.set_coordinates(geometries.copy(), np.array(new_coords).T)

def buffer_assets(assets, buffer_size=100):
    """
    Create a buffer of a specified size around the geometries in a GeoDataFrame.
    
    Args:
        assets (GeoDataFrame): A GeoDataFrame containing geometries to be buffered.
        buffer_size (int, optional): The distance in the units of the GeoDataFrame's CRS to buffer the geometries.
            Defaults to 100.
    
    Returns:
        GeoDataFrame: A new GeoDataFrame with an additional column named 'buffered' containing the buffered
            geometries.
    """
    # Create a buffer of the specified size around the geometries
    assets['buffered'] = pygeos.buffer(assets.geometry.values, buffer_size)
    
    return assets

def load_curves_maxdam(country_code,vul_curve_path,hazard_type):
    """[summary]

    Args:
        data_path ([type]): [description]

    Returns:
        [type]: [description]
    """
    
    # dictionary of GDP per capita ratio for each country
    gdp_ratio = {
        "BRN": {"ratio_usa": 0.5201},
        "KHM": {"ratio_usa": 0.0240},
        "CHN": {"ratio_usa": 0.1772},
        "IDN": {"ratio_usa": 0.0647},
        "JPN": {"ratio_usa": 0.5912},
        "LAO": {"ratio_usa": 0.0434},
        "MYS": {"ratio_usa": 0.1775},
        "MNG": {"ratio_usa": 0.0703},
        "MMR": {"ratio_usa": 0.0276},
        "PRK": {"ratio_usa": 0.0106},
        "PHL": {"ratio_usa": 0.0547},
        "SGP": {"ratio_usa": 1.0091},
        "KOR": {"ratio_usa": 0.5367},
        "TWN": {"ratio_usa": 0.4888},
        "THA": {"ratio_usa": 0.1034},
        "VNM": {"ratio_usa": 0.0573},
        "HKG": {"ratio_usa": 0.7091},
        "MAC": {"ratio_usa": 0.5913}}
    
    if hazard_type == 'tc':
        sheet_name = 'wind_curves'
        
        # dictionary of design wind speeds for each country
        design_wind_speed = {
            "BRN": {"dws": 32},
            "KHM": {"dws": 32},
            "CHN": {"dws": 52},
            "IDN": {"dws": 32},
            "JPN": {"dws": 52},
            "LAO": {"dws": 32},
            "MYS": {"dws": 32},
            "MNG": {"dws": 0},
            "MMR": {"dws": 39},
            "PRK": {"dws": 39},
            "PHL": {"dws": 52},
            "SGP": {"dws": 32},
            "KOR": {"dws": 52},
            "TWN": {"dws": 60},
            "THA": {"dws": 39},
            "VNM": {"dws": 44}}
        
        curves = pd.read_excel(vul_curve_path,sheet_name=sheet_name,skiprows=11)
        
        dws = design_wind_speed.get(country_code, {}).get("dws", None)
        scaling_factor = dws / 60 #shift design wind speed of all curves to 60 m/s

        curves = curves.apply(lambda x: x * scaling_factor if pd.api.types.is_numeric_dtype(x) else x)
        
        curves = curves.set_index('Wind speed (m/s)')
        
    elif hazard_type == 'fl':
        sheet_name = 'flooding_curves'    
        
        # load curves and maximum damages as separate inputs
        curves = pd.read_excel(vul_curve_path,sheet_name=sheet_name,skiprows=11,index_col=[0])

    maxdam = pd.read_excel(vul_curve_path,sheet_name=sheet_name,index_col=[0],header=[0,1]).iloc[:8]
    #maxdam = maxdam.rename({'substation_point':'substation'},level=0,axis=1)        
        
    curves.columns = maxdam.columns
    
    #interpolate the curves to fill missing values
    curves = curves.interpolate()
    #print(curves.tail(10))
    
    #transpose maxdam so its easier work with the dataframe
    maxdam = maxdam.T
    
    ratio_usa = gdp_ratio.get(country_code, {}).get("ratio_usa", None)

    if ratio_usa is not None:
        print(f"The ratio_usa for {country_code} is {ratio_usa}")
    else:
        print(f"No ratio_usa found for {country_code}")
        
    maxdam['MaxDam'] = maxdam['MaxDam'] * ratio_usa
    maxdam['LowerDam'] = maxdam['LowerDam'] * ratio_usa
    maxdam['UpperDam'] = maxdam['UpperDam'] * ratio_usa

    return curves,maxdam


def overlay_hazard_assets(df_ds, assets):
    """
    Overlay a set of assets with a hazard dataset and return the subset of assets that intersect with
    one or more hazard polygons or lines.
    
    Args:
        df_ds (GeoDataFrame): A GeoDataFrame containing the hazard dataset.
        assets (GeoDataFrame): A GeoDataFrame containing the assets to be overlaid with the hazard dataset.
    
    Returns:
        ndarray: A numpy array of integers representing the indices of the hazard geometries that intersect with
            the assets. If the assets have a 'buffered' column, the buffered geometries are used for the overlay.
    """
    hazard_tree = pygeos.STRtree(df_ds.geometry.values)
    #if (pygeos.get_type_id(assets.iloc[0].geometry) == 3) | (pygeos.get_type_id(assets.iloc[0].geometry) == 6):
    if len(assets) > 0:
        if (pygeos.get_type_id(assets.iloc[0].geometry) == 3) or (pygeos.get_type_id(assets.iloc[0].geometry) == 6):
            return hazard_tree.query_bulk(assets.geometry,predicate='intersects')    
        else:
            return hazard_tree.query_bulk(assets.buffered,predicate='intersects')
    
    else:
        return hazard_tree.query_bulk(assets.buffered,predicate='intersects')
    
    
def get_damage_per_asset_per_rp(asset,df_ds,assets,curves,maxdam,return_period,country):
    """
    Calculates the damage per asset per return period based on asset type, hazard curves and maximum damage

    Args:
        asset (tuple): Tuple with two dictionaries, containing the asset index and the hazard point index of the asset
        df_ds (pandas.DataFrame): A pandas DataFrame containing hazard points with a 'geometry' column
        assets (geopandas.GeoDataFrame): A GeoDataFrame containing asset geometries and asset type information
        curves (dict): A dictionary with the asset types as keys and their corresponding hazard curves as values
        maxdam (pandas.DataFrame): A pandas DataFrame containing the maximum damage for each asset type
        return_period (str): The return period for which the damage should be calculated
        country (str): The country for which the damage should be calculated

    Returns:
        list or tuple: Depending on the input, the function either returns a list of tuples with the asset index, the curve name and the calculated damage, or a tuple with None, None, None if no hazard points are found
    """
    
    # find the exact hazard overlays:
    get_hazard_points = df_ds.iloc[asset[1]['hazard_point'].values].reset_index()
    get_hazard_points = get_hazard_points.loc[pygeos.intersects(get_hazard_points.geometry.values,assets.iloc[asset[0]].geometry)]

    asset_type = assets.iloc[asset[0]].asset
    asset_geom = assets.iloc[asset[0]].geometry

    if asset_type in ['plant','substation','generator']:
        #if plant,substation are points, do not calculate the area
        if pygeos.area(asset_geom) == 0:
            maxdam_asset = maxdam.loc[asset_type].MaxDam
            lowerdam_asset = maxdam.loc[asset_type].LowerDam
            upperdam_asset = maxdam.loc[asset_type].UpperDam
        else:
            maxdam_asset = maxdam.loc[asset_type].MaxDam/pygeos.area(asset_geom)
            lowerdam_asset = maxdam.loc[asset_type].LowerDam/pygeos.area(asset_geom)
            upperdam_asset = maxdam.loc[asset_type].UpperDam/pygeos.area(asset_geom)
    else:
        maxdam_asset = maxdam.loc[asset_type].MaxDam
        lowerdam_asset = maxdam.loc[asset_type].LowerDam
        upperdam_asset = maxdam.loc[asset_type].UpperDam

    hazard_intensity = curves[asset_type].index.values
    
    if isinstance(curves[asset_type],pd.core.series.Series):
        fragility_values = curves[asset_type].values.flatten()
        only_one = True
        curve_name = curves[asset_type].name
    elif len(curves[asset_type].columns) == 1:
        fragility_values = curves[asset_type].values.flatten()      
        only_one = True   
        curve_name = curves[asset_type].columns[0]
    else:
        fragility_values = curves[asset_type].values#.T[0]
        maxdam_asset = maxdam_asset.values#[0]
        only_one = False

    if len(get_hazard_points) == 0:
        if only_one:
            return [return_period,asset[0],curve_name,0,0,0]
        else:
            return [return_period,asset[0],curves[asset_type].columns[0],0,0,0]
            
    else:
        if only_one:    
            # run the calculation as normal when the asset just has a single curve
            if pygeos.get_type_id(asset_geom) == 1:            
                get_hazard_points['overlay_meters'] = pygeos.length(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
                return [return_period,asset[0],curve_name,np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,
                                                             fragility_values))*get_hazard_points.overlay_meters*maxdam_asset),
                                                          np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,
                                                             fragility_values))*get_hazard_points.overlay_meters*lowerdam_asset),
                                                          np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,
                                                             fragility_values))*get_hazard_points.overlay_meters*upperdam_asset)]

            elif (pygeos.get_type_id(asset_geom) == 3) | (pygeos.get_type_id(asset_geom) == 6) :
                get_hazard_points['overlay_m2'] = pygeos.area(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
                return [return_period,asset[0],curve_name,get_hazard_points.apply(lambda x: np.interp(x[return_period],hazard_intensity, 
                                                                  fragility_values)*maxdam_asset*x.overlay_m2,axis=1).sum(),
                                                          get_hazard_points.apply(lambda x: np.interp(x[return_period],hazard_intensity, 
                                                                  fragility_values)*lowerdam_asset*x.overlay_m2,axis=1).sum(),
                                                          get_hazard_points.apply(lambda x: np.interp(x[return_period],hazard_intensity, 
                                                                  fragility_values)*upperdam_asset*x.overlay_m2,axis=1).sum()]  

            else:
                return [return_period,asset[0],curve_name,np.sum((np.interp(get_hazard_points[return_period].values,
                                                             hazard_intensity,fragility_values))*maxdam_asset),
                                                          np.sum((np.interp(get_hazard_points[return_period].values,
                                                             hazard_intensity,fragility_values))*lowerdam_asset),
                                                          np.sum((np.interp(get_hazard_points[return_period].values,
                                                             hazard_intensity,fragility_values))*upperdam_asset)]
        else:
            # run the calculation when the asset has multiple curves
            if pygeos.get_type_id(asset_geom) == 1:            
                get_hazard_points['overlay_meters'] = pygeos.length(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
            elif (pygeos.get_type_id(asset_geom) == 3) | (pygeos.get_type_id(asset_geom) == 6) :
                get_hazard_points['overlay_m2'] = pygeos.area(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
            
            collect_all = []
            for iter_,curve_ids in enumerate(curves[asset_type].columns):
                if pygeos.get_type_id(asset_geom) == 1:
                    collect_all.append([return_period,asset[0],curves[asset_type].columns[iter_],
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*get_hazard_points.overlay_meters*maxdam_asset[iter_]),
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*get_hazard_points.overlay_meters*lowerdam_asset[iter_]),
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*get_hazard_points.overlay_meters*upperdam_asset[iter_])])
                                   
                elif (pygeos.get_type_id(asset_geom) == 3) | (pygeos.get_type_id(asset_geom) == 6) :
                    collect_all.append([return_period,asset[0],curves[asset_type].columns[iter_],
                                        get_hazard_points.apply(lambda x: np.interp(x[return_period], hazard_intensity,
                                                                                    fragility_values.T[iter_])*maxdam_asset[iter_]*x.overlay_m2,axis=1).sum(),
                                        get_hazard_points.apply(lambda x: np.interp(x[return_period], hazard_intensity,
                                                                                    fragility_values.T[iter_])*lowerdam_asset[iter_]*x.overlay_m2,axis=1).sum(),
                                        get_hazard_points.apply(lambda x: np.interp(x[return_period], hazard_intensity,
                                                                                    fragility_values.T[iter_])*upperdam_asset[iter_]*x.overlay_m2,axis=1).sum()])

                else:
                    collect_all.append([return_period,asset[0],curves[asset_type].columns[iter_],
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*maxdam_asset[iter_]),
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*lowerdam_asset[iter_]),
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*upperdam_asset[iter_])])
            return collect_all

In [53]:
load_curves_maxdam('VNM',vul_curve_path,'tc')[0]

Infrastructure type substation                                                
Code                    W2_1_1   W2_1_2   W2_1_3   W2_1_4   W2_1_5   W2_1_6   
Wind speed (m/s)                                                              
190.666667             0.49713  0.49713  0.49713  0.49713  0.49713  0.49713  \
194.333333             0.49713  0.49713  0.49713  0.49713  0.49713  0.49713   
198.000000             0.49713  0.49713  0.49713  0.49713  0.49713  0.49713   
201.666667             0.49713  0.49713  0.49713  0.49713  0.49713  0.49713   
205.333333             0.49713  0.49713  0.49713  0.49713  0.49713  0.49713   
209.000000             0.49713  0.49713  0.49713  0.49713  0.49713  0.49713   
212.666667             0.49713  0.49713  0.49713  0.49713  0.49713  0.49713   
216.333333             0.49713  0.49713  0.49713  0.49713  0.49713  0.49713   
220.000000             0.49713  0.49713  0.49713  0.49713  0.49713  0.49713   
223.666667             0.49713  0.49713  0.49713  0.

Infrastructure type,substation,substation,substation,substation,substation,substation,substation,substation,substation,substation,...,line,line,line,line,line,line,line,line,line,line
Code,W2_1_1,W2_1_2,W2_1_3,W2_1_4,W2_1_5,W2_1_6,W2_2_1,W2_2_2,W2_2_3,W2_2_4,...,W5_7_3,W5_7_4,W5_7_5,W5_7_6,W5_7_7,W5_7_8,W5_7_9,W5_7_10,W5_7_11,W5_7_12
Wind speed (m/s),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0.000000,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
0.327556,2.830435e-113,2.830435e-113,2.830435e-113,2.830435e-113,2.830435e-113,2.830435e-113,3.687061e-167,3.687061e-167,3.687061e-167,3.687061e-167,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
0.565787,5.660870e-113,5.660870e-113,5.660870e-113,5.660870e-113,5.660870e-113,5.660870e-113,7.374122e-167,7.374122e-167,7.374122e-167,7.374122e-167,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
0.655111,8.491306e-113,8.491306e-113,8.491306e-113,8.491306e-113,8.491306e-113,8.491306e-113,1.106118e-166,1.106118e-166,1.106118e-166,1.106118e-166,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
0.721451,1.132174e-112,1.132174e-112,1.132174e-112,1.132174e-112,1.132174e-112,1.132174e-112,1.474824e-166,1.474824e-166,1.474824e-166,1.474824e-166,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209.000000,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.972049e-01,4.972049e-01,4.972049e-01,4.972049e-01,...,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147
212.666667,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.972049e-01,4.972049e-01,4.972049e-01,4.972049e-01,...,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147
216.333333,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.972049e-01,4.972049e-01,4.972049e-01,4.972049e-01,...,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147
220.000000,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.971302e-01,4.972049e-01,4.972049e-01,4.972049e-01,4.972049e-01,...,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147,0.627147


In [7]:
brn_load[1]

Unnamed: 0_level_0,Unnamed: 1_level_0,Specific occupancy,Reference,Type vulnerability data,Cost type,Unit,MaxDam,LowerDam,UpperDam
Infrastructure type,Code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
substation,W2_1_1,Open,"Watson and Etemadi, 2020",curve,,euro/facility,4748773.05,3561579.7875,5935966.3125
substation,W2_1_2,Open,"Watson and Etemadi, 2020",curve,,euro/facility,2933686.462,2200264.8465,3667108.0775
substation,W2_1_3,Open,"Watson and Etemadi, 2020",curve,,euro/facility,1582924.35,1187193.2625,1978655.4375
substation,W2_1_4,Open,"Watson and Etemadi, 2020",curve,,euro/facility,997242.3405,747931.755375,1246552.925625
substation,W2_1_5,Open,"Watson and Etemadi, 2020",curve,,euro/facility,838949.9055,629212.429125,1048687.381875
...,...,...,...,...,...,...,...,...,...
line,W5_7_8,Transmission line,"Xue et al., 2020",curve,,euro/m,154.277353,115.708015,192.846691
line,W5_7_9,Transmission line,"Xue et al., 2020",curve,,euro/m,182.328348,136.746261,227.910435
line,W5_7_10,Transmission line,"Xue et al., 2020",curve,,euro/m,228.612052,171.459039,285.765065
line,W5_7_11,Transmission line,"Xue et al., 2020",curve,,euro/m,251.472759,188.604569,314.340949


In [5]:
def load_storm_data(climate_model,basin,bbox):
    """
    Load storm data from a NetCDF file and process it to return a pandas DataFrame.

    Parameters:
    - climate_model (str): name of the climate model
    - basin (str): name of the basin
    - bbox (tuple): bounding box coordinates in the format (minx, miny, maxx, maxy)
    - ne_crs (str): CRS string of the North-East projection

    Returns:
    - df_ds (pd.DataFrame): pandas DataFrame with interpolated wind speeds for different return periods and geometry column
    """
    # set paths
    # data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()

    filename = os.path.join(tc_path, f'STORM_FIXED_RETURN_PERIODS{climate_model}_{basin}.nc')
    
    # load data from NetCDF file
    with xr.open_dataset(filename) as ds:
        
        # convert data to WGS84 CRS
        ds.rio.write_crs(4326, inplace=True)
        ds = ds.rio.clip_box(minx=bbox[0], miny=bbox[1], maxx=bbox[2], maxy=bbox[3])
        
        #convert 10-min sustained wind speed to 3-s gust wind speed
        ds['mean_3s'] = ds['mean']/0.88*1.11

        # get the mean values
        df_ds = ds['mean_3s'].to_dataframe().unstack(level=2).reset_index()

        # create geometry values and drop lat lon columns
        df_ds['geometry'] = [pygeos.points(x) for x in list(zip(df_ds['lon'], df_ds['lat']))]
        df_ds = df_ds.drop(['lat', 'lon'], axis=1, level=0)
        
        # interpolate wind speeds of 1,2,5,25,and 250-yr return period
        ## rename columns to return periods (must be integer for interpolating)
        df_ds_geometry = pd.DataFrame()
        df_ds_geometry['geometry'] = df_ds['geometry']
        df_ds = df_ds.drop(['geometry'], axis=1, level=0)
        df_ds = df_ds['mean_3s']
        df_ds.columns = [int(x) for x in ds['mean_3s']['rp']]
        df_ds[1] = np.nan
        df_ds[2] = np.nan
        df_ds[5] = np.nan
        df_ds[25] = np.nan
        df_ds[250] = np.nan
        df_ds = df_ds.reindex(sorted(df_ds.columns), axis=1)
        df_ds = df_ds.interpolate(method='pchip', axis=1, limit_direction='both')
        df_ds['geometry'] = df_ds_geometry['geometry']
        df_ds = df_ds[[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 'geometry']]
        
        # rename columns to return periods
        df_ds.columns = ['1_{}{}'.format(int(x), climate_model) for x in [1, 2, 5, 10, 25, 50, 100, 250, 500, 1000]] +['geometry']
        df_ds['geometry'] = pygeos.buffer(df_ds.geometry, radius=0.1/2, cap_style='square').values
        
        # reproject the geometry column to the specified CRS
        df_ds['geometry'] = reproject(df_ds)
            
        # drop all non values to reduce size
        #df_ds = df_ds.loc[~df_ds['1_10000{}'.format(climate_model)].isna()].reset_index(drop=True)
        df_ds = df_ds.fillna(0)

    return df_ds

def open_storm_data(country_code):
    """
    This function loads STORM data for a given country code, clips it based on the country geometry,
    and combines data from different basins and climate models.

    Args:
    - country_code (str): a 3-letter ISO code of the country of interest

    Returns:
    - df_ds (dict): a dictionary containing STORM data for different climate models, organized by basin
    """
    # set paths
    # data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()
    
    # list of available climate models
    climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']

    # dictionary of basins for each country
    country_basin = {
        "BRN": ["WP"],
        "KHM": ["WP"],
        "CHN": ["WP", "NI"],
        "IDN": ["SI", "SP", "NI", "WP"],
        "JPN": ["WP"],
        "LAO": ["WP"],
        "MYS": ["WP", "NI"],
        "MNG": ["WP", "NI"],
        "MMR": ["NI", "WP"],
        "PRK": ["WP"],
        "PHL": ["WP"],
        "SGP": ["WP"],
        "KOR": ["WP"],
        "TWN": ["WP"],
        "THA": ["WP", "NI"],
        "VNM": ["WP"]
    }

    # load country geometry file and create geometry to clip
    ne_countries = gpd.read_file(os.path.join(data_path,'..',"natural_earth","ne_10m_admin_0_countries.shp"))
    bbox = ne_countries.loc[ne_countries['ISO_A3']==country_code].geometry.buffer(1).values[0].bounds
    # ne_countries = gpd.read_file('C:/Users/mye500/OneDrive - Vrije Universiteit Amsterdam/01_Research-Projects/01_risk_assessment/base_map/base_map_adm_0.gpkg')
    # bbox = ne_countries.loc[ne_countries['GID_0']==country_code].geometry.buffer(1).values[0].bounds

    df_ds = {}
    for climate_model in climate_models:
        concat_prep = []

        #combine STORM data from different basins
        if "WP" in country_basin[country_code]:
            WP = load_storm_data(climate_model,'WP',bbox)
            concat_prep.append(WP)
        if "SP" in country_basin[country_code]:
            SP = load_storm_data(climate_model,'SP',bbox)
            concat_prep.append(SP)
        if "NI" in country_basin[country_code]:            
            NI = load_storm_data(climate_model,'NI',bbox)
            concat_prep.append(NI)            
        if "SI" in country_basin[country_code]:       
            SI = load_storm_data(climate_model,'SI',bbox)
            concat_prep.append(SI)            
                   
        df_ds_cl = pd.concat(concat_prep, keys=country_basin[country_code])
        df_ds_cl = df_ds_cl.reset_index(drop=True)
        df_ds[climate_model] = df_ds_cl

    return df_ds

In [160]:
twn_wind = open_storm_data('TWN')

In [69]:
brn_wind = open_storm_data('BRN')

In [106]:
brn_wind[''][['1_1000','geometry']] #climate_model, return_period

Unnamed: 0,1_1000,geometry
0,25.881390,"POLYGON ((12579102.46 568480.588, 12579102.46 ..."
1,25.803687,"POLYGON ((12590234.409 568480.588, 12590234.40..."
2,25.622256,"POLYGON ((12601366.358 568480.588, 12601366.35..."
3,25.560145,"POLYGON ((12612498.307 568480.588, 12612498.30..."
4,25.383634,"POLYGON ((12623630.256 568480.588, 12623630.25..."
...,...,...
380,27.470805,"POLYGON ((12913060.932 680335.356, 12913060.93..."
381,27.363381,"POLYGON ((12924192.881 680335.356, 12924192.88..."
382,27.361039,"POLYGON ((12935324.83 680335.356, 12935324.83 ..."
383,27.350224,"POLYGON ((12946456.779 680335.356, 12946456.77..."


In [9]:
#ne
brn_wind['_CMCC-CM2-VHR4']['1_1_CMCC-CM2-VHR4'].min()

22.714411149336897

In [13]:
#gdam
twn_wind('TWN')['_CMCC-CM2-VHR4']#['1_1_CMCC-CM2-VHR4'].min()

32.77332750074388

In [74]:
def clip_flood_data(country_code):
    # set paths
    #data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()

    # load country geometry file and create geometry to clip
    ne_countries = gpd.read_file(ne_path)
    geometry = ne_countries.loc[ne_countries['ISO_A3']==country_code].geometry.values[0]
    geoms = [mapping(geometry)]
    
    #climate_model: historical, rcp4p5, rcp8p5; time_period: hist, 2030, 2050, 2080
    rps = ['0001','0002','0005','0010','0025','0050','0100','0250','0500','1000']
    climate_models = ['historical','rcp8p5']
    
    for rp in rps:
        #global input_file
        for climate_model in climate_models:
            if climate_model=='historical':
                input_file = os.path.join(fl_path,'global',
                                          'inuncoast_{}_nosub_hist_rp{}_0.tif'.format(climate_model,rp)) 
 
            elif climate_model=='rcp8p5':
                input_file = os.path.join(fl_path,'global',
                                          'inuncoast_{}_nosub_2030_rp{}_0.tif'.format(climate_model,rp))

            # load raster file and save clipped version
            with rasterio.open(input_file) as src:
                out_image, out_transform = mask(src, geoms, crop=True)
                out_meta = src.meta

                out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})

                if 'scistor' in fl_path:
                    file_path = os.path.join(fl_path,'country','_'.join([country_code]+input_file.split('_')[6:]))
                else:
                    file_path = os.path.join(fl_path,'country','_'.join([country_code]+input_file.split('_')[3:]))

                with rasterio.open(file_path, "w", **out_meta) as dest:
                    dest.write(out_image)

def load_flood_data(country_code,climate_model):
    # set paths
    #data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()
     
    rps = ['0001','0002','0005','0010','0025','0050','0100','0250','0500','1000']
    collect_df_ds = []
    
    if climate_model=='historical':
        print('Loading historical coastal flood data ...')
        for rp in rps:
            #for file in files:
            file_path = os.path.join(fl_path,'country','{}_{}_nosub_hist_rp{}_0.tif'.format(country_code,climate_model,rp))
            with xr.open_dataset(file_path) as ds: #, engine="rasterio"
                df_ds = ds.to_dataframe().reset_index()
                df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
                df_ds = df_ds.rename(columns={'band_data': 'rp'+rp}) #rename to return period
                
                # move from meters to centimeters
                df_ds['rp'+rp] = (df_ds['rp'+rp]*100)         
                df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
                df_ds = df_ds.dropna()
                df_ds = df_ds.reset_index(drop=True)
                df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=0.0089932/2,cap_style='square').values  # the original value here is 0.00833???
                df_ds['geometry'] = reproject(df_ds)
                collect_df_ds.append(df_ds)

        df_all = collect_df_ds[0].merge(collect_df_ds[1]).merge(collect_df_ds[2]).merge(collect_df_ds[3]).merge(collect_df_ds[4])\
                 .merge(collect_df_ds[5]).merge(collect_df_ds[6]).merge(collect_df_ds[7]).merge(collect_df_ds[8]).merge(collect_df_ds[9])
        df_all = df_all.loc[df_all['rp1000']>0].reset_index(drop=True)

    elif climate_model=='rcp8p5':
        print('Loading future coastal flood data ...')
        for rp in rps:
            #for file in files:
            file_path = os.path.join(fl_path,'country','{}_{}_nosub_2030_rp{}_0.tif'.format(country_code,climate_model,rp))
            with xr.open_dataset(file_path) as ds: #, engine="rasterio"
                df_ds = ds.to_dataframe().reset_index()
                df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
                df_ds = df_ds.rename(columns={'band_data': 'rp'+rp}) #rename to return period
                df_ds['rp'+rp] = (df_ds['rp'+rp]*100)
                df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
                df_ds = df_ds.dropna()
                df_ds = df_ds.reset_index(drop=True)
                df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=0.00833/2,cap_style='square').values
                df_ds['geometry'] = reproject(df_ds)
                collect_df_ds.append(df_ds)

        df_all = collect_df_ds[0].merge(collect_df_ds[1]).merge(collect_df_ds[2]).merge(collect_df_ds[3]).merge(collect_df_ds[4])\
                 .merge(collect_df_ds[5]).merge(collect_df_ds[6]).merge(collect_df_ds[7]).merge(collect_df_ds[8]).merge(collect_df_ds[9])

        df_all = df_all.loc[df_all['rp1000']>0].reset_index(drop=True)
    return df_all

def open_flood_data(country_code):
    climate_models = ['historical','rcp8p5']
    df_ds = {}
    for climate_model in climate_models:
        df_ds_sc = load_flood_data(country_code,climate_model)
        df_ds[climate_model] = df_ds_sc
    
    return df_ds

In [27]:
clip_flood_data('KOR')

In [109]:
%%time
khm_flood = open_flood_data('KHM')

Loading historical coastal flood data ...
Loading future coastal flood data ...
CPU times: total: 2min 9s
Wall time: 2min 9s


In [115]:
df1 = khm_flood['historical'][['rp0001','geometry']] #climate_model, return_period
df1 = df1[~df1.eq(0).any(axis=1)]
df1

Unnamed: 0,rp0001,geometry
1,20.138884,"POLYGON ((11460378.305 1301203.722, 11460378.3..."
6,34.355808,"POLYGON ((11461305.968 1298362.747, 11461305.9..."
7,99.879242,"POLYGON ((11462233.63 1312570.176, 11462233.63..."
8,162.722992,"POLYGON ((11462233.63 1311622.815, 11462233.63..."
9,171.230804,"POLYGON ((11462233.63 1310675.482, 11462233.63..."
...,...,...
321,22.372198,"POLYGON ((11601382.994 1183982.151, 11601382.9..."
327,4.255009,"POLYGON ((11602310.656 1183982.151, 11602310.6..."
331,7.622194,"POLYGON ((11603238.318 1183982.151, 11603238.3..."
334,20.360147,"POLYGON ((11604165.981 1183982.151, 11604165.9..."


In [117]:
df2 = khm_flood['historical'][['rp0100','geometry']] #climate_model, return_period
df2 = df2[~df2.eq(0).any(axis=1)]
df2

Unnamed: 0,rp0100,geometry
1,63.124001,"POLYGON ((11460378.305 1301203.722, 11460378.3..."
2,26.177502,"POLYGON ((11461305.968 1312570.176, 11461305.9..."
4,36.686016,"POLYGON ((11461305.968 1300256.702, 11461305.9..."
5,27.513527,"POLYGON ((11461305.968 1299309.71, 11461305.96..."
6,80.326653,"POLYGON ((11461305.968 1298362.747, 11461305.9..."
...,...,...
342,81.774506,"POLYGON ((11620863.904 1172659.691, 11620863.9..."
344,46.028496,"POLYGON ((11621791.567 1172659.691, 11621791.5..."
346,36.697803,"POLYGON ((11622719.229 1171716.318, 11622719.2..."
347,20.580435,"POLYGON ((11622719.229 1170772.971, 11622719.2..."


In [137]:
twn_flood = open_flood_data('TWN')

Loading historical coastal flood data ...
Loading future coastal flood data ...


In [None]:
twn_flood

# OSM data processing

In [7]:
def extract_osm_infrastructure(country_code,osm_data_path):
    """_summary_

    Args:
        country_code (_type_): _description_
        osm_data_path (_type_): _description_

    Returns:
        _type_: _description_
    """
    # set paths
    #data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()
    
    # lines
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    osm_lines = power_polyline(osm_path)
    osm_lines['geometry'] = reproject(osm_lines)
    osm_lines = buffer_assets(osm_lines.loc[osm_lines.asset.isin(
        ['cable','minor_cable','line','minor_line'])],buffer_size=100).reset_index(drop=True)
    
    # polygons
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    osm_polygons = electricity(osm_path)
    osm_polygons['geometry'] = reproject(osm_polygons)
    
    # points
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    osm_points = power_point(osm_path)
    osm_points['geometry'] = reproject(osm_points)
    osm_points = buffer_assets(osm_points.loc[osm_points.asset.isin(
        ['power_tower','power_pole'])],buffer_size=100).reset_index(drop=True)
    
    return osm_lines,osm_polygons,osm_points


In [49]:
osm_power_infra = extract_osm_infrastructure('BRN',osm_data_path)

query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████████████████| 87/87 [00:00<00:00, 491.55it/s]


query is finished, lets start the loop


extract: 100%|█████████████████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 74.34it/s]


['plant' 'substation']
query is finished, lets start the loop


extract: 100%|███████████████████████████████████████████████████████████████████| 8774/8774 [00:00<00:00, 9235.45it/s]


In [50]:
#NEW VERSION!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
def assess_damage_osm(country_code,osm_power_infra,hazard_type): #NEW VERSION
    # set paths
    #data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()

    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(country_code,vul_curve_path,hazard_type)
    
    # read infrastructure data:
    osm_lines,osm_poly,osm_points = osm_power_infra
    
    #calculate damaged lines/polygons/points in loop by climate_model
    damaged_lines = {}
    damaged_poly = {}
    damaged_points = {}

    if hazard_type=='tc':
        # read wind data
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        df_ds = open_storm_data(country_code)

        # remove assets that will not have any damage
        osm_lines = osm_lines.loc[osm_lines.asset != 'cable'].reset_index(drop=True)
        osm_lines['asset'] = osm_lines['asset'].replace(['minor_line'], 'line')
        osm_poly = osm_poly.loc[osm_poly.asset != 'plant'].reset_index(drop=True)            
    
    elif hazard_type=='fl':
        # read flood data
        climate_models = ['historical','rcp8p5']
        df_ds = open_flood_data(country_code)
    
        
    for climate_model in climate_models:
        if hazard_type=='tc':
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]
        elif hazard_type == 'fl':
            return_periods = ['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000']     
    
        # assess damage for lines
        #print(df_ds[climate_model])
        #print(overlay_hazard_assets(df_ds[climate_model],osm_lines).T)
        overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_lines).T,
                                     columns=['asset','hazard_point','geometry'])
        
        overlay_lines_new = overlay_lines
        overlay_lines_new['geometry'] = None
        for index, row in overlay_lines_new.iterrows():
            hazard_point = row['hazard_point']
            geometry = df_ds[climate_model].loc[hazard_point, 'geometry']
            overlay_lines_new.at[index, 'geometry'] = geometry
            
        overlay_lines_new.to_excel(os.path.join(output_path,f'{country_code}_overlay_lines_{climate_model}.xlsx'))


        if len(overlay_lines) == 0:
            damaged_lines[climate_model] = pd.DataFrame()

        else:
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_line_damages.append(get_damage_per_asset_per_rp(asset,
                                                                            df_ds[climate_model],
                                                                            osm_lines,
                                                                            curves,
                                                                            maxdam,
                                                                            return_period,
                                                                            country_code))

            get_asset_type_line = dict(zip(osm_lines.index,osm_lines.asset))
            
            if hazard_type == 'tc':
                results = pd.DataFrame([item for sublist in collect_line_damages
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

                damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

            elif hazard_type == 'fl':
                #results = pd.DataFrame(collect_line_damages,columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                results = pd.DataFrame(np.array(list(flatten(collect_line_damages))).reshape(
                    int(len(list(flatten(collect_line_damages)))/6), 6),
                                       columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                results['asset'] = results['asset'].astype(int)
                results[['meandam','lowerdam','upperdam']] = results[['meandam','lowerdam','upperdam']].astype(float)
                
                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

                #sum damage of line, cable, and minor_line
                results['curve'] = results['curve'].replace(['cable', 'minor_line'], 'line')
                results['asset_type'] = results['asset_type'].replace(['cable', 'minor_line'], 'line')

                damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()
                
        # assess damage for polygons
        if len(osm_poly) > 0:
            overlay_poly = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_poly).T,
                                    columns=['asset','hazard_point'])
        else:
            overlay_poly = pd.DataFrame()
            
            overlay_poly.to_excel(os.path.join(output_path,f'{country_code}_overlay_poly_{climate_model}.xlsx'))

        if len(overlay_poly) == 0:
            damaged_poly[climate_model] = pd.DataFrame()

        else:
            collect_poly_damages = []
            for asset in tqdm(overlay_poly.groupby('asset'),total=len(overlay_poly.asset.unique()),
                              desc='polygon damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_poly_damages.append(get_damage_per_asset_per_rp(asset,
                                                                            df_ds[climate_model],
                                                                            osm_poly,
                                                                            curves,
                                                                            maxdam,
                                                                            return_period,
                                                                            country_code))

            get_asset_type_poly = dict(zip(osm_poly.index,osm_poly.asset))
            
            results = pd.DataFrame([item for sublist in collect_poly_damages 
                                    for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

            results['asset_type'] = results.asset.apply(lambda x : get_asset_type_poly[x])    

            damaged_poly[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()
            
        #assess damage for points
        overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_points).T,
                                      columns=['asset','hazard_point'])
        overlay_points.to_excel(os.path.join(output_path,f'{country_code}_overlay_points_{climate_model}.xlsx'))

        if len(overlay_points) == 0:
            damaged_points[climate_model] = pd.DataFrame()

        else:
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_point_damages.append(get_damage_per_asset_per_rp(asset,
                                                                            df_ds[climate_model],
                                                                            osm_points,
                                                                            curves,
                                                                            maxdam,
                                                                            return_period,
                                                                            country_code))

            get_asset_type_point = dict(zip(osm_points.index,osm_points.asset))
            
            if hazard_type == 'tc':
                results = pd.DataFrame([item for sublist in collect_point_damages
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_point[x])    
                
                damaged_points[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()
            
            elif hazard_type == 'fl':
                results = pd.DataFrame(np.array(list(flatten(collect_point_damages))).reshape(
                    int(len(list(flatten(collect_point_damages)))/6), 6),
                                       columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                results['asset'] = results['asset'].astype(int)
                results[['meandam','lowerdam','upperdam']] = results[['meandam','lowerdam','upperdam']].astype(float)
                
                #return collect_point_damages,get_asset_type_point
                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_point[x])    

                damaged_points[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

    return damaged_lines,damaged_poly,damaged_points

In [144]:
osm_power_infra = extract_osm_infrastructure('TWN',osm_data_path)

query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████████████| 2470/2470 [00:07<00:00, 350.32it/s]


query is finished, lets start the loop


extract: 100%|███████████████████████████████████████████████████████████████████████| 368/368 [00:16<00:00, 22.39it/s]


['substation' 'plant']
query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████| 1608621/1608621 [02:36<00:00, 10265.36it/s]


In [181]:
def save_exposure(country_code,osm_power_infra,hazard_type): #NEW VERSION
    # set paths
    #data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()
    
    # read infrastructure data:
    osm_lines,osm_poly,osm_points = osm_power_infra

    if hazard_type=='tc':
        # read wind data
        climate_models = [''] #,'_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM'
        df_ds = twn_wind #open_storm_data(country_code)!!!!!!!!!!!!!!!!!!!
        
        # remove assets that will not have any damage
        osm_lines = osm_lines.loc[osm_lines.asset != 'cable'].reset_index(drop=True)
        osm_lines['asset'] = osm_lines['asset'].replace(['minor_line'], 'line')
        osm_poly = osm_poly.loc[osm_poly.asset != 'plant'].reset_index(drop=True)
            
    elif hazard_type=='fl':
        # read flood data
        climate_models = ['historical'] #,'rcp8p5'
        df_ds = twn_flood #open_flood_data(country_code)!!!!!!!!!!!!!!!!!!!
                
    for climate_model in climate_models:
        if hazard_type=='tc':
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),
                              '1_10{}'.format(climate_model),'1_25{}'.format(climate_model),'1_50{}'.format(climate_model),
                              '1_100{}'.format(climate_model),'1_250{}'.format(climate_model),'1_500{}'.format(climate_model),
                              '1_1000{}'.format(climate_model)]
            
        elif hazard_type == 'fl':
            return_periods = ['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000'] 
            
        for return_period in return_periods:
            # assess damage for lines
            #print(df_ds[climate_model][[return_period,'geometry']])

            df_hazard = df_ds[climate_model][[return_period,'geometry']]
            df_hazard = df_hazard[~df_hazard.eq(0).any(axis=1)]
            print(df_hazard)
            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_hazard,osm_lines).T,
                                         columns=['asset','hazard_point'])

            overlay_lines['geometry'] = None

            for index, row in overlay_lines.iterrows():
                hazard_point = row['hazard_point']
                if hazard_point not in df_hazard.index:
                    overlay_lines = overlay_lines.drop(index)
                else:
                    geometry = df_hazard.loc[hazard_point, 'geometry']
                    overlay_lines.at[index, 'geometry'] = geometry

            #overlay_lines.to_excel(os.path.join(output_path,f'{country_code}_overlay_lines_{climate_model}.xlsx'))

            # assess damage for polygons
            if len(osm_poly) > 0:
                overlay_poly = pd.DataFrame(overlay_hazard_assets(df_hazard,osm_poly).T,
                                        columns=['asset','hazard_point'])
            else:
                overlay_poly = pd.DataFrame()

            overlay_poly['geometry'] = None

            for index, row in overlay_poly.iterrows():
                hazard_point = row['hazard_point']
                if hazard_point not in df_hazard.index:
                    overlay_poly = overlay_poly.drop(index)
                else:
                    geometry = df_hazard.loc[hazard_point, 'geometry']
                    overlay_poly.at[index, 'geometry'] = geometry
                    
            #assess damage for points
            overlay_points = pd.DataFrame(overlay_hazard_assets(df_hazard,osm_points).T,
                                          columns=['asset','hazard_point'])

            overlay_points['geometry'] = None

            for index, row in overlay_points.iterrows():
                hazard_point = row['hazard_point']
                if hazard_point not in df_hazard.index:
                    overlay_points = overlay_points.drop(index)
                else:
                    geometry = df_hazard.loc[hazard_point, 'geometry']
                    overlay_points.at[index, 'geometry'] = geometry
        
            df = pd.concat([overlay_lines,overlay_poly,overlay_points])

            # 根据hazard_point计算每个hazard_point对应的asset个数
            hazard_counts = df.groupby('hazard_point')['asset'].nunique().reset_index()
            hazard_counts.columns = ['hazard_point', 'asset_count']

            # 从原始DataFrame中获取每个hazard_point对应的geometry
            hazard_geometry = df[['hazard_point', 'geometry']].drop_duplicates()

            osm_exposure = pd.merge(hazard_counts, hazard_geometry, on='hazard_point')

            osm_exposure.to_excel(os.path.join(output_path,exposure,f'{country_code}_osm_exposure_{hazard_type}_{return_period}.xlsx'))

    return osm_exposure

In [183]:
osm_exposure = save_exposure('TWN',osm_power_infra,'tc')

            1_1                                           geometry
0     35.695326  POLYGON ((13057776.27 2391878.588, 13057776.27...
1     35.847856  POLYGON ((13068908.219 2391878.588, 13068908.2...
2     35.827720  POLYGON ((13080040.168 2391878.588, 13080040.1...
3     35.713667  POLYGON ((13091172.117 2391878.588, 13091172.1...
4     35.605688  POLYGON ((13102304.066 2391878.588, 13102304.0...
...         ...                                                ...
3181  39.264171  POLYGON ((13658901.52 3036284.923, 13658901.52...
3182  39.642033  POLYGON ((13670033.469 3036284.923, 13670033.4...
3183  39.615541  POLYGON ((13681165.418 3036284.923, 13681165.4...
3184  39.849922  POLYGON ((13692297.368 3036284.923, 13692297.3...
3185  39.730572  POLYGON ((13703429.317 3036284.923, 13703429.3...

[3186 rows x 2 columns]
            1_2                                           geometry
0     36.153372  POLYGON ((13057776.27 2391878.588, 13057776.27...
1     36.301064  POLYGON ((13068908.2


KeyboardInterrupt



In [89]:
osm_exposure

Unnamed: 0,hazard_point,asset_count,geometry
0,840.0,17,"POLYGON ((11577227.043 1186775.975, 11577227.0..."
1,841.0,34,"POLYGON ((11588358.992 1186775.975, 11588358.9..."
2,842.0,19,"POLYGON ((11599490.941 1186775.975, 11599490.9..."
3,910.0,22,"POLYGON ((11532699.246 1198103.041, 11532699.2..."
4,913.0,3,"POLYGON ((11566095.093 1198103.041, 11566095.0..."
...,...,...,...
211,3376.0,10,"POLYGON ((11799866.024 1574216.548, 11799866.0..."
212,3377.0,7,"POLYGON ((11810997.973 1574216.548, 11810997.9..."
213,3391.0,16,"POLYGON ((11966845.26 1574216.548, 11966845.26..."
214,3449.0,12,"POLYGON ((11788734.075 1585691.789, 11788734.0..."


In [61]:
osm_exposure

Unnamed: 0,hazard_point,asset_count,geometry
0,840.0,17,"POLYGON ((11577227.043 1186775.975, 11577227.0..."
1,841.0,34,"POLYGON ((11588358.992 1186775.975, 11588358.9..."
2,842.0,19,"POLYGON ((11599490.941 1186775.975, 11599490.9..."
3,910.0,22,"POLYGON ((11532699.246 1198103.041, 11532699.2..."
4,913.0,3,"POLYGON ((11566095.093 1198103.041, 11566095.0..."
...,...,...,...
211,3376.0,10,"POLYGON ((11799866.024 1574216.548, 11799866.0..."
212,3377.0,7,"POLYGON ((11810997.973 1574216.548, 11810997.9..."
213,3391.0,16,"POLYGON ((11966845.26 1574216.548, 11966845.26..."
214,3449.0,12,"POLYGON ((11788734.075 1585691.789, 11788734.0..."


Unnamed: 0,asset,hazard_point,geometry
0,0.0,3391.0,"POLYGON ((11966845.26 1574216.548, 11966845.26..."
1,1.0,1589.0,"POLYGON ((11677414.584 1300219.208, 11677414.5..."
2,2.0,917.0,"POLYGON ((11610622.89 1198103.041, 11610622.89..."
3,3.0,916.0,"POLYGON ((11599490.941 1198103.041, 11599490.9..."
4,3.0,917.0,"POLYGON ((11610622.89 1198103.041, 11610622.89..."
...,...,...,...
4396,4257.0,2114.0,"POLYGON ((11755338.228 1379869.508, 11755338.2..."
4397,4258.0,2114.0,"POLYGON ((11755338.228 1379869.508, 11755338.2..."
4398,4259.0,2114.0,"POLYGON ((11755338.228 1379869.508, 11755338.2..."
4399,4260.0,2114.0,"POLYGON ((11755338.228 1379869.508, 11755338.2..."


In [66]:
from shapely import wkt
new_df = osm_exposure.copy()

# 将geometry列中的pygeos.lib.Geometry对象转换为WKT字符串
new_df['geometry'] = new_df['geometry'].apply(lambda geom: wkt.dumps(geom))

# 将geometry列转换为Geopandas的几何对象
new_df['geometry'] = gpd.GeoSeries.from_wkt(new_df['geometry'])

# 创建Geopandas的GeoDataFrame
gdf = gpd.GeoDataFrame(new_df)

# 绘制地图
fig, ax = plt.subplots(figsize=(10, 10))
gdf.plot(ax=ax, column='asset_count', cmap='YlOrRd', edgecolor='black', linewidth=0.5, legend=True)

# 设置图标题和坐标轴标签
ax.set_title('Asset Count Map')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# 显示图形
plt.show()


TypeError: One of the arguments is of incorrect type. Please provide only Geometry objects.

In [82]:
# extract nested dict by key
osm_damage_infra[1]['_CMCC-CM2-VHR4']

Unnamed: 0,rp,curve,asset_type,meandam,lowerdam,upperdam
0,1_1000_CMCC-CM2-VHR4,W2_1_1,substation,0.000069,0.000051,0.000086
1,1_1000_CMCC-CM2-VHR4,W2_1_2,substation,0.000042,0.000032,0.000053
2,1_1000_CMCC-CM2-VHR4,W2_1_3,substation,0.000023,0.000017,0.000029
3,1_1000_CMCC-CM2-VHR4,W2_1_4,substation,0.000014,0.000011,0.000018
4,1_1000_CMCC-CM2-VHR4,W2_1_5,substation,0.000012,0.000009,0.000015
...,...,...,...,...,...,...
415,1_5_CMCC-CM2-VHR4,W2_7_2,substation,897.686595,673.264946,1122.108244
416,1_5_CMCC-CM2-VHR4,W2_7_3,substation,484.363271,363.272453,605.454088
417,1_5_CMCC-CM2-VHR4,W2_7_4,substation,305.148861,228.861645,381.436076
418,1_5_CMCC-CM2-VHR4,W2_7_5,substation,256.712533,192.534400,320.890667


In [11]:
def country_analysis_osm(country_code,hazard_type):
    # set paths
    #data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()
    
    # extract infrastructure data from OSM
    osm_power_infra = extract_osm_infrastructure(country_code,osm_data_path)
    
    # assess damage to hazard_type
    osm_damage_infra = assess_damage_osm(country_code,osm_power_infra,hazard_type)
    
    line_risk = {}
    plant_risk = {}
    substation_risk = {}
    tower_risk = {}
    pole_risk = {}

    if hazard_type=='tc':
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']

        for i in range(len(osm_damage_infra)):
            for climate_model in climate_models:
                df = osm_damage_infra[i][climate_model]

                if len(df) == 0:
                    print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))

                else:
                    with pd.ExcelWriter(os.path.join(output_path,'damage','{}_osm_{}{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                        df.to_excel(writer)

                    df['rp'] = df['rp'].replace(['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),
                                                '1_10{}'.format(climate_model),'1_25{}'.format(climate_model),'1_50{}'.format(climate_model),
                                                '1_100{}'.format(climate_model),'1_250{}'.format(climate_model),'1_500{}'.format(climate_model),
                                                '1_1000{}'.format(climate_model)],
                                                [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001])

                    curve_code_substation = ['W2_1_1','W2_1_2','W2_1_3','W2_1_4','W2_1_5','W2_1_6','W2_2_1','W2_2_2','W2_2_3','W2_2_4','W2_2_5','W2_2_6',
                                             'W2_3_1','W2_3_2','W2_3_3','W2_3_4','W2_3_5','W2_3_6','W2_4_1','W2_4_2','W2_4_3','W2_4_4','W2_4_5','W2_4_6',
                                             'W2_5_1','W2_5_2','W2_5_3','W2_5_4','W2_5_5','W2_5_6','W2_6_1','W2_6_2','W2_6_3','W2_6_4','W2_6_5','W2_6_6',
                                             'W2_7_1','W2_7_2','W2_7_3','W2_7_4','W2_7_5','W2_7_6']


                    curve_code_tower = ['W3_1','W3_2','W3_3','W3_4','W3_5','W3_6','W3_7','W3_8','W3_9','W3_10','W3_11','W3_12','W3_13','W3_14','W3_15',
                                        'W3_16','W3_17','W3_18','W3_19','W3_20','W3_21','W3_22','W3_23','W3_24','W3_25','W3_26','W3_27','W3_28']

                    curve_code_pole = ['W4_1','W4_2','W4_3','W4_4','W4_5','W4_6','W4_7','W4_8','W4_9','W4_10','W4_11','W4_12',
                                       'W4_13','W4_14','W4_15','W4_16','W4_17','W4_18','W4_19','W4_20','W4_21','W4_22','W4_23',
                                       'W4_24','W4_25','W4_26','W4_27','W4_28','W4_29','W4_30','W4_31','W4_32','W4_33','W4_34',
                                       'W4_35','W4_36','W4_37','W4_38','W4_39','W4_40','W4_41','W4_42','W4_43','W4_44','W4_45',
                                       'W4_46','W4_47','W4_48','W4_49','W4_50','W4_51','W4_52','W4_53','W4_54','W4_55','W4_56']

                    curve_code_line = ['W5_1_1','W5_1_2','W5_1_3','W5_1_4','W5_1_5','W5_1_6','W5_1_7','W5_1_8','W5_1_9','W5_1_10','W5_1_11','W5_1_12',
                                       'W5_2_1','W5_2_2','W5_2_3','W5_2_4','W5_2_5','W5_2_6','W5_2_7','W5_2_8','W5_2_9','W5_2_10','W5_2_11','W5_2_12',
                                       'W5_3_1','W5_3_2','W5_3_3','W5_3_4','W5_3_5','W5_3_6','W5_3_7','W5_3_8','W5_3_9','W5_3_10','W5_3_11','W5_3_12',
                                       'W5_4_1','W5_4_2','W5_4_3','W5_4_4','W5_4_5','W5_4_6','W5_4_7','W5_4_8','W5_4_9','W5_4_10','W5_4_11','W5_4_12',
                                       'W5_5_1','W5_5_2','W5_5_3','W5_5_4','W5_5_5','W5_5_6','W5_5_7','W5_5_8','W5_5_9','W5_5_10','W5_5_11','W5_5_12',
                                       'W5_6_1','W5_6_2','W5_6_3','W5_6_4','W5_6_5','W5_6_6','W5_6_7','W5_6_8','W5_6_9','W5_6_10','W5_6_11','W5_6_12',
                                       'W5_7_1','W5_7_2','W5_7_3','W5_7_4','W5_7_5','W5_7_6','W5_7_7','W5_7_8','W5_7_9','W5_7_10','W5_7_11','W5_7_12']

                    #assess risk for power lines
                    if i == 0:
                        for curve_code in curve_code_line:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of power lines ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                line_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                    #assess risk for power substations                
                    elif i == 1:                        
                        for curve_code in curve_code_substation:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of substations ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                substation_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                    #assess risk for power towers and power poles
                    elif i == 2:
                        for curve_code in curve_code_tower:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of power towers ...")

                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                tower_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                            for curve_code in curve_code_pole:
                                loss_list = df.loc[df['curve'] == curve_code]
                                loss_list = loss_list.sort_values(by='rp',ascending=False)
                                if len(loss_list) == 0:
                                    print("No risk of power poles ...")

                                else:                    
                                    loss_list_mean = loss_list.meandam.values.tolist()
                                    loss_list_lower = loss_list.lowerdam.values.tolist()
                                    loss_list_upper = loss_list.upperdam.values.tolist()
                                    RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                    RPS = RPS.rp.values.tolist()
                                    pole_risk[climate_model,curve_code] = {
                                        'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                        'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                        'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                    }

    elif hazard_type=='fl':
        climate_models = ['historical','rcp8p5']
    
        for i in range(len(osm_damage_infra)):
            for climate_model in climate_models:
                df = osm_damage_infra[i][climate_model]
                    
                if len(df) == 0:
                    print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))

                else:
                    with pd.ExcelWriter(os.path.join(output_path,'damage','{}_osm_{}_{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                        df.to_excel(writer)

                    df['rp'] = df['rp'].replace(['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000'],
                                                [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001])
                    
                    curve_code_plant = ['F1_1_1','F1_1_2','F1_1_3']
                    curve_code_substation = ['F2_1_1','F2_1_2','F2_1_3']
                    curve_code_tower = ['F3_1_1','F3_1_2']
                    curve_code_pole = ['F4_1_1','F4_1_2','F4_1_3','F4_1_4']
                    curve_code_line = ['F5_1_1','F5_1_2','F5_1_3','F5_1_4','F5_1_5','F5_1_6','F5_1_7','F5_1_8',
                                      'F5_1_9','F5_1_10','F5_1_11','F5_1_12']
                    curve_code_minor_line = ['F5_2']
                    curve_code_cable = ['F5_3_1','F5_3_2']

                    #assess risk for power lines
                    if i == 0:
                        for curve_code in curve_code_line:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of power lines ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                line_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                    #assess risk for power plants and substations                
                    elif i == 1:
                        for curve_code in curve_code_plant:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of plants ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                plant_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                        for curve_code in curve_code_substation:    
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of substations ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                substation_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                    }

                    #assess risk for power towers and power poles
                    elif i == 2:
                        for curve_code in curve_code_tower:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of power towers ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                tower_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }
                            
                        for curve_code in curve_code_pole:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of power poles ...")
                            
                            else:                    
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                pole_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }
                                
    return pd.DataFrame(line_risk),pd.DataFrame(plant_risk),pd.DataFrame(substation_risk),pd.DataFrame(tower_risk),pd.DataFrame(pole_risk)

# Government data processing

In [10]:
def extract_pg_infrastructure(country_code):
    """_summary_

    Args:
        country_code (_type_): _description_
        pg_type (_type_): _description_

    Returns:
        _type_: _description_
    """

    # set paths
    #data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()

    files = [x for x in os.listdir(pg_data_path)  if country_code in x ]
    pg_types = ['line','point']
    
    for pg_type in pg_types:
        if os.path.isfile(os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))):
            if pg_type=='line':
                for file in files: 
                    file_path = os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))

                    pg_data_country = gpd.read_file(file_path)
                    pg_data_country = pd.DataFrame(pg_data_country.copy())
                    pg_data_country.geometry = pygeos.from_shapely(pg_data_country.geometry)
                    pg_data_country['geometry'] = reproject(pg_data_country)

                pg_lines = buffer_assets(pg_data_country.loc[pg_data_country.asset.isin(['line'])],buffer_size=100).reset_index(drop=True)

            elif pg_type=='point':
                for file in files:
                    file_path = os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))

                    pg_data_country = gpd.read_file(file_path)
                    pg_data_country = pd.DataFrame(pg_data_country.copy())
                    pg_data_country.geometry = pygeos.from_shapely(pg_data_country.geometry)
                    pg_data_country['geometry'] = reproject(pg_data_country)

                pg_points = buffer_assets(pg_data_country.loc[pg_data_country.asset.isin(['plant','substation','power_tower','power_pole'])],buffer_size=100).reset_index(drop=True)

    return pg_lines,pg_points

In [11]:
pg_infra = extract_pg_infrastructure('BRN')
print(type(pg_infra))

<class 'tuple'>


In [13]:
pg_infra[0]

Unnamed: 0,voltage_kv,asset,geometry,buffered
0,66,line,"LINESTRING (12709972.524 509284.354, 12718390....","POLYGON ((12718400.226 508569.237, 12718401.50..."
1,66,line,"LINESTRING (12723006.954 513810.198, 12733597....","POLYGON ((12733571.047 516803.195, 12733590.37..."
2,66,line,"LINESTRING (12770799.864 531913.573, 12777498....","POLYGON ((12777414.455 542196.764, 12777417.71..."
3,66,line,"LINESTRING (12788903.239 556534.163, 12793248....","POLYGON ((12793283.162 554998.492, 12793287.25..."
4,132,line,"LINESTRING (12718119.043 508560.219, 12724274....","POLYGON ((12724292.117 505675.616, 12733601.32..."
5,275,line,"LINESTRING (12743101.701 508288.668, 12750343....","POLYGON ((12750290.949 509198.09, 12759941.625..."
6,132,line,"LINESTRING (12788541.172 550288.498, 12792161....","POLYGON ((12792135.938 548602.919, 12802404.13..."
7,66,line,"LINESTRING (12735679.317 517385.614, 12736675....","POLYGON ((12736580.434 520314.662, 12736588.59..."
8,66,line,"LINESTRING (12735543.542 517204.581, 12741155....","POLYGON ((12741121.474 519335.212, 12741140.46..."
9,66,line,"LINESTRING (12735905.609 516933.03, 12751202.9...","POLYGON ((12751172.269 521961.373, 12751193.13..."


In [21]:
def assess_damage_pg(country_code,pg_infra,hazard_type):
    """_summary_

    Args:
        country_code (_type_): _description_
        pg_data_country (_type_): _description_

    Returns:
        _type_: _description_
    """
    
    # set paths
    #data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()

    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(country_code,vul_curve_path,hazard_type)
    
    # read infrastructure data:
    pg_lines,pg_points = pg_infra
    
    #calculate damaged lines/polygons/points in loop by climate_model
    damaged_lines = {}
    damaged_points = {}

    if hazard_type=='tc':
        # read wind data
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        df_ds = open_storm_data(country_code)
        
        # remove assets that will not have any damage
        pg_points = pg_points.loc[pg_points.asset != 'plant'].reset_index(drop=True)
    
    elif hazard_type == 'fl':
        # read flood data
        climate_models = ['historical','rcp8p5']
        df_ds = open_flood_data(country_code)
        
    for climate_model in climate_models:
        if hazard_type=='tc':
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]
        elif hazard_type == 'fl':
            return_periods = ['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000'] 
            
        # assess damage for lines
        overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],pg_lines).T,
                                     columns=['asset','hazard_point'])

        if len(overlay_lines) == 0:
            damaged_lines[climate_model] = pd.DataFrame()

        else:
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_line_damages.append(get_damage_per_asset_per_rp(asset,
                                                                            df_ds[climate_model],
                                                                            pg_lines,
                                                                            curves,
                                                                            maxdam,
                                                                            return_period,
                                                                            country_code))

            get_asset_type_line = dict(zip(pg_lines.index,pg_lines.asset))
            
            if hazard_type=='tc':
                results = pd.DataFrame([item for sublist in collect_line_damages
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

                damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()
                
            elif hazard_type == 'fl':
                results = pd.DataFrame(collect_line_damages,columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

                #sum damage of line, cable, and minor_line
                results['curve'] = results['curve'].replace(['cable', 'minor_line'], 'line')
                results['asset_type'] = results['asset_type'].replace(['cable', 'minor_line'], 'line')

                damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index() 

        # assess damage for points
        overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],pg_points).T,
                                      columns=['asset','hazard_point'])

        if len(overlay_points) == 0:
            damaged_points[climate_model] = pd.DataFrame()

        else:
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_point_damages.append(get_damage_per_asset_per_rp(asset,
                                                                            df_ds[climate_model],
                                                                            pg_points,
                                                                            curves,
                                                                            maxdam,
                                                                            return_period,
                                                                            country_code))

            get_asset_type_point = dict(zip(pg_points.index,pg_points.asset))
            
            if hazard_type == 'tc':
                results = pd.DataFrame([item for sublist in collect_point_damages
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
            elif hazard_type == 'fl':
                results = pd.DataFrame(collect_point_damages ,columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

            results['asset_type'] = results.asset.apply(lambda x : get_asset_type_point[x])    

            damaged_points[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

                
    return damaged_lines,damaged_points

In [22]:
%%time
pg_damage_infra = assess_damage_pg('BRN',pg_infra,'fl')

Infrastructure type  plant               substation                  \
Code                F1_1_1 F1_1_2 F1_1_3     F2_1_1  F2_1_2  F2_1_3   
Depth (cm)                                                            
265.0                0.217  0.217  0.217     0.1340  0.1340  0.1340   
270.0                0.243  0.243  0.243     0.1370  0.1370  0.1370   
274.0                0.247  0.247  0.247     0.1385  0.1385  0.1385   
275.0                0.251  0.251  0.251     0.1400  0.1400  0.1400   
280.0                0.259  0.259  0.259     0.1440  0.1440  0.1440   
285.0                0.268  0.268  0.268     0.1470  0.1470  0.1470   
290.0                0.276  0.276  0.276     0.1500  0.1500  0.1500   
295.0                0.284  0.284  0.284     0.1540  0.1540  0.1540   
300.0                0.292  0.292  0.292     0.1570  0.1570  0.1570   
305.0                0.300  0.300  0.300     0.1500  0.1500  0.1500   

Infrastructure type power_tower        power_pole         ...   line        

polyline damage calculation for BRN fl (rcp8p5): 100%|███████████████████████████████████| 1/1 [00:00<00:00, 22.22it/s]

CPU times: total: 5.09 s
Wall time: 5.25 s





In [26]:
pg_damage_infra[1]['rcp8p5']

In [33]:
%%time
pg_damage_infra_fl = assess_damage_pg('JPN',pg_infra,'fl')

Loading historical coastal flood data ...
Loading future coastal flood data ...


polyline damage calculation for JPN fl (historical): 100%|███████████████████████████████| 5/5 [00:00<00:00, 25.00it/s]
point damage calculation for JPN fl (historical): 100%|██████████████████████████████████| 1/1 [00:00<00:00, 45.45it/s]
polyline damage calculation for JPN fl (rcp8p5): 100%|███████████████████████████████████| 5/5 [00:00<00:00, 24.04it/s]
point damage calculation for JPN fl (rcp8p5): 100%|██████████████████████████████████████| 1/1 [00:00<00:00, 50.00it/s]


CPU times: total: 8min 7s
Wall time: 8min 8s


In [34]:
pg_damage_infra_fl

({'historical':        rp       curve  asset_type        meandam       lowerdam       upperdam
  0  rp0001  substation  substation  490588.160384  367941.120288  613235.200480
  1  rp0002  substation  substation  517498.328609  388123.746457  646872.910762
  2  rp0005  substation  substation  559647.208366  419735.406274  699559.010457
  3  rp0010  substation  substation  579966.806294  434975.104721  724958.507868
  4  rp0025  substation  substation  608736.040167  456552.030125  760920.050208
  5  rp0050  substation  substation  632107.357808  474080.518356  790134.197260
  6  rp0100  substation  substation  652340.431603  489255.323702  815425.539504
  7  rp0250  substation  substation  682990.145845  512242.609384  853737.682306
  8  rp0500  substation  substation  699857.025260  524892.768945  874821.281575
  9  rp1000  substation  substation  721527.021566  541145.266174  901908.776957,
  'rcp8p5':        rp       curve  asset_type        meandam       lowerdam       upperdam
  0

In [2]:
def country_analysis_pg(country_code,hazard_type):
    """_summary_

    Args:
        country_code (_type_): _description_
        hazard_type (str, optional): _description_. Defaults to 'OSM'.

    Returns:
        _type_: _description_
    """
    # set paths
    #data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()
    
    # extract infrastructure data from gov data
    pg_power_infra = extract_pg_infrastructure(country_code)
    
    # assess damage to hazard_type
    pg_damage_infra = assess_damage_pg(country_code,pg_power_infra,hazard_type)
    
    line_risk = {}
    plant_risk = {}
    substation_risk = {}

    if hazard_type=='tc':
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']

        for i in range(len(pg_damage_infra)):
            for climate_model in climate_models:
                df = pg_damage_infra[i][climate_model]
                    
                if len(df) == 0:
                    print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))

                else:
                    with pd.ExcelWriter(os.path.join(output_path,'damage','{}_pg_{}{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                        df.to_excel(writer)

                    df['rp'] = df['rp'].replace(['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),
                                                '1_10{}'.format(climate_model),'1_25{}'.format(climate_model),'1_50{}'.format(climate_model),
                                                '1_100{}'.format(climate_model),'1_250{}'.format(climate_model),'1_500{}'.format(climate_model),
                                                '1_1000{}'.format(climate_model)],
                                                [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001])
                    
                    curve_code_substation = ['W2_1_1','W2_1_2','W2_1_3','W2_2_1','W2_2_2','W2_2_3','W2_3_1','W2_3_2','W2_3_3',
                                            'W2_4_1','W2_4_2','W2_4_3','W2_5_1','W2_5_2','W2_5_3','W2_6_1','W2_6_2','W2_6_3',
                                            'W2_7_1','W2_7_2','W2_7_3']
                    
                    curve_code_line = ['W5_1','W5_2','W5_3']

                    #assess risk for power lines
                    if i == 0:
                        for curve_code in curve_code_line:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of power lines ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()

                                line_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }
                            #print(line_risk_curve)
                    
                    #assess risk for power substations                
                    elif i == 1:                        
                        for curve_code in curve_code_substation:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of substations ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()

                                substation_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }


    elif hazard_type=='fl':
        climate_models = ['historical','rcp8p5']
    
        for i in range(len(pg_damage_infra)):
            for climate_model in climate_models:
                df = pg_damage_infra[i][climate_model]
                    
                if len(df) == 0:
                    print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))

                else:
                    with pd.ExcelWriter(os.path.join(output_path,'damage','{}_pg_{}_{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                        df.to_excel(writer)

                    df['rp'] = df['rp'].replace(['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000'],
                                                [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001])
                    
                    curve_code_plant = ['F1_1_1','F1_1_2','F1_1_3']
                    curve_code_substation = ['F2_1_1','F2_1_2','F2_1_3']
                    curve_code_line = ['F5_1']

                    #assess risk for power lines
                    if i == 0:
                        for curve_code in curve_code_line:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of power lines ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                line_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                    #assess risk for power plants and substations                
                    elif i == 1:
                        for curve_code in curve_code_plant:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of plants ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                plant_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }
                            
                        for curve_code in curve_code_substation:
                            loss_list = df.loc[df['curve'] == curve_code]
                            if len(loss_list) == 0:
                                print("No risk of substations ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = loss_list.loc[loss_list['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                substation_risk[climate_model,curve_code] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                    }
                            
    return pd.DataFrame(line_risk),pd.DataFrame(plant_risk),pd.DataFrame(substation_risk)

In [None]:
pg_risk_tc = country_analysis_pg('JPN','tc')

In [178]:
pd.DataFrame(pg_risk_tc[2])

Unnamed: 0,_CMCC-CM2-VHR4,_CNRM-CM6-1-HR
0,W2_7_3 mean_risk 1.434933e...,W2_7_3 mean_risk 1.544563e...


In [60]:
pg_risk_fl = country_analysis_pg('JPN','fl')

Loading historical coastal flood data ...
Loading future coastal flood data ...


polyline damage calculation for JPN fl (historical): 100%|███████████████████████████████| 5/5 [00:00<00:00, 24.88it/s]
point damage calculation for JPN fl (historical): 100%|██████████████████████████████████| 1/1 [00:00<00:00, 41.67it/s]
polyline damage calculation for JPN fl (rcp8p5): 100%|███████████████████████████████████| 5/5 [00:00<00:00, 23.80it/s]
point damage calculation for JPN fl (rcp8p5): 100%|██████████████████████████████████████| 1/1 [00:00<00:00, 49.92it/s]


No risk of plants ...
No risk of plants ...


# Save results

In [17]:
def risk_output(country_code,hazard_type,infra_type):
    # set paths
    #data_path,tc_path,fl_path,osm_data_path,pg_data_path,vul_curve_path,output_path,ne_path = set_paths()
    
    if hazard_type == 'tc':
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        
    elif hazard_type == 'fl':
        climate_models = ['historical','rcp8p5']
  
    if infra_type == 'osm':
        line_risk,plant_risk,substation_risk,tower_risk,pole_risk = country_analysis_osm(country_code,hazard_type)
    
    elif infra_type == 'gov':
        line_risk,plant_risk,substation_risk = country_analysis_pg(country_code,hazard_type)
            
    for climate_model in climate_models:
        if climate_model == '':
            writer = pd.ExcelWriter(os.path.join(output_path,'risk','{}_{}_{}_{}_risk'.format(country_code,infra_type,hazard_type,'present')+'.xlsx'),
                                    engine='openpyxl')
        else:
            writer = pd.ExcelWriter(os.path.join(output_path,'risk','{}_{}_{}_{}_risk'.format(country_code,infra_type,hazard_type,climate_model)+'.xlsx'),
                                    engine='openpyxl')

        # write each dataframe to a different sheet
        if len(line_risk) != 0:
            line_risk[climate_model].to_excel(writer, sheet_name='line_risk')
        if len(plant_risk) != 0:
            plant_risk[climate_model].to_excel(writer, sheet_name='plant_risk')
        if len(substation_risk) != 0:
            substation_risk[climate_model].to_excel(writer, sheet_name='substation_risk')
        if len(tower_risk) != 0:
            tower_risk[climate_model].to_excel(writer, sheet_name='tower_risk')
        if len(pole_risk) != 0:
            pole_risk[climate_model].to_excel(writer, sheet_name='pole_risk')

        # save the Excel file
        if writer.sheets:
            writer.save()

        else:
            df = pd.DataFrame()
            df.loc[1:3, 0] = ['mean_risk', 'lower_risk', 'upper_risk']

            writer = pd.ExcelWriter(os.path.join(output_path,'risk','{}_{}_{}_{}_risk'.format(country_code,infra_type,hazard_type,climate_model)+'.xlsx'))
            df.to_excel(writer,sheet_name='line_risk', index=False)
            writer.save()

In [18]:
%%time
risk_output('BRN','tc','osm')

query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████████████████| 87/87 [00:00<00:00, 435.42it/s]


query is finished, lets start the loop


extract: 100%|█████████████████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 83.31it/s]


['plant' 'substation']
query is finished, lets start the loop


extract: 100%|██████████████████████████████████████████████████████████████████| 8774/8774 [00:00<00:00, 10222.38it/s]


The ratio_usa for BRN is 0.5201


polygon damage calculation for BRN tc (): 100%|██████████████████████████████████████████| 1/1 [00:01<00:00,  1.63s/it]
polygon damage calculation for BRN tc (_CMCC-CM2-VHR4): 100%|████████████████████████████| 1/1 [00:01<00:00,  1.61s/it]
polygon damage calculation for BRN tc (_CNRM-CM6-1-HR): 100%|████████████████████████████| 1/1 [00:01<00:00,  1.64s/it]
polygon damage calculation for BRN tc (_EC-Earth3P-HR): 100%|████████████████████████████| 1/1 [00:01<00:00,  1.61s/it]
polygon damage calculation for BRN tc (_HadGEM3-GC31-HM): 100%|██████████████████████████| 1/1 [00:01<00:00,  1.60s/it]


No tc_ risk of infra_type 0 in BRN
No tc__CMCC-CM2-VHR4 risk of infra_type 0 in BRN
No tc__CNRM-CM6-1-HR risk of infra_type 0 in BRN
No tc__EC-Earth3P-HR risk of infra_type 0 in BRN
No tc__HadGEM3-GC31-HM risk of infra_type 0 in BRN
No tc_ risk of infra_type 2 in BRN
No tc__CMCC-CM2-VHR4 risk of infra_type 2 in BRN
No tc__CNRM-CM6-1-HR risk of infra_type 2 in BRN
No tc__EC-Earth3P-HR risk of infra_type 2 in BRN
No tc__HadGEM3-GC31-HM risk of infra_type 2 in BRN


AttributeError: 'OpenpyxlWriter' object has no attribute 'save'

In [356]:
osm_damage_infra[1]['historical'].loc[osm_damage_infra[1]['historical']['asset_type'] == 'plant']

Unnamed: 0,rp,curve,asset_type,meandam,lowerdam,upperdam
0,1.0,plant,plant,1658665000.0,452363300.0,2261816000.0
2,0.5,plant,plant,1730781000.0,472031100.0,2360156000.0
4,0.2,plant,plant,2388258000.0,651343000.0,3256715000.0
6,0.1,plant,plant,2552940000.0,696256400.0,3481282000.0
8,0.04,plant,plant,2767454000.0,754760100.0,3773801000.0
10,0.02,plant,plant,2921201000.0,796691200.0,3983456000.0
12,0.01,plant,plant,3080762000.0,840207900.0,4201039000.0
14,0.004,plant,plant,3356950000.0,915532000.0,4577660000.0
16,0.002,plant,plant,3528684000.0,962368400.0,4811842000.0
18,0.001,plant,plant,3698459000.0,1008671000.0,5043354000.0


In [336]:
osm_damage_infra[0]['historical'].loc[:,"rp"]

0     1.000
1     1.000
2     1.000
3     0.500
4     0.500
5     0.500
6     0.200
7     0.200
8     0.200
9     0.100
10    0.100
11    0.100
12    0.040
13    0.040
14    0.040
15    0.020
16    0.020
17    0.020
18    0.010
19    0.010
20    0.010
21    0.004
22    0.004
23    0.004
24    0.002
25    0.002
26    0.002
27    0.001
28    0.001
29    0.001
Name: rp, dtype: float64

In [None]:
"""
def clip_gridfinder(country_code):
    base_map_path = os.path.join(data_path,'base_map')

    cty_boundary_path = os.path.join(base_map_path,'gadm41_{}.gpkg'.format(country_code))
    cty_boundary = gpd.read_file(cty_boundary_path)
    #mask = pd.DataFrame(mask.copy())
    #mask.geometry = pygeos.from_shapely(mask.geometry)
    #mask['geometry'] = reproject(mask)

    gridfinder_path = r'C:\Users\mye500\OneDrive - Vrije Universiteit Amsterdam\01_Research-Projects\01_risk_assessment\PG_data\gridfinder\grid.gpkg'
    gridfinder = gpd.read_file(gridfinder_path)
    #gridfinder = pd.DataFrame(gridfinder.copy())
    #gridfinder.geometry = pygeos.from_shapely(gridfinder.geometry)
    #gridfinder['geometry'] = reproject(gridfinder)

    clipped = gpd.clip(gridfinder,cty_boundary)

    return clipped

clip_gridfinder('TWN')
"""