In [1]:
import geopandas as gpd
import pandas as pd
from osgeo import ogr,gdal
import os
import xarray as xr
import rasterio
import numpy as np
import pyproj
from pygeos import from_wkb,from_wkt
import pygeos
from tqdm import tqdm
from shapely.wkb import loads
from pathlib import Path
import glob
from shapely.geometry import mapping
pd.options.mode.chained_assignment = None
from rasterio.mask import mask
import rioxarray
import matplotlib.pyplot as plt
from scipy import integrate

import warnings
warnings.filterwarnings("ignore")

from scipy import integrate


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [16]:
gdal.SetConfigOption("OSM_CONFIG_FILE", os.path.join('..',"osmconf.ini"))

# change paths to make it work on your own machine
data_path = os.path.join('C:\\','Data','pg_risk_analysis')
tc_path = os.path.join(data_path,'tc_netcdf')
fl_path = os.path.join(data_path,'GLOFRIS')
osm_data_path = os.path.join('C:\\','Data','country_osm')
pg_data_path = os.path.join(data_path,'pg_data')
vul_curve_path = os.path.join(data_path,'vulnerability_curves','input_vulnerability_data.xlsx')
output_path = os.path.join('C:\\','projects','pg_risk_analysis','output')
ne_path = os.path.join(data_path,'..',"natural_earth","ne_10m_admin_0_countries.shp")

In [3]:
def query_b(geoType,keyCol,**valConstraint):
    """
    This function builds an SQL query from the values passed to the retrieve() function.
    Arguments:
         *geoType* : Type of geometry (osm layer) to search for.
         *keyCol* : A list of keys/columns that should be selected from the layer.
         ***valConstraint* : A dictionary of constraints for the values. e.g. WHERE 'value'>20 or 'value'='constraint'
    Returns:
        *string: : a SQL query string.
    """
    query = "SELECT " + "osm_id"
    for a in keyCol: query+= ","+ a  
    query += " FROM " + geoType + " WHERE "
    # If there are values in the dictionary, add constraint clauses
    if valConstraint: 
        for a in [*valConstraint]:
            # For each value of the key, add the constraint
            for b in valConstraint[a]: query += a + b
        query+= " AND "
    # Always ensures the first key/col provided is not Null.
    query+= ""+str(keyCol[0]) +" IS NOT NULL" 
    return query 


def retrieve(osm_path,geoType,keyCol,**valConstraint):
    """
    Function to extract specified geometry and keys/values from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.     
        *geoType* : Type of Geometry to retrieve. e.g. lines, multipolygons, etc.
        *keyCol* : These keys will be returned as columns in the dataframe.
        ***valConstraint: A dictionary specifiying the value constraints.  
        A key can have multiple values (as a list) for more than one constraint for key/value.  
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with all columns, geometries, and constraints specified.    
    """
    driver=ogr.GetDriverByName('OSM')
    data = driver.Open(osm_path)
    query = query_b(geoType,keyCol,**valConstraint)
    sql_lyr = data.ExecuteSQL(query)
    features =[]
    # cl = columns 
    cl = ['osm_id'] 
    for a in keyCol: cl.append(a)
    if data is not None:
        print('query is finished, lets start the loop')
        for feature in tqdm(sql_lyr,desc='extract'):
            #try:
            if feature.GetField(keyCol[0]) is not None:
                geom1 = (feature.geometry().ExportToWkt())
                #print(geom1)
                geom = from_wkt(feature.geometry().ExportToWkt()) 
                if geom is None:
                    continue
                # field will become a row in the dataframe.
                field = []
                for i in cl: field.append(feature.GetField(i))
                field.append(geom)   
                features.append(field)
            #except:
            #    print("WARNING: skipped OSM feature")   
    else:
        print("ERROR: Nonetype error when requesting SQL. Check required.")    
    cl.append('geometry')                   
    if len(features) > 0:
        return pd.DataFrame(features,columns=cl)
    else:
        print("WARNING: No features or No Memory. returning empty GeoDataFrame") 
        return pd.DataFrame(columns=['osm_id','geometry'])

def power_polyline(osm_path):
    """
    Function to extract all energy linestrings from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """
    df = retrieve(osm_path,'lines',['power','voltage'])
    
    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})
    
    #print(df) #check infra keys
    
    return df.reset_index(drop=True)

def power_polygon(osm_path): # check with joel, something was wrong here with extracting substations
    """
    Function to extract energy polygons from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """
    df = retrieve(osm_path,'multipolygons',['other_tags']) 
    
    df = df.loc[(df.other_tags.str.contains('power'))]   #keep rows containing power data         
    df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})     
    
    df['asset'].loc[df['asset'].str.contains('"power"=>"substation"', case=False)]  = 'substation' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"plant"', case=False)] = 'plant' #specify row
    
    df = df.loc[(df.asset == 'substation') | (df.asset == 'plant')]
            
    return df.reset_index(drop=True) 

def electricity(osm_path):
    """
    Function to extract building polygons from OpenStreetMap    
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with all unique building polygons.    
    """
    df = retrieve(osm_path,'multipolygons',['power'])
    
    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})
    
    #df = df[df.asset!='generator']
    df['asset'].loc[df['asset'].str.contains('"power"=>"substation"', case=False)]  = 'substation' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"plant"', case=False)] = 'plant' #specify row
    
    #print(df)  #check infra keys
    
    df = df.loc[(df.asset == 'substation') | (df.asset == 'plant')]
    
    return df.reset_index(drop=True)

def retrieve_poly_subs(osm_path, w_list, b_list):
    """
    Function to extract electricity substation polygons from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region
        for which we want to do the analysis.
        *w_list* :  white list of keywords to search in the other_tags columns
        *b_list* :  black list of keywords of rows that should not be selected
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique substation.
    """
    df = retrieve(osm_path,'multipolygons',['other_tags'])
    df = df[df.other_tags.str.contains('substation', case=False, na=False)]
    #df = df.loc[(df.other_tags.str.contains('substation'))]
    df = df[~df.other_tags.str.contains('|'.join(b_list))]
    #df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})
    df['asset']  = 'substation' #specify row
    #df = df.loc[(df.asset == 'substation')] #specify row
    return df.reset_index(drop=True)

def power_point(osm_path):
    """
    Function to extract energy points from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """   
    df = retrieve(osm_path,'points',['other_tags']) 
    df = df.loc[(df.other_tags.str.contains('power'))]  #keep rows containing power data       
    df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})     
        
    df['asset'].loc[df['asset'].str.contains('"power"=>"tower"', case=False)]  = 'power_tower' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"pole"', case=False)] = 'power_pole' #specify row
    #df['asset'].loc[df['asset'].str.contains('"utility"=>"power"', case=False)] = 'power_tower' #specify row
    
    df = df.loc[(df.asset == 'power_tower') | (df.asset == 'power_pole')]
            
    return df.reset_index(drop=True)

In [23]:
def reproject(df_ds, current_crs="epsg:4326", approximate_crs="epsg:3857"):

    # Extract the input geometries as a numpy array of coordinates
    geometries = df_ds['geometry']
    coords = pygeos.get_coordinates(geometries)

    # Transform the coordinates using pyproj
    transformer = pyproj.Transformer.from_crs(current_crs, approximate_crs, always_xy=True)
    new_coords = transformer.transform(coords[:, 0], coords[:, 1])

    # Create a new GeoSeries with the reprojected coordinates
    return pygeos.set_coordinates(geometries.copy(), np.array(new_coords).T)

def buffer_assets(assets, buffer_size=100):
    """
    Create a buffer of a specified size around the geometries in a GeoDataFrame.
    
    Args:
        assets (GeoDataFrame): A GeoDataFrame containing geometries to be buffered.
        buffer_size (int, optional): The distance in the units of the GeoDataFrame's CRS to buffer the geometries.
            Defaults to 100.
    
    Returns:
        GeoDataFrame: A new GeoDataFrame with an additional column named 'buffered' containing the buffered
            geometries.
    """
    # Create a buffer of the specified size around the geometries
    assets['buffered'] = pygeos.buffer(assets.geometry.values, buffer_size)
    
    return assets

def load_curves_maxdam(vul_curve_path,hazard_type):
    """[summary]

    Args:
        data_path ([type]): [description]

    Returns:
        [type]: [description]
    """

    if hazard_type == 'tc':
        sheet_name = 'wind_curves'
    
    elif hazard_type == 'fl':
        sheet_name = 'flooding_curves'
    
    # load curves and maximum damages as separate inputs
    curves = pd.read_excel(vul_curve_path,sheet_name=sheet_name,skiprows=10,index_col=[0])
    
    if hazard_type == 'fl':
        maxdam = pd.read_excel(vul_curve_path,sheet_name=sheet_name,index_col=[0]).iloc[:8]
    elif hazard_type == 'tc':
        maxdam = pd.read_excel(vul_curve_path,sheet_name=sheet_name,index_col=[0],header=[0,1]).iloc[:7]
        maxdam = maxdam.rename({'substation_point':'substation'},level=0,axis=1)
            
    curves.columns = maxdam.columns
        
    #transpose maxdam so its easier work with the dataframe
    maxdam = maxdam.T

    #interpolate the curves to fill missing values
    curves = curves.interpolate()
    
    #print(curves)
   
    return curves,maxdam


def overlay_hazard_assets(df_ds, assets):
    """
    Overlay a set of assets with a hazard dataset and return the subset of assets that intersect with
    one or more hazard polygons or lines.
    
    Args:
        df_ds (GeoDataFrame): A GeoDataFrame containing the hazard dataset.
        assets (GeoDataFrame): A GeoDataFrame containing the assets to be overlaid with the hazard dataset.
    
    Returns:
        ndarray: A numpy array of integers representing the indices of the hazard geometries that intersect with
            the assets. If the assets have a 'buffered' column, the buffered geometries are used for the overlay.
    """
    hazard_tree = pygeos.STRtree(df_ds.geometry.values)
    if (pygeos.get_type_id(assets.iloc[0].geometry) == 3) | (pygeos.get_type_id(assets.iloc[0].geometry) == 6):
        return  hazard_tree.query_bulk(assets.geometry,predicate='intersects')    
    else:
        return  hazard_tree.query_bulk(assets.buffered,predicate='intersects')
    
def get_damage_per_asset_per_rp(asset,df_ds,assets,curves,maxdam,return_period,country):
    """
    Calculates the damage per asset per return period based on asset type, hazard curves and maximum damage

    Args:
        asset (tuple): Tuple with two dictionaries, containing the asset index and the hazard point index of the asset
        df_ds (pandas.DataFrame): A pandas DataFrame containing hazard points with a 'geometry' column
        assets (geopandas.GeoDataFrame): A GeoDataFrame containing asset geometries and asset type information
        curves (dict): A dictionary with the asset types as keys and their corresponding hazard curves as values
        maxdam (pandas.DataFrame): A pandas DataFrame containing the maximum damage for each asset type
        return_period (str): The return period for which the damage should be calculated
        country (str): The country for which the damage should be calculated

    Returns:
        list or tuple: Depending on the input, the function either returns a list of tuples with the asset index, the curve name and the calculated damage, or a tuple with None, None, None if no hazard points are found
    """
    
    # find the exact hazard overlays:
    get_hazard_points = df_ds.iloc[asset[1]['hazard_point'].values].reset_index()
    get_hazard_points = get_hazard_points.loc[pygeos.intersects(get_hazard_points.geometry.values,assets.iloc[asset[0]].geometry)]

    
    asset_type = assets.iloc[asset[0]].asset
    asset_geom = assets.iloc[asset[0]].geometry

    if asset_type in ['plant','substation','generator']:
        #if plant,substation are points, do not calculate the area
        if pygeos.area(asset_geom) == 0:
            maxdam_asset = maxdam.loc[asset_type].MaxDam
            lowerdam_asset = maxdam.loc[asset_type].LowerDam
            upperdam_asset = maxdam.loc[asset_type].UpperDam
        else:
            maxdam_asset = maxdam.loc[asset_type].MaxDam/pygeos.area(asset_geom)
            lowerdam_asset = maxdam.loc[asset_type].LowerDam/pygeos.area(asset_geom)
            upperdam_asset = maxdam.loc[asset_type].UpperDam/pygeos.area(asset_geom)
    else:
        maxdam_asset = maxdam.loc[asset_type].MaxDam
        lowerdam_asset = maxdam.loc[asset_type].LowerDam
        upperdam_asset = maxdam.loc[asset_type].UpperDam


    hazard_intensity = curves[asset_type].index.values
    
    if isinstance(curves[asset_type],pd.core.series.Series):
        fragility_values = curves[asset_type].values.flatten()
        only_one = True
        curve_name = curves[asset_type].name
    elif len(curves[asset_type].columns) == 1:
        fragility_values = curves[asset_type].values.flatten()      
        only_one = True   
        curve_name = curves[asset_type].columns[0]
    else:
        fragility_values = curves[asset_type].values#.T[0]
        maxdam_asset = maxdam_asset.values#[0]
        only_one = False

    if len(get_hazard_points) == 0:
        return [return_period,asset[0],None,None]
    else:
        if only_one:    
            # run the calculation as normal when the asset just has a single curve
            if pygeos.get_type_id(asset_geom) == 1:            
                get_hazard_points['overlay_meters'] = pygeos.length(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
                return [return_period,asset[0],curve_name,np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,
                                                             fragility_values))*get_hazard_points.overlay_meters*maxdam_asset),
                                                          np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,
                                                             fragility_values))*get_hazard_points.overlay_meters*lowerdam_asset),
                                                          np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,
                                                             fragility_values))*get_hazard_points.overlay_meters*upperdam_asset)]

            elif (pygeos.get_type_id(asset_geom) == 3) | (pygeos.get_type_id(asset_geom) == 6) :
                get_hazard_points['overlay_m2'] = pygeos.area(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
                return [return_period,asset[0],curve_name,get_hazard_points.apply(lambda x: np.interp(x[return_period],hazard_intensity, 
                                                                  fragility_values)*maxdam_asset*x.overlay_m2,axis=1).sum(),
                                                          get_hazard_points.apply(lambda x: np.interp(x[return_period],hazard_intensity, 
                                                                  fragility_values)*lowerdam_asset*x.overlay_m2,axis=1).sum(),
                                                          get_hazard_points.apply(lambda x: np.interp(x[return_period],hazard_intensity, 
                                                                  fragility_values)*upperdam_asset*x.overlay_m2,axis=1).sum()]  

            else:
                return [return_period,asset[0],curve_name,np.sum((np.interp(get_hazard_points[return_period].values,
                                                             hazard_intensity,fragility_values))*maxdam_asset),
                                                          np.sum((np.interp(get_hazard_points[return_period].values,
                                                             hazard_intensity,fragility_values))*lowerdam_asset),
                                                          np.sum((np.interp(get_hazard_points[return_period].values,
                                                             hazard_intensity,fragility_values))*upperdam_asset)]
        else:
            # run the calculation when the asset has multiple curves
            if pygeos.get_type_id(asset_geom) == 1:            
                get_hazard_points['overlay_meters'] = pygeos.length(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
            elif (pygeos.get_type_id(asset_geom) == 3) | (pygeos.get_type_id(asset_geom) == 6) :
                get_hazard_points['overlay_m2'] = pygeos.area(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
            
            collect_all = []
            for iter_,curve_ids in enumerate(curves[asset_type].columns):
                if pygeos.get_type_id(asset_geom) == 1:
                    collect_all.append([return_period,asset[0],curves[asset_type].columns[iter_],
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*get_hazard_points.overlay_meters*maxdam_asset[iter_]),
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*get_hazard_points.overlay_meters*lowerdam_asset[iter_]),
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*get_hazard_points.overlay_meters*upperdam_asset[iter_])])
                                   
                elif (pygeos.get_type_id(asset_geom) == 3) | (pygeos.get_type_id(asset_geom) == 6) :
                    collect_all.append([return_period,asset[0],curves[asset_type].columns[iter_],
                                        get_hazard_points.apply(lambda x: np.interp(x[return_period], hazard_intensity,
                                                                                    fragility_values.T[iter_])*maxdam_asset[iter_]*x.overlay_m2,axis=1).sum(),
                                        get_hazard_points.apply(lambda x: np.interp(x[return_period], hazard_intensity,
                                                                                    fragility_values.T[iter_])*lowerdam_asset[iter_]*x.overlay_m2,axis=1).sum(),
                                        get_hazard_points.apply(lambda x: np.interp(x[return_period], hazard_intensity,
                                                                                    fragility_values.T[iter_])*upperdam_asset[iter_]*x.overlay_m2,axis=1).sum()])

                else:
                    collect_all.append([return_period,asset[0],curves[asset_type].columns[iter_],
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*maxdam_asset[iter_]),
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*lowerdam_asset[iter_]),
                                        np.sum((np.interp(get_hazard_points[return_period].values,
                                                          hazard_intensity,fragility_values.T[iter_]))*upperdam_asset[iter_])])
            return collect_all

In [9]:
load_curves_maxdam(vul_curve_path,'tc')[0]

Infrastructure type,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower,power_tower
Code,W3_1,W3_2,W3_3,W3_4,W3_5,W3_6,W3_7,W3_8,W3_9,W3_10,...,W3_17,W3_18,W3_19,W3_20,W3_21,W3_22,W3_23,W3_24,W3_25,W3_26
Wind speed (m/s),Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0.000000,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1.000000,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1.388889,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2.000000,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2.235200,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
280.000000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
285.000000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
290.000000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
295.000000,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [6]:
def open_storm_data(country_code):
    # list of available climate models
    climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']

    # dictionary of basins for each country
    country_basin = {
        "BRN": ["WP"],
        "KHM": ["WP"],
        "CHN": ["WP", "NI"],
        "IDN": ["SI", "SP", "NI", "WP"],
        "JPN": ["WP"],
        "LAO": ["WP"],
        "MYS": ["WP", "NI"],
        "MNG": ["WP", "NI"],
        "MMR": ["NI", "WP"],
        "PRK": ["WP"],
        "PHL": ["WP"],
        "SGP": ["WP"],
        "KOR": ["WP"],
        "TWN": ["WP"],
        "THA": ["WP", "NI"],
        "VNM": ["WP"]
    }

    # load country geometry file and create geometry to clip
    ne_countries = gpd.read_file(os.path.join(data_path,'..',"natural_earth","ne_10m_admin_0_countries.shp"))
    bbox = ne_countries.loc[ne_countries['ISO_A3']==country_code].geometry.buffer(1).values[0].bounds

    df_ds = {}
    for climate_model in climate_models:
        concat_prep = []

        #combine STORM data from different basins
        if "WP" in country_basin[country_code]:
            WP = load_storm_data(climate_model,'WP',bbox)
            concat_prep.append(WP)
        if "SP" in country_basin[country_code]:
            SP = load_storm_data(climate_model,'SP',bbox)
            concat_prep.append(SP)
        if "NI" in country_basin[country_code]:            
            NI = load_storm_data(climate_model,'NI',bbox)
            concat_prep.append(NI)            
        if "SI" in country_basin[country_code]:       
            SI = load_storm_data(climate_model,'SI',bbox)
            concat_prep.append(SI)            
                   
        df_ds_cl = pd.concat(concat_prep, keys=country_basin[country_code])
        df_ds_cl = df_ds_cl.reset_index(drop=True)
        df_ds[climate_model] = df_ds_cl

    return df_ds

def load_storm_data(climate_model,basin,bbox):

    filename = os.path.join(tc_path, f'STORM_FIXED_RETURN_PERIODS{climate_model}_{basin}.nc')
    
    # load data from NetCDF file
    with xr.open_dataset(filename) as ds:
        
        # convert data to WGS84 CRS
        ds.rio.write_crs(4326, inplace=True)
        ds = ds.rio.clip_box(minx=bbox[0], miny=bbox[1], maxx=bbox[2], maxy=bbox[3])
        ds['mean_3s'] = ds['mean']/0.88*1.11 #convert 10-min sustained wind speed to 3-s gust wind speed
        #print(ds.head())

        # get the mean values
        df_ds = ds['mean_3s'].to_dataframe().unstack(level=2).reset_index()

        # create geometry values and drop lat lon columns
        df_ds['geometry'] = [pygeos.points(x) for x in list(zip(df_ds['lon'], df_ds['lat']))]
        df_ds = df_ds.drop(['lat', 'lon'], axis=1, level=0)
        
        # interpolate wind speeds of 1,2,5,25,and 250-yr return period
        ## rename columns to return periods (must be integer for interpolating)
        df_ds_geometry = pd.DataFrame()
        df_ds_geometry['geometry'] = df_ds['geometry']
        df_ds = df_ds.drop(['geometry'], axis=1, level=0)
        df_ds = df_ds['mean_3s']
        df_ds.columns = [int(x) for x in ds['mean_3s']['rp']]
        df_ds[1] = np.nan
        df_ds[2] = np.nan
        df_ds[5] = np.nan
        df_ds[25] = np.nan
        df_ds[250] = np.nan
        df_ds = df_ds.reindex(sorted(df_ds.columns), axis=1)
        df_ds = df_ds.interpolate(method='linear', axis=1, limit_direction='both')
        df_ds['geometry'] = df_ds_geometry['geometry']
        df_ds = df_ds[[1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 'geometry']]
        
        
        # rename columns to return periods
        df_ds.columns = ['1_{}{}'.format(int(x), climate_model) for x in [1, 2, 5, 10, 25, 50, 100, 250, 500, 1000]] +['geometry']     
        df_ds['geometry'] = pygeos.buffer(df_ds.geometry, radius=0.1/2, cap_style='square').values
        #print(type(df_ds['geometry'][0]))
        # reproject the geometry column to the specified CRS
        df_ds['geometry'] = reproject(df_ds)
        #print(type(df_ds['geometry'][0]))
        #print(df_ds.head())
            
        # drop all non values to reduce size
        #df_ds = df_ds.loc[~df_ds['1_10000{}'.format(climate_model)].isna()].reset_index(drop=True)
        df_ds = df_ds.fillna(0)
        #print(type(df_ds['geometry'][0]))

    return df_ds

In [29]:
brn_wind=open_storm_data('BRN')
#print(type(twn_wind))

In [30]:
%%time
prk_wind['_HadGEM3-GC31-HM']

CPU times: total: 0 ns
Wall time: 0 ns


Unnamed: 0,1_1_HadGEM3-GC31-HM,1_2_HadGEM3-GC31-HM,1_5_HadGEM3-GC31-HM,1_10_HadGEM3-GC31-HM,1_25_HadGEM3-GC31-HM,1_50_HadGEM3-GC31-HM,1_100_HadGEM3-GC31-HM,1_250_HadGEM3-GC31-HM,1_500_HadGEM3-GC31-HM,1_1000_HadGEM3-GC31-HM,geometry
0,40.469543,40.469543,40.469543,40.469543,47.006968,51.136926,54.000687,57.706195,59.990146,61.671156,"POLYGON ((13725693.215 4397372.744, 13725693.2..."
1,40.559481,40.559481,40.559481,40.559481,46.998784,51.011116,54.276254,57.470437,59.796552,61.367880,"POLYGON ((13736825.164 4397372.744, 13736825.1..."
2,40.576730,40.576730,40.576730,40.576730,47.074321,50.719132,54.085564,57.368295,59.887502,61.455106,"POLYGON ((13747957.113 4397372.744, 13747957.1..."
3,40.619581,40.619581,40.619581,40.619581,47.139382,50.878822,53.868798,57.394859,59.747586,61.142801,"POLYGON ((13759089.062 4397372.744, 13759089.0..."
4,40.767273,40.767273,40.767273,40.767273,47.236443,50.914861,53.647720,57.101056,59.263981,61.107820,"POLYGON ((13770221.011 4397372.744, 13770221.0..."
...,...,...,...,...,...,...,...,...,...,...,...
6370,28.165473,28.165473,28.165473,28.165473,36.016688,41.022344,44.180265,48.861651,51.562568,54.256075,"POLYGON ((14616249.141 5480930.477, 14616249.1..."
6371,28.399414,28.399414,28.399414,28.399414,36.329825,41.091320,44.535876,48.959105,51.567495,54.432705,"POLYGON ((14627381.09 5480930.477, 14627381.09..."
6372,28.670507,28.670507,28.670507,28.670507,36.401315,40.999775,45.117046,49.259744,51.713517,54.310172,"POLYGON ((14638513.039 5480930.477, 14638513.0..."
6373,28.952827,28.952827,28.952827,28.952827,36.496510,41.032414,45.355875,49.466124,52.108267,54.097208,"POLYGON ((14649644.988 5480930.477, 14649644.9..."


In [7]:
def clip_flood_data(country_code):
    
    # load country geometry file and create geometry to clip
    ne_countries = gpd.read_file(ne_path)
    geometry = ne_countries.loc[ne_countries['ISO_A3']==country_code].geometry.values[0]
    geoms = [mapping(geometry)]
    
    #climate_model: historical, rcp4p5, rcp8p5; time_period: hist, 2030, 2050, 2080
    rps = ['0001','0002','0005','0010','0025','0050','0100','0250','0500','1000']
    climate_models = ['historical','rcp8p5']
    
    #"/scistor/ivm/data_catalogue/open_street_map/pg_risk_analysis/GLOFRIS/global/inuncoast_historical_nosub_hist_rp0001_0.tif"

    for rp in rps:
        #global input_file
        for climate_model in climate_models:
            if climate_model=='historical':
                input_file = os.path.join(fl_path,'global',
                                          'inuncoast_{}_nosub_hist_rp{}_0.tif'.format(climate_model,rp)) 
 
            elif climate_model=='rcp8p5':
                input_file = os.path.join(fl_path,'global',
                                          'inuncoast_{}_nosub_2030_rp{}_0.tif'.format(climate_model,rp))

            # load raster file and save clipped version
            with rasterio.open(input_file) as src:
                out_image, out_transform = mask(src, geoms, crop=True)
                out_meta = src.meta

                out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})

                if 'scistor' in fl_path:
                    file_path = os.path.join(fl_path,'country','_'.join([country_code]+input_file.split('_')[6:]))
                else:
                    file_path = os.path.join(fl_path,'country','_'.join([country_code]+input_file.split('_')[3:]))

                with rasterio.open(file_path, "w", **out_meta) as dest:
                    dest.write(out_image)

def load_flood_data(country_code,climate_model):
     
    rps = ['0001','0002','0005','0010','0025','0050','0100','0250','0500','1000']
    collect_df_ds = []
    
    if climate_model=='historical':
        print('Loading historical coastal flood data ...')
        for rp in rps:
            #for file in files:
            file_path = os.path.join(fl_path,'country','{}_{}_nosub_hist_rp{}_0.tif'.format(country_code,climate_model,rp))
            with xr.open_dataset(file_path) as ds: #, engine="rasterio"
                df_ds = ds.to_dataframe().reset_index()
                df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
                df_ds = df_ds.rename(columns={'band_data': 'rp'+rp}) #rename to return period
                
                # move from meters to centimeters
                df_ds['rp'+rp] = (df_ds['rp'+rp]*100)         
                df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
                df_ds = df_ds.dropna()
                df_ds = df_ds.reset_index(drop=True)
                df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=0.00833/2,cap_style='square').values  #?????????????????????????
                df_ds['geometry'] = reproject(df_ds)
                collect_df_ds.append(df_ds)

        df_all = collect_df_ds[0].merge(collect_df_ds[1]).merge(collect_df_ds[2]).merge(collect_df_ds[3]).merge(collect_df_ds[4])\
                 .merge(collect_df_ds[5]).merge(collect_df_ds[6]).merge(collect_df_ds[7]).merge(collect_df_ds[8]).merge(collect_df_ds[9])
        df_all = df_all.loc[df_all['rp1000']>0].reset_index(drop=True)

    elif climate_model=='rcp8p5':
        print('Loading future coastal flood data ...')
        for rp in rps:
            #for file in files:
            file_path = os.path.join(fl_path,'country','{}_{}_nosub_2030_rp{}_0.tif'.format(country_code,climate_model,rp))
            with xr.open_dataset(file_path) as ds: #, engine="rasterio"
                df_ds = ds.to_dataframe().reset_index()
                df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
                df_ds = df_ds.rename(columns={'band_data': 'rp'+rp}) #rename to return period
                df_ds['rp'+rp] = (df_ds['rp'+rp]*100)
                df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
                df_ds = df_ds.dropna()
                df_ds = df_ds.reset_index(drop=True)
                df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=0.00833/2,cap_style='square').values
                df_ds['geometry'] = reproject(df_ds)
                collect_df_ds.append(df_ds)

        df_all = collect_df_ds[0].merge(collect_df_ds[1]).merge(collect_df_ds[2]).merge(collect_df_ds[3]).merge(collect_df_ds[4])\
                 .merge(collect_df_ds[5]).merge(collect_df_ds[6]).merge(collect_df_ds[7]).merge(collect_df_ds[8]).merge(collect_df_ds[9])

        df_all = df_all.loc[df_all['rp1000']>0].reset_index(drop=True)
    return df_all

def open_flood_data(country_code):
    climate_models = ['historical','rcp8p5']
    df_ds = {}
    for climate_model in climate_models:
        df_ds_sc = load_flood_data(country_code,climate_model)
        df_ds[climate_model] = df_ds_sc
    
    return df_ds

In [277]:
clip_flood_data('JPN')

In [33]:
%%time
prk_flood = open_flood_data('PRK')

Loading historical coastal flood data ...
Loading future coastal flood data ...
CPU times: total: 1min 54s
Wall time: 1min 54s


In [34]:
print(type(prk_flood))

<class 'dict'>


In [35]:
prk_flood#['historical']

{'historical':          rp0001                                           geometry  \
 0      0.000000  POLYGON ((13844433.819 4850211.799, 13844433.8...   
 1    213.443848  POLYGON ((13846289.144 4881697.253, 13846289.1...   
 2     57.775497  POLYGON ((13846289.144 4862309.77, 13846289.14...   
 3      0.000000  POLYGON ((13846289.144 4861099.31, 13846289.14...   
 4      9.117842  POLYGON ((13846289.144 4859888.997, 13846289.1...   
 ..          ...                                                ...   
 372   67.400185  POLYGON ((14362069.452 4935209.743, 14362069.4...   
 373    0.304127  POLYGON ((14364852.439 4936429.321, 14364852.4...   
 374   59.260742  POLYGON ((14414946.21 4982886.071, 14414946.21...   
 375   69.419838  POLYGON ((14415873.872 4984111.602, 14415873.8...   
 376   70.671585  POLYGON ((14415873.872 4982886.071, 14415873.8...   
 
          rp0002      rp0005      rp0010      rp0025      rp0050      rp0100  \
 0      0.000000    0.000000    4.555368   11.911917

# OSM data processing

In [18]:
def extract_osm_infrastructure(country_code,osm_data_path):
    # lines
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    osm_lines = power_polyline(osm_path)
    osm_lines['geometry'] = reproject(osm_lines)
    osm_lines = buffer_assets(osm_lines.loc[osm_lines.asset.isin(
        ['cable','minor_cable','line','minor_line'])],buffer_size=100).reset_index(drop=True)
    
    # polygons
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    osm_polygons = electricity(osm_path)
    osm_polygons['geometry'] = reproject(osm_polygons)
    
    # points
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    osm_points = power_point(osm_path)
    osm_points['geometry'] = reproject(osm_points)
    osm_points = buffer_assets(osm_points.loc[osm_points.asset.isin(
        ['power_tower','power_pole'])],buffer_size=100).reset_index(drop=True)
    
    return osm_lines,osm_polygons,osm_points


In [65]:
osm_power_infra = extract_osm_infrastructure('BRN',osm_data_path)

query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████████████████| 87/87 [00:00<00:00, 470.26it/s]


query is finished, lets start the loop


extract: 100%|█████████████████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 82.34it/s]


query is finished, lets start the loop


extract: 100%|███████████████████████████████████████████████████████████████████| 8774/8774 [00:00<00:00, 9981.35it/s]


In [37]:
osm_power_infra

(         osm_id asset voltage  \
 0     105187949  line    None   
 1     105187955  line    None   
 2     105188022  line    None   
 3     107025236  line    None   
 4     123992103  line  154000   
 ..          ...   ...     ...   
 842  1082904916  line    None   
 843  1082904917  line    None   
 844  1085391998  line    None   
 845  1102076042  line  154000   
 846  1102076043  line  154000   
 
                                               geometry  \
 0    LINESTRING (14173655.447 4825037.842, 14173579...   
 1    LINESTRING (14178276.586 4800804.001, 14178162...   
 2    LINESTRING (14163631.45 4784350.366, 14163637....   
 3    LINESTRING (14176146.098 4797211.802, 14175880...   
 4    LINESTRING (14097165.409 4569256.468, 14097614...   
 ..                                                 ...   
 842  LINESTRING (13960258.8 4704246.146, 13960328.3...   
 843  LINESTRING (13963363.767 4700422.189, 13963348...   
 844  LINESTRING (13998362.482 4743470.643, 13998420...   


In [20]:
print(type(osm_power_infra))

<class 'tuple'>


In [77]:
def assess_damage_osm(country_code,osm_power_infra,hazard_type):

    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(vul_curve_path,hazard_type)
    
    # read infrastructure data:
    osm_lines,osm_poly,osm_points = osm_power_infra
    #print(osm_lines['asset'].unique())
    
    #calculate damaged lines/polygons/points in loop by climate_model
    damaged_lines = {}
    damaged_poly = {}
    damaged_points = {}

    if hazard_type=='tc':
        # read wind data
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        df_ds = open_storm_data(country_code)

        # remove assets that will not have any damage
        osm_lines = osm_lines.loc[osm_lines.asset != 'cable'].reset_index(drop=True)
        osm_lines['asset'] = osm_lines['asset'].replace(['minor_line'], 'line')
        osm_poly = osm_poly.loc[osm_poly.asset != 'plant'].reset_index(drop=True)
        
        for climate_model in climate_models:
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]
            
            # assess damage for lines
            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_lines).T,
                                         columns=['asset','hazard_point'])

            if len(overlay_lines) == 0:
                damaged_lines[climate_model] = pd.DataFrame()

            else:
                collect_line_damages = []
                for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                                  desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                    for return_period in return_periods:
                        collect_line_damages.append(get_damage_per_asset_per_rp(asset,
                                                                                df_ds[climate_model],
                                                                                osm_lines,
                                                                                curves,
                                                                                maxdam,
                                                                                return_period,
                                                                                country_code))

                get_asset_type_line = dict(zip(osm_lines.index,osm_lines.asset))
                results = pd.DataFrame([item for sublist in collect_line_damages
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

                damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

            # assess damage for polygons
            if len(osm_poly) > 0:
                overlay_poly = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_poly).T,
                                        columns=['asset','hazard_point'])
            else:
                overlay_poly = pd.DataFrame()

            if len(overlay_poly) == 0:
                damaged_poly[climate_model] = pd.DataFrame()

            else:
                collect_poly_damages = []
                for asset in tqdm(overlay_poly.groupby('asset'),total=len(overlay_poly.asset.unique()),
                                  desc='polygon damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                    for return_period in return_periods:
                        collect_poly_damages.append(get_damage_per_asset_per_rp(asset,
                                                                                df_ds[climate_model],
                                                                                osm_poly,
                                                                                curves,
                                                                                maxdam,
                                                                                return_period,
                                                                                country_code))

                get_asset_type_poly = dict(zip(osm_poly.index,osm_poly.asset))
                
                results = pd.DataFrame([item for sublist in collect_poly_damages 
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_poly[x])    

                damaged_poly[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

            # assess damage for points
            overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_points).T,
                                          columns=['asset','hazard_point'])

            if len(overlay_points) == 0:
                damaged_points[climate_model] = pd.DataFrame()

            else:
                collect_point_damages = []
                for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                                  desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                    for return_period in return_periods:
                        collect_point_damages.append(get_damage_per_asset_per_rp(asset,
                                                                                df_ds[climate_model],
                                                                                osm_points,
                                                                                curves,
                                                                                maxdam,
                                                                                return_period,
                                                                                country_code))

                get_asset_type_point = dict(zip(osm_points.index,osm_points.asset))
                
                results = pd.DataFrame([item for sublist in collect_point_damages
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_point[x])    

                damaged_points[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()


    elif hazard_type=='fl':
        # read flood data
        climate_models = ['historical','rcp8p5']
        df_ds = open_flood_data(country_code)
    
        for climate_model in climate_models:
            return_periods = ['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000'] 

            # assess damage for lines
            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_lines).T,
                                         columns=['asset','hazard_point'])

            if len(overlay_lines) == 0:
                damaged_lines[climate_model] = pd.DataFrame()

            else:
                collect_line_damages = []
                for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                                  desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                    for return_period in return_periods:
                        collect_line_damages.append(get_damage_per_asset_per_rp(asset,
                                                                                df_ds[climate_model],
                                                                                osm_lines,
                                                                                curves,
                                                                                maxdam,
                                                                                return_period,
                                                                                country_code))

                get_asset_type_line = dict(zip(osm_lines.index,osm_lines.asset))
                results = pd.DataFrame(collect_line_damages,columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                
                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

                #sum damage of line, cable, and minor_line
                results['curve'] = results['curve'].replace(['cable', 'minor_line'], 'line')
                results['asset_type'] = results['asset_type'].replace(['cable', 'minor_line'], 'line')

                damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

            # assess damage for polygons
            if len(osm_poly) > 0:
                overlay_poly = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_poly).T,
                                        columns=['asset','hazard_point'])
            else:
                overlay_poly = pd.DataFrame()

            if len(overlay_poly) == 0:
                damaged_poly[climate_model] = pd.DataFrame()

            else:
                collect_poly_damages = []
                for asset in tqdm(overlay_poly.groupby('asset'),total=len(overlay_poly.asset.unique()),
                                  desc='polygon damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                    for return_period in return_periods:
                        collect_poly_damages.append(get_damage_per_asset_per_rp(asset,
                                                                                df_ds[climate_model],
                                                                                osm_poly,
                                                                                curves,
                                                                                maxdam,
                                                                                return_period,
                                                                                country_code))

                get_asset_type_poly = dict(zip(osm_poly.index,osm_poly.asset))
                results = pd.DataFrame(collect_poly_damages ,columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                
                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_poly[x])    

                damaged_poly[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

            # assess damage for points
            overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],osm_points).T,
                                          columns=['asset','hazard_point'])

            if len(overlay_points) == 0:
                damaged_points[climate_model] = pd.DataFrame()

            else:
                collect_point_damages = []
                for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                                  desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                    for return_period in return_periods:
                        collect_point_damages.append(get_damage_per_asset_per_rp(asset,
                                                                                df_ds[climate_model],
                                                                                osm_points,
                                                                                curves,
                                                                                maxdam,
                                                                                return_period,
                                                                                country_code))

                get_asset_type_point = dict(zip(osm_points.index,osm_points.asset))
                results = pd.DataFrame(collect_point_damages ,columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                
                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_point[x])    

                damaged_points[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

    return damaged_lines,damaged_poly,damaged_points


In [59]:
osm_damage_infra = assess_damage_osm('PRK',osm_power_infra,'fl')

Loading historical coastal flood data ...
Loading future coastal flood data ...


point damage calculation for PRK fl (historical): 100%|████████████████████████████████| 41/41 [00:00<00:00, 60.38it/s]
point damage calculation for PRK fl (rcp8p5): 100%|████████████████████████████████████| 41/41 [00:00<00:00, 59.42it/s]


In [66]:
osm_damage_infra = assess_damage_osm('BRN',osm_power_infra,'tc')

polygon damage calculation for BRN tc (): 100%|██████████████████████████████████████████| 1/1 [00:00<00:00,  1.61it/s]
polygon damage calculation for BRN tc (_CMCC-CM2-VHR4): 100%|████████████████████████████| 1/1 [00:00<00:00,  1.62it/s]


In [74]:
# extract nested dict by key
osm_damage_infra[1]['_CMCC-CM2-VHR4']

Unnamed: 0,rp,curve,asset_type,meandam,lowerdam,upperdam
0,1_1000_CMCC-CM2-VHR4,W2_1_1,substation,0.018276,0.013707,0.022845
1,1_1000_CMCC-CM2-VHR4,W2_1_2,substation,0.036551,0.027413,0.045689
2,1_1000_CMCC-CM2-VHR4,W2_1_3,substation,0.091378,0.068534,0.114223
3,1_1000_CMCC-CM2-VHR4,W2_2_1,substation,0.000002,0.000002,0.000003
4,1_1000_CMCC-CM2-VHR4,W2_2_2,substation,0.000004,0.000003,0.000005
...,...,...,...,...,...,...
205,1_5_CMCC-CM2-VHR4,W2_6_2,substation,84270.363554,63202.772666,105337.954443
206,1_5_CMCC-CM2-VHR4,W2_6_3,substation,821.411243,616.058432,1026.764053
207,1_5_CMCC-CM2-VHR4,W2_7_1,substation,29009.357101,21757.017826,36261.696376
208,1_5_CMCC-CM2-VHR4,W2_7_2,substation,59462.163961,44596.622971,74327.704951


In [60]:
def country_analysis_osm(country_code,hazard_type): #
    
    # extract infrastructure data from OSM
    osm_power_infra = extract_osm_infrastructure(country_code,osm_data_path)
    
    # assess damage to hazard_type
    osm_damage_infra = assess_damage_osm(country_code,osm_power_infra,hazard_type)
    
    line_risk = {}
    plant_risk = {}
    substation_risk = {}
    tower_risk = {}
    pole_risk = {}

    if hazard_type=='tc':
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']

        for i in range(len(osm_damage_infra)):
            for climate_model in climate_models:
                df = osm_damage_infra[i][climate_model]
                    
                if len(df) == 0:
                    print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))

                else:
                    with pd.ExcelWriter(os.path.join(output_path,'damage','{}_osm_{}{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                        df.to_excel(writer)

                    df['rp'] = df['rp'].replace(['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),
                                                '1_10{}'.format(climate_model),'1_25{}'.format(climate_model),'1_50{}'.format(climate_model),
                                                '1_100{}'.format(climate_model),'1_250{}'.format(climate_model),'1_500{}'.format(climate_model),
                                                '1_1000{}'.format(climate_model)],
                                                [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001])
                    
                    curve_code_substation = ['W2_1_1','W2_1_2','W2_1_3','W2_2_1','W2_2_2','W2_2_3','W2_3_1','W2_3_2','W2_3_3',
                                            'W2_4_1','W2_4_2','W2_4_3','W2_5_1','W2_5_2','W2_5_3','W2_6_1','W2_6_2','W2_6_3',
                                            'W2_7_1','W2_7_2','W2_7_3']
                    
                    curve_code_tower = ['W3_1','W3_2','W3_3','W3_4','W3_5','W3_6','W3_7','W3_8','W3_9','W3_10','W3_11','W3_12',
                                        'W3_13','W3_14','W3_15','W3_16','W3_17','W3_18','W3_19','W3_20','W3_21','W3_22','W3_22',
                                        'W3_24','W3_25','W3_26','W3_27','W3_28','W3_29','W3_30']
                    
                    curve_code_pole = ['W4_1','W4_2','W4_3','W4_4','W4_5','W4_6','W4_7','W4_8','W4_9','W4_10','W4_11','W4_12',
                                    'W4_13','W4_14','W4_15','W4_16','W4_17','W4_18','W4_19','W4_20','W4_21','W4_22','W4_23',
                                    'W4_24','W4_25','W4_26','W4_27','W4_28','W4_29','W4_30','W4_31','W4_32','W4_33','W4_34',
                                    'W4_35','W4_36','W4_37','W4_38','W4_39','W4_40','W4_41','W4_42','W4_43','W4_44','W4_45',
                                    'W4_46','W4_47','W4_48','W4_49','W4_50','W4_51','W4_52','W4_53','W4_54','W4_55']
                    
                    curve_code_line = ['W5_1','W5_2','W5_3']

                    #assess risk for power lines
                    if i == 0:
                        for curve_code in curve_code_line:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            loss_list_mean = loss_list.meandam.values.tolist()
                            loss_list_lower = loss_list.lowerdam.values.tolist()
                            loss_list_upper = loss_list.upperdam.values.tolist()
                            RPS = loss_list.rp.values.tolist()
                            line_risk[climate_model] = {
                                'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                            }
                            
                    #assess risk for power substations                
                    elif i == 1:                        
                        for curve_code in curve_code_substation:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            loss_list_mean = loss_list.meandam.values.tolist()
                            loss_list_lower = loss_list.lowerdam.values.tolist()
                            loss_list_upper = loss_list.upperdam.values.tolist()
                            RPS = loss_list.rp.values.tolist()
                            substation_risk[climate_model] = {
                                'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                            }

                    #assess risk for power towers and power poles
                    elif i == 2:
                        for curve_code in curve_code_tower:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of power towers ...")
                            
                            else:
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = df.loc[df['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                tower_risk[climate_model] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }
                        
                        for curve_code in curve_code_pole:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            if len(loss_list) == 0:
                                print("No risk of power poles ...")
                            
                            else:                    
                                loss_list_mean = loss_list.meandam.values.tolist()
                                loss_list_lower = loss_list.lowerdam.values.tolist()
                                loss_list_upper = loss_list.upperdam.values.tolist()
                                RPS = df.loc[df['curve'] == curve_code]
                                RPS = RPS.rp.values.tolist()
                                pole_risk[climate_model] = {
                                    'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                    'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                    'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }
    
    elif hazard_type=='fl':
        climate_models = ['historical','rcp8p5']
    
        for i in range(len(osm_damage_infra)):
            for climate_model in climate_models:
                df = osm_damage_infra[i][climate_model]
                    
                if len(df) == 0:
                    print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))

                else:
                    with pd.ExcelWriter(os.path.join(output_path,'damage','{}_osm_{}_{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                        df.to_excel(writer)

                    df['rp'] = df['rp'].replace(['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000'],
                                                [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001])

                    #assess risk for power lines
                    if i == 0:
                        loss_list_mean = df.meandam.values.tolist()
                        loss_list_lower = df.lowerdam.values.tolist()
                        loss_list_upper = df.upperdam.values.tolist()
                        RPS = df.rp.values.tolist()
                        line_risk[climate_model] = {
                            'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                            'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                            'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                        }

                    #assess risk for power plants and substations                
                    elif i == 1:
                        loss_list = df.loc[df['asset_type'] == 'plant']
                        if len(loss_list) == 0:
                            print("No risk of plants ...")
                        
                        else:
                            loss_list_mean = loss_list.meandam.values.tolist()
                            loss_list_lower = loss_list.lowerdam.values.tolist()
                            loss_list_upper = loss_list.upperdam.values.tolist()
                            RPS = df.loc[df['asset_type'] == 'plant']
                            RPS = RPS.rp.values.tolist()
                            plant_risk[climate_model] = {
                                'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                            }
                            
                        loss_list = df.loc[df['asset_type'] == 'substation']
                        if len(loss_list) == 0:
                            print("No risk of substations ...")
                        
                        else:
                            loss_list_mean = loss_list.meandam.values.tolist()
                            loss_list_lower = loss_list.lowerdam.values.tolist()
                            loss_list_upper = loss_list.upperdam.values.tolist()
                            RPS = df.loc[df['asset_type'] == 'substation']
                            RPS = RPS.rp.values.tolist()
                            substation_risk[climate_model] = {
                                'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }

                    #assess risk for power towers and power poles
                    elif i == 2:
                        loss_list = df.loc[df['asset_type'] == 'power_tower']
                        if len(loss_list) == 0:
                            print("No risk of power towers ...")
                        
                        else:
                            loss_list_mean = loss_list.meandam.values.tolist()
                            loss_list_lower = loss_list.lowerdam.values.tolist()
                            loss_list_upper = loss_list.upperdam.values.tolist()
                            RPS = df.loc[df['asset_type'] == 'power_tower']
                            RPS = RPS.rp.values.tolist()
                            tower_risk[climate_model] = {
                                'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                            }
                            
                        loss_list = df.loc[df['asset_type'] == 'power_pole']
                        if len(loss_list) == 0:
                            print("No risk of power poles ...")
                        
                        else:                    
                            loss_list_mean = loss_list.meandam.values.tolist()
                            loss_list_lower = loss_list.lowerdam.values.tolist()
                            loss_list_upper = loss_list.upperdam.values.tolist()
                            RPS = df.loc[df['asset_type'] == 'power_pole']
                            RPS = RPS.rp.values.tolist()
                            pole_risk[climate_model] = {
                                'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                            }
                            
    return pd.DataFrame(line_risk),pd.DataFrame(plant_risk),pd.DataFrame(substation_risk),pd.DataFrame(tower_risk),pd.DataFrame(pole_risk)

In [104]:
%%time
osm_damage_infra = country_analysis_osm('BRN','tc')

query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████████████████| 87/87 [00:00<00:00, 450.79it/s]


query is finished, lets start the loop


extract: 100%|█████████████████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 84.36it/s]


query is finished, lets start the loop


extract: 100%|██████████████████████████████████████████████████████████████████| 8774/8774 [00:00<00:00, 10444.73it/s]
polygon damage calculation for BRN tc (): 100%|██████████████████████████████████████████| 1/1 [00:00<00:00,  1.51it/s]
polygon damage calculation for BRN tc (_CMCC-CM2-VHR4): 100%|████████████████████████████| 1/1 [00:00<00:00,  1.57it/s]
polygon damage calculation for BRN tc (_CNRM-CM6-1-HR): 100%|████████████████████████████| 1/1 [00:00<00:00,  1.58it/s]
polygon damage calculation for BRN tc (_EC-Earth3P-HR): 100%|████████████████████████████| 1/1 [00:00<00:00,  1.54it/s]
polygon damage calculation for BRN tc (_HadGEM3-GC31-HM): 100%|██████████████████████████| 1/1 [00:00<00:00,  1.57it/s]


No tc_ risk of infra_type 0 in BRN
No tc__CMCC-CM2-VHR4 risk of infra_type 0 in BRN
No tc__CNRM-CM6-1-HR risk of infra_type 0 in BRN
No tc__EC-Earth3P-HR risk of infra_type 0 in BRN
No tc__HadGEM3-GC31-HM risk of infra_type 0 in BRN
No tc_ risk of infra_type 2 in BRN
No tc__CMCC-CM2-VHR4 risk of infra_type 2 in BRN
No tc__CNRM-CM6-1-HR risk of infra_type 2 in BRN
No tc__EC-Earth3P-HR risk of infra_type 2 in BRN
No tc__HadGEM3-GC31-HM risk of infra_type 2 in BRN
CPU times: total: 12.1 s
Wall time: 12.1 s


In [107]:
osm_damage_infra[2] #'','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM'

Unnamed: 0,Unnamed: 1,_CMCC-CM2-VHR4,_CNRM-CM6-1-HR,_EC-Earth3P-HR,_HadGEM3-GC31-HM
mean_risk,170556.628078,169519.571782,173071.127219,168483.870815,169359.116259
lower_risk,127917.471058,127139.678837,129803.345414,126362.903111,127019.337194
upper_risk,213195.785097,211899.464728,216338.909023,210604.838519,211698.895324


In [63]:
osm_damage_infra#[0]['historical']['mean']

({'historical': {'mean': 220072615.93097857,
   'lower': 165054461.94823387,
   'upper': 275090769.91372323},
  'rcp8p5': {'mean': 289554076.10272974,
   'lower': 217165557.07704726,
   'upper': 361942595.1284121}},
 {},
 {},
 {'historical': 132315.8115881768, 'rcp8p5': 179535.9734914216},
 {'historical': 0.0, 'rcp8p5': 0.48358291963805916})

# Government data processing

In [8]:
def extract_pg_infrastructure(country_code):
    files = [x for x in os.listdir(pg_data_path)  if country_code in x ]
    pg_types = ['line','point']
    
    for pg_type in pg_types:
        #print(os.path.isfile(os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))))
        if os.path.isfile(os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))):
            if pg_type=='line':
                for file in files: 
                    file_path = os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))

                    pg_data_country = gpd.read_file(file_path)
                    pg_data_country = pd.DataFrame(pg_data_country.copy())
                    pg_data_country.geometry = pygeos.from_shapely(pg_data_country.geometry)
                    pg_data_country['geometry'] = reproject(pg_data_country)

                pg_lines = buffer_assets(pg_data_country.loc[pg_data_country.asset.isin(['line'])],buffer_size=100).reset_index(drop=True)

            elif pg_type=='point':
                for file in files:
                    file_path = os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))

                    pg_data_country = gpd.read_file(file_path)
                    pg_data_country = pd.DataFrame(pg_data_country.copy())
                    pg_data_country.geometry = pygeos.from_shapely(pg_data_country.geometry)
                    pg_data_country['geometry'] = reproject(pg_data_country)

                pg_points = buffer_assets(pg_data_country.loc[pg_data_country.asset.isin(['plant','substation','power_tower','power_pole'])],buffer_size=100).reset_index(drop=True)

    return pg_lines,pg_points

In [9]:
pg_infra = extract_pg_infrastructure('JPN')
print(type(pg_infra))

<class 'tuple'>


In [10]:
pg_infra[1]

Unnamed: 0,id,asset,geometry,buffered
0,1,substation,POINT (15911096.018 5305017.559),"POLYGON ((15911196.018 5305017.559, 15911194.0..."
1,2,substation,POINT (15830095.626 5281047.141),"POLYGON ((15830195.626 5281047.141, 15830193.7..."
2,3,substation,POINT (15784471.912 5372262.28),"POLYGON ((15784571.912 5372262.28, 15784569.99..."
3,4,substation,POINT (15725429.458 5296354.502),"POLYGON ((15725529.458 5296354.502, 15725527.5..."
4,5,substation,POINT (15678341.881 5273735.047),"POLYGON ((15678441.881 5273735.047, 15678439.9..."
...,...,...,...,...
63,64,substation,POINT (14654065.093 3892423.909),"POLYGON ((14654165.093 3892423.909, 14654163.1..."
64,65,substation,POINT (14567941.182 3869470.184),"POLYGON ((14568041.182 3869470.184, 14568039.2..."
65,66,substation,POINT (14554522.443 3832084.632),"POLYGON ((14554622.443 3832084.632, 14554620.5..."
66,67,substation,POINT (14530856.666 3761834.532),"POLYGON ((14530956.666 3761834.532, 14530954.7..."


In [74]:
def assess_damage_pg(country_code,pg_infra,hazard_type):

    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(vul_curve_path,hazard_type)
    
    # read infrastructure data:
    pg_lines,pg_points = pg_infra
    
    #calculate damaged lines/polygons/points in loop by climate_model
    damaged_lines = {}
    damaged_points = {}

    if hazard_type=='tc':
        # read wind data
        climate_models = ['_CMCC-CM2-VHR4','_CNRM-CM6-1-HR'] #'','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM'
        df_ds = open_storm_data(country_code)
        
        # remove assets that will not have any damage
        pg_points = pg_points.loc[pg_points.asset != 'plant'].reset_index(drop=True)
    
    elif hazard_type == 'fl':
        # read flood data
        climate_models = ['historical','rcp8p5']
        df_ds = open_flood_data(country_code)
        
    for climate_model in climate_models:
        if hazard_type=='tc':
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]
        elif hazard_type == 'fl':
            return_periods = ['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000'] 
            
        # assess damage for lines
        overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],pg_lines).T,
                                     columns=['asset','hazard_point'])

        if len(overlay_lines) == 0:
            damaged_lines[climate_model] = pd.DataFrame()

        else:
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_line_damages.append(get_damage_per_asset_per_rp(asset,
                                                                            df_ds[climate_model],
                                                                            pg_lines,
                                                                            curves,
                                                                            maxdam,
                                                                            return_period,
                                                                            country_code))

            get_asset_type_line = dict(zip(pg_lines.index,pg_lines.asset))
            
            if hazard_type=='tc':
                results = pd.DataFrame([item for sublist in collect_line_damages
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

                damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()
                
            elif hazard_type == 'fl':
                results = pd.DataFrame(collect_line_damages,columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

                results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

                #sum damage of line, cable, and minor_line
                results['curve'] = results['curve'].replace(['cable', 'minor_line'], 'line')
                results['asset_type'] = results['asset_type'].replace(['cable', 'minor_line'], 'line')

                damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index() 

        # assess damage for points
        overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],pg_points).T,
                                      columns=['asset','hazard_point'])

        if len(overlay_points) == 0:
            damaged_points[climate_model] = pd.DataFrame()

        else:
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_point_damages.append(get_damage_per_asset_per_rp(asset,
                                                                            df_ds[climate_model],
                                                                            pg_points,
                                                                            curves,
                                                                            maxdam,
                                                                            return_period,
                                                                            country_code))

            get_asset_type_point = dict(zip(pg_points.index,pg_points.asset))
            
            if hazard_type == 'tc':
                results = pd.DataFrame([item for sublist in collect_point_damages
                                        for item in sublist],columns=['rp','asset','curve','meandam','lowerdam','upperdam'])
            elif hazard_type == 'fl':
                results = pd.DataFrame(collect_point_damages ,columns=['rp','asset','curve','meandam','lowerdam','upperdam'])

            results['asset_type'] = results.asset.apply(lambda x : get_asset_type_point[x])    

            damaged_points[climate_model] = results.groupby(['rp','curve','asset_type']).sum().drop(['asset'], axis=1).reset_index()

                
    return damaged_lines,damaged_points

In [41]:
%%time
pg_damage_infra_tc = assess_damage_pg('JPN',pg_infra,'tc')

polyline damage calculation for JPN tc (): 100%|███████████████████████████████████████| 31/31 [00:03<00:00,  9.78it/s]
point damage calculation for JPN tc (): 100%|██████████████████████████████████████████| 68/68 [00:08<00:00,  7.87it/s]
polyline damage calculation for JPN tc (_CMCC-CM2-VHR4): 100%|█████████████████████████| 31/31 [00:03<00:00,  9.75it/s]
point damage calculation for JPN tc (_CMCC-CM2-VHR4): 100%|████████████████████████████| 68/68 [00:08<00:00,  7.79it/s]
polyline damage calculation for JPN tc (_CNRM-CM6-1-HR): 100%|█████████████████████████| 31/31 [00:03<00:00,  9.72it/s]
point damage calculation for JPN tc (_CNRM-CM6-1-HR): 100%|████████████████████████████| 68/68 [00:08<00:00,  7.76it/s]
polyline damage calculation for JPN tc (_EC-Earth3P-HR): 100%|█████████████████████████| 31/31 [00:03<00:00,  9.77it/s]
point damage calculation for JPN tc (_EC-Earth3P-HR): 100%|████████████████████████████| 68/68 [00:08<00:00,  7.71it/s]
polyline damage calculation for JPN tc (

CPU times: total: 1min 50s
Wall time: 1min 50s


In [45]:
pg_damage_infra_tc[1]['']

Unnamed: 0,rp,curve,asset_type,meandam,lowerdam,upperdam
0,1_1,W2_1_1,substation,2.638182e+05,1.978637e+05,3.297728e+05
1,1_1,W2_1_2,substation,5.276365e+05,3.957274e+05,6.595456e+05
2,1_1,W2_1_3,substation,1.319091e+06,9.893184e+05,1.648864e+06
3,1_1,W2_2_1,substation,1.826395e+04,1.369796e+04,2.282994e+04
4,1_1,W2_2_2,substation,3.652790e+04,2.739592e+04,4.565987e+04
...,...,...,...,...,...,...
205,1_500,W2_6_2,substation,2.243239e+08,1.682430e+08,2.804049e+08
206,1_500,W2_6_3,substation,5.608099e+08,4.206074e+08,7.010123e+08
207,1_500,W2_7_1,substation,4.358600e+07,3.268950e+07,5.448250e+07
208,1_500,W2_7_2,substation,8.717200e+07,6.537900e+07,1.089650e+08


In [33]:
%%time
pg_damage_infra_fl = assess_damage_pg('JPN',pg_infra,'fl')

Loading historical coastal flood data ...
Loading future coastal flood data ...


polyline damage calculation for JPN fl (historical): 100%|███████████████████████████████| 5/5 [00:00<00:00, 25.00it/s]
point damage calculation for JPN fl (historical): 100%|██████████████████████████████████| 1/1 [00:00<00:00, 45.45it/s]
polyline damage calculation for JPN fl (rcp8p5): 100%|███████████████████████████████████| 5/5 [00:00<00:00, 24.04it/s]
point damage calculation for JPN fl (rcp8p5): 100%|██████████████████████████████████████| 1/1 [00:00<00:00, 50.00it/s]


CPU times: total: 8min 7s
Wall time: 8min 8s


In [34]:
pg_damage_infra_fl

({'historical':        rp       curve  asset_type        meandam       lowerdam       upperdam
  0  rp0001  substation  substation  490588.160384  367941.120288  613235.200480
  1  rp0002  substation  substation  517498.328609  388123.746457  646872.910762
  2  rp0005  substation  substation  559647.208366  419735.406274  699559.010457
  3  rp0010  substation  substation  579966.806294  434975.104721  724958.507868
  4  rp0025  substation  substation  608736.040167  456552.030125  760920.050208
  5  rp0050  substation  substation  632107.357808  474080.518356  790134.197260
  6  rp0100  substation  substation  652340.431603  489255.323702  815425.539504
  7  rp0250  substation  substation  682990.145845  512242.609384  853737.682306
  8  rp0500  substation  substation  699857.025260  524892.768945  874821.281575
  9  rp1000  substation  substation  721527.021566  541145.266174  901908.776957,
  'rcp8p5':        rp       curve  asset_type        meandam       lowerdam       upperdam
  0

In [179]:
def country_analysis_pg(country_code,hazard_type):
    
    # extract infrastructure data from gov data
    pg_power_infra = extract_pg_infrastructure(country_code)
    
    # assess damage to hazard_type
    pg_damage_infra = assess_damage_pg(country_code,pg_power_infra,hazard_type)
    
    line_risk = {}
    plant_risk = {}
    substation_risk = {}

    if hazard_type=='tc':
        climate_models = ['_CMCC-CM2-VHR4','_CNRM-CM6-1-HR'] #'','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM'

        for i in range(len(pg_damage_infra)):
            for climate_model in climate_models:
                df = pg_damage_infra[i][climate_model]
                    
                if len(df) == 0:
                    print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))

                else:
                    with pd.ExcelWriter(os.path.join(output_path,'damage','{}_pg_{}{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                        df.to_excel(writer)

                    df['rp'] = df['rp'].replace(['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),
                                                '1_10{}'.format(climate_model),'1_25{}'.format(climate_model),'1_50{}'.format(climate_model),
                                                '1_100{}'.format(climate_model),'1_250{}'.format(climate_model),'1_500{}'.format(climate_model),
                                                '1_1000{}'.format(climate_model)],
                                                [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001])
                    
                    curve_code_substation = ['W2_1_1','W2_1_2','W2_1_3','W2_2_1','W2_2_2','W2_2_3','W2_3_1','W2_3_2','W2_3_3',
                                            'W2_4_1','W2_4_2','W2_4_3','W2_5_1','W2_5_2','W2_5_3','W2_6_1','W2_6_2','W2_6_3',
                                            'W2_7_1','W2_7_2','W2_7_3']
                    
                    curve_code_line = ['W5_1','W5_2','W5_3']

                    #assess risk for power lines
                    if i == 0:
                        for curve_code in curve_code_line:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            loss_list_mean = loss_list.meandam.values.tolist()
                            loss_list_lower = loss_list.lowerdam.values.tolist()
                            loss_list_upper = loss_list.upperdam.values.tolist()
                            RPS = loss_list.rp.values.tolist()
                            
                            line_risk[curve_code] = {
                                'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                            }
                            #print(line_risk_curve)
                    
                    #assess risk for power substations                
                    elif i == 1:                        
                        for curve_code in curve_code_substation:
                            loss_list = df.loc[df['curve'] == curve_code]
                            loss_list = loss_list.sort_values(by='rp',ascending=False)
                            loss_list_mean = loss_list.meandam.values.tolist()
                            loss_list_lower = loss_list.lowerdam.values.tolist()
                            loss_list_upper = loss_list.upperdam.values.tolist()
                            RPS = loss_list.rp.values.tolist()
                            
                            substation_risk[curve_code] = {
                                'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                            }


    elif hazard_type=='fl':
        climate_models = ['historical','rcp8p5']
    
        for i in range(len(pg_damage_infra)):
            for climate_model in climate_models:
                df = pg_damage_infra[i][climate_model]
                    
                if len(df) == 0:
                    print("No {}_{} risk of infra_type {} in {}".format(hazard_type,climate_model,i,country_code))

                else:
                    with pd.ExcelWriter(os.path.join(output_path,'damage','{}_pg_{}_{}_damage_{}'.format(country_code,hazard_type,climate_model,i)+'.xlsx')) as writer:
                        df.to_excel(writer)

                    df['rp'] = df['rp'].replace(['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000'],
                                                [1,0.5,0.2,0.1,0.04,0.02,0.01,0.004,0.002,0.001])

                    #assess risk for power lines
                    if i == 0:
                        loss_list_mean = df.meandam.values.tolist()
                        loss_list_lower = df.lowerdam.values.tolist()
                        loss_list_upper = df.upperdam.values.tolist()
                        RPS = df.rp.values.tolist()
                        line_risk[climate_model] = {
                            'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                            'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                            'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                        }

                    #assess risk for power plants and substations                
                    elif i == 1:
                        loss_list = df.loc[df['asset_type'] == 'plant']
                        if len(loss_list) == 0:
                            print("No risk of plants ...")
                        
                        else:
                            loss_list_mean = loss_list.meandam.values.tolist()
                            loss_list_lower = loss_list.lowerdam.values.tolist()
                            loss_list_upper = loss_list.upperdam.values.tolist()
                            RPS = df.loc[df['asset_type'] == 'plant']
                            RPS = RPS.rp.values.tolist()
                            plant_risk[climate_model] = {
                                'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                            }
                            
                        loss_list = df.loc[df['asset_type'] == 'substation']
                        if len(loss_list) == 0:
                            print("No risk of substations ...")
                        
                        else:
                            loss_list_mean = loss_list.meandam.values.tolist()
                            loss_list_lower = loss_list.lowerdam.values.tolist()
                            loss_list_upper = loss_list.upperdam.values.tolist()
                            RPS = df.loc[df['asset_type'] == 'substation']
                            RPS = RPS.rp.values.tolist()
                            substation_risk[climate_model] = {
                                'mean_risk': integrate.simps(y=loss_list_mean[::-1], x=RPS[::-1]),
                                'lower_risk': integrate.simps(y=loss_list_lower[::-1], x=RPS[::-1]),
                                'upper_risk': integrate.simps(y=loss_list_upper[::-1], x=RPS[::-1])
                                }
                            
    #return pd.DataFrame(line_risk),pd.DataFrame(plant_risk),pd.DataFrame(substation_risk)
    return line_risk,plant_risk,substation_risk

In [None]:
line_risk_dict = {}

for climate_model in climate_model_list:
    # Create a dataframe for the current climate model
    line_risk_curve = create_line_risk_curve(climate_model)
    
    # Create a dictionary to store the three curves for the current line risk curve
    curve_dict = {
        'curve_1': line_risk_curve['curve_1'],
        'curve_2': line_risk_curve['curve_2'],
        'curve_3': line_risk_curve['curve_3']
    }
    
    # Add the curve dictionary to the line risk dictionary using the climate model as the key
    line_risk_dict[climate_model] = curve_dict


In [None]:
pg_risk_tc = country_analysis_pg('JPN','tc')

In [178]:
pd.DataFrame(pg_risk_tc[2])

Unnamed: 0,_CMCC-CM2-VHR4,_CNRM-CM6-1-HR
0,W2_7_3 mean_risk 1.434933e...,W2_7_3 mean_risk 1.544563e...


In [60]:
pg_risk_fl = country_analysis_pg('JPN','fl')

Loading historical coastal flood data ...
Loading future coastal flood data ...


polyline damage calculation for JPN fl (historical): 100%|███████████████████████████████| 5/5 [00:00<00:00, 24.88it/s]
point damage calculation for JPN fl (historical): 100%|██████████████████████████████████| 1/1 [00:00<00:00, 41.67it/s]
polyline damage calculation for JPN fl (rcp8p5): 100%|███████████████████████████████████| 5/5 [00:00<00:00, 23.80it/s]
point damage calculation for JPN fl (rcp8p5): 100%|██████████████████████████████████████| 1/1 [00:00<00:00, 49.92it/s]


No risk of plants ...
No risk of plants ...


# Save results

In [62]:
def risk_output(country_code,hazard_type,infra_type):
  
    if infra_type == 'osm':
        line_risk,plant_risk,substation_risk,tower_risk,pole_risk = country_analysis_osm(country_code,hazard_type)
        
        if hazard_type == 'tc':
            climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']

            for climate_model in climate_models:

                # create a Pandas Excel writer using openpyxl engine
                writer = pd.ExcelWriter(os.path.join(output_path,'risk','{}_{}_{}{}_risk'.format(country_code,infra_type,hazard_type,climate_model)+'.xlsx'),
                                        engine='openpyxl')
                
                # write each dataframe to a different sheet
                if len(line_risk) != 0:
                    line_risk[climate_model].to_excel(writer, sheet_name='line_risk')
                if len(substation_risk) != 0:
                    substation_risk[climate_model].to_excel(writer, sheet_name='substation_risk')
                if len(tower_risk) != 0:
                    tower_risk[climate_model].to_excel(writer, sheet_name='tower_risk')
                if len(pole_risk) != 0:
                    pole_risk[climate_model].to_excel(writer, sheet_name='pole_risk')
                
                # save the Excel file
                writer.save()

        elif hazard_type == 'fl':
            # create a Pandas Excel writer using openpyxl engine
            writer = pd.ExcelWriter(os.path.join(output_path,'risk','{}_{}_{}_risk'.format(country_code,infra_type,hazard_type)+'.xlsx'), engine='openpyxl')
            
            # write each dataframe to a different sheet
            if len(line_risk) != 0:
                line_risk.to_excel(writer, sheet_name='line_risk')
            if len(plant_risk) != 0:
                plant_risk.to_excel(writer, sheet_name='plant_risk')
            if len(substation_risk) != 0:
                substation_risk.to_excel(writer, sheet_name='substation_risk')
            if len(tower_risk) != 0:
                tower_risk.to_excel(writer, sheet_name='tower_risk')
            if len(pole_risk) != 0:
                pole_risk.to_excel(writer, sheet_name='pole_risk')
            
            # save the Excel file
            writer.save()

    elif infra_type == 'gov':
        line_risk,plant_risk,substation_risk = country_analysis_pg(country_code,hazard_type)
        
        if hazard_type == 'tc':
            climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']

            for climate_model in climate_models:

                # create a Pandas Excel writer using openpyxl engine
                writer = pd.ExcelWriter(os.path.join(output_path,'risk','{}_{}_{}{}_risk'.format(country_code,infra_type,hazard_type,climate_model)+'.xlsx'),
                                        engine='openpyxl')
                
                # write each dataframe to a different sheet
                if len(line_risk) != 0:
                    line_risk[climate_model].to_excel(writer, sheet_name='line_risk')
                if len(substation_risk) != 0:
                    substation_risk[climate_model].to_excel(writer, sheet_name='substation_risk')
                    
                # save the Excel file
                writer.save()

        elif hazard_type == 'fl':
            # create a Pandas Excel writer using openpyxl engine
            writer = pd.ExcelWriter(os.path.join(output_path,'risk','{}_{}_{}_risk'.format(country_code,infra_type,hazard_type)+'.xlsx'), engine='openpyxl')
            
            # write each dataframe to a different sheet
            if len(line_risk) != 0:
                line_risk.to_excel(writer, sheet_name='line_risk')
            if len(plant_risk) != 0:
                plant_risk.to_excel(writer, sheet_name='plant_risk')
            if len(substation_risk) != 0:
                substation_risk.to_excel(writer, sheet_name='substation_risk')

            # save the Excel file
            writer.save()

In [63]:
risk_output('JPN','tc','gov')

polyline damage calculation for JPN tc (): 100%|███████████████████████████████████████| 31/31 [00:03<00:00,  9.50it/s]
point damage calculation for JPN tc (): 100%|██████████████████████████████████████████| 68/68 [00:09<00:00,  7.22it/s]
polyline damage calculation for JPN tc (_CMCC-CM2-VHR4): 100%|█████████████████████████| 31/31 [00:03<00:00,  9.05it/s]
point damage calculation for JPN tc (_CMCC-CM2-VHR4): 100%|████████████████████████████| 68/68 [00:09<00:00,  7.20it/s]
polyline damage calculation for JPN tc (_CNRM-CM6-1-HR): 100%|█████████████████████████| 31/31 [00:03<00:00,  9.49it/s]
point damage calculation for JPN tc (_CNRM-CM6-1-HR): 100%|████████████████████████████| 68/68 [00:09<00:00,  7.30it/s]
polyline damage calculation for JPN tc (_EC-Earth3P-HR): 100%|█████████████████████████| 31/31 [00:03<00:00,  9.07it/s]
point damage calculation for JPN tc (_EC-Earth3P-HR): 100%|████████████████████████████| 68/68 [00:09<00:00,  7.30it/s]
polyline damage calculation for JPN tc (

KeyError: ''

In [356]:
osm_damage_infra[1]['historical'].loc[osm_damage_infra[1]['historical']['asset_type'] == 'plant']

Unnamed: 0,rp,curve,asset_type,meandam,lowerdam,upperdam
0,1.0,plant,plant,1658665000.0,452363300.0,2261816000.0
2,0.5,plant,plant,1730781000.0,472031100.0,2360156000.0
4,0.2,plant,plant,2388258000.0,651343000.0,3256715000.0
6,0.1,plant,plant,2552940000.0,696256400.0,3481282000.0
8,0.04,plant,plant,2767454000.0,754760100.0,3773801000.0
10,0.02,plant,plant,2921201000.0,796691200.0,3983456000.0
12,0.01,plant,plant,3080762000.0,840207900.0,4201039000.0
14,0.004,plant,plant,3356950000.0,915532000.0,4577660000.0
16,0.002,plant,plant,3528684000.0,962368400.0,4811842000.0
18,0.001,plant,plant,3698459000.0,1008671000.0,5043354000.0


In [336]:
osm_damage_infra[0]['historical'].loc[:,"rp"]

0     1.000
1     1.000
2     1.000
3     0.500
4     0.500
5     0.500
6     0.200
7     0.200
8     0.200
9     0.100
10    0.100
11    0.100
12    0.040
13    0.040
14    0.040
15    0.020
16    0.020
17    0.020
18    0.010
19    0.010
20    0.010
21    0.004
22    0.004
23    0.004
24    0.002
25    0.002
26    0.002
27    0.001
28    0.001
29    0.001
Name: rp, dtype: float64

In [None]:
"""
def clip_gridfinder(country_code):
    base_map_path = os.path.join(data_path,'base_map')

    cty_boundary_path = os.path.join(base_map_path,'gadm41_{}.gpkg'.format(country_code))
    cty_boundary = gpd.read_file(cty_boundary_path)
    #mask = pd.DataFrame(mask.copy())
    #mask.geometry = pygeos.from_shapely(mask.geometry)
    #mask['geometry'] = reproject(mask)

    gridfinder_path = r'C:\Users\mye500\OneDrive - Vrije Universiteit Amsterdam\01_Research-Projects\01_risk_assessment\PG_data\gridfinder\grid.gpkg'
    gridfinder = gpd.read_file(gridfinder_path)
    #gridfinder = pd.DataFrame(gridfinder.copy())
    #gridfinder.geometry = pygeos.from_shapely(gridfinder.geometry)
    #gridfinder['geometry'] = reproject(gridfinder)

    clipped = gpd.clip(gridfinder,cty_boundary)

    return clipped

clip_gridfinder('TWN')
"""