In [None]:
import geopandas as gpd
import pandas as pd
from osgeo import ogr,gdal
import os
import xarray as xr
import rasterio
import numpy as np
import pyproj
from pygeos import from_wkb,from_wkt
import pygeos
from tqdm import tqdm
from shapely.wkb import loads
from pathlib import Path
import glob
from shapely.geometry import mapping
pd.options.mode.chained_assignment = None
from rasterio.mask import mask
import rioxarray
import matplotlib.pyplot as plt

In [17]:
gdal.SetConfigOption("OSM_CONFIG_FILE", os.path.join('..',"osmconf.ini"))

# change paths to make it work on your own machine
data_path = os.path.join('C:\\','data','pg_risk_analysis')
tc_path = os.path.join(data_path,'tc_netcdf')
fl_path = os.path.join(data_path,'GLOFRIS')
osm_data_path = os.path.join('C:\\','data','country_osm')
pg_data_path = os.path.join(data_path,'pg_data')
vul_curve_path = os.path.join(data_path,'vulnerability_curves','input_vulnerability_data.xlsx')
output_path = os.path.join('C:\\','projects','pg_risk_analysis','output')

In [3]:
def query_b(geoType,keyCol,**valConstraint):
    """
    This function builds an SQL query from the values passed to the retrieve() function.
    Arguments:
         *geoType* : Type of geometry (osm layer) to search for.
         *keyCol* : A list of keys/columns that should be selected from the layer.
         ***valConstraint* : A dictionary of constraints for the values. e.g. WHERE 'value'>20 or 'value'='constraint'
    Returns:
        *string: : a SQL query string.
    """
    query = "SELECT " + "osm_id"
    for a in keyCol: query+= ","+ a  
    query += " FROM " + geoType + " WHERE "
    # If there are values in the dictionary, add constraint clauses
    if valConstraint: 
        for a in [*valConstraint]:
            # For each value of the key, add the constraint
            for b in valConstraint[a]: query += a + b
        query+= " AND "
    # Always ensures the first key/col provided is not Null.
    query+= ""+str(keyCol[0]) +" IS NOT NULL" 
    return query 


def retrieve(osm_path,geoType,keyCol,**valConstraint):
    """
    Function to extract specified geometry and keys/values from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.     
        *geoType* : Type of Geometry to retrieve. e.g. lines, multipolygons, etc.
        *keyCol* : These keys will be returned as columns in the dataframe.
        ***valConstraint: A dictionary specifiying the value constraints.  
        A key can have multiple values (as a list) for more than one constraint for key/value.  
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with all columns, geometries, and constraints specified.    
    """
    driver=ogr.GetDriverByName('OSM')
    data = driver.Open(osm_path)
    query = query_b(geoType,keyCol,**valConstraint)
    sql_lyr = data.ExecuteSQL(query)
    features =[]
    # cl = columns 
    cl = ['osm_id'] 
    for a in keyCol: cl.append(a)
    if data is not None:
        print('query is finished, lets start the loop')
        for feature in tqdm(sql_lyr,desc='extract'):
            #try:
            if feature.GetField(keyCol[0]) is not None:
                geom1 = (feature.geometry().ExportToWkt())
                #print(geom1)
                geom = from_wkt(feature.geometry().ExportToWkt()) 
                if geom is None:
                    continue
                # field will become a row in the dataframe.
                field = []
                for i in cl: field.append(feature.GetField(i))
                field.append(geom)   
                features.append(field)
            #except:
            #    print("WARNING: skipped OSM feature")   
    else:
        print("ERROR: Nonetype error when requesting SQL. Check required.")    
    cl.append('geometry')                   
    if len(features) > 0:
        return pd.DataFrame(features,columns=cl)
    else:
        print("WARNING: No features or No Memory. returning empty GeoDataFrame") 
        return pd.DataFrame(columns=['osm_id','geometry'])

def power_polyline(osm_path):
    """
    Function to extract all energy linestrings from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """
    df = retrieve(osm_path,'lines',['power','voltage'])
    
    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})
    
    #print(df) #check infra keys
    
    return df.reset_index(drop=True)

def power_polygon(osm_path): # check with joel, something was wrong here with extracting substations
    """
    Function to extract energy polygons from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """
    df = retrieve(osm_path,'multipolygons',['other_tags']) 
    
    df = df.loc[(df.other_tags.str.contains('power'))]   #keep rows containing power data         
    df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})     
    
    df['asset'].loc[df['asset'].str.contains('"power"=>"substation"', case=False)]  = 'substation' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"plant"', case=False)] = 'plant' #specify row
    
    df = df.loc[(df.asset == 'substation') | (df.asset == 'plant')]
            
    return df.reset_index(drop=True) 

def electricity(osm_path):
    """
    Function to extract building polygons from OpenStreetMap    
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with all unique building polygons.    
    """
    df = retrieve(osm_path,'multipolygons',['power'])
    
    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})
    
    #df = df[df.asset!='generator']
    df['asset'].loc[df['asset'].str.contains('"power"=>"substation"', case=False)]  = 'substation' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"plant"', case=False)] = 'plant' #specify row
    
    #print(df)  #check infra keys
    
    df = df.loc[(df.asset == 'substation') | (df.asset == 'plant')]
    
    return df.reset_index(drop=True)

def retrieve_poly_subs(osm_path, w_list, b_list):
    """
    Function to extract electricity substation polygons from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region
        for which we want to do the analysis.
        *w_list* :  white list of keywords to search in the other_tags columns
        *b_list* :  black list of keywords of rows that should not be selected
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique substation.
    """
    df = retrieve(osm_path,'multipolygons',['other_tags'])
    df = df[df.other_tags.str.contains('substation', case=False, na=False)]
    #df = df.loc[(df.other_tags.str.contains('substation'))]
    df = df[~df.other_tags.str.contains('|'.join(b_list))]
    #df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})
    df['asset']  = 'substation' #specify row
    #df = df.loc[(df.asset == 'substation')] #specify row
    return df.reset_index(drop=True)

def power_point(osm_path):
    """
    Function to extract energy points from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """   
    df = retrieve(osm_path,'points',['other_tags']) 
    df = df.loc[(df.other_tags.str.contains('power'))]  #keep rows containing power data       
    df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})     
    
    #print(df)
    
    df['asset'].loc[df['asset'].str.contains('"power"=>"tower"', case=False)]  = 'power_tower' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"pole"', case=False)] = 'power_pole' #specify row
    #df['asset'].loc[df['asset'].str.contains('"utility"=>"power"', case=False)] = 'power_tower' #specify row
    
    df = df.loc[(df.asset == 'power_tower') | (df.asset == 'power_pole')]
            
    return df.reset_index(drop=True)

In [18]:
def reproject(df_ds,current_crs="epsg:4326",approximate_crs = "epsg:3857"):
    """[summary]

    Args:
        df_ds ([type]): [description]
        current_crs (str, optional): [description]. Defaults to "epsg:3857".
        approximate_crs (str, optional): [description]. Defaults to "epsg:4326".

    Returns:
        [type]: [description]
    """    

    geometries = df_ds['geometry']
    coords = pygeos.get_coordinates(geometries)
    transformer=pyproj.Transformer.from_crs(current_crs, approximate_crs,always_xy=True)
    new_coords = transformer.transform(coords[:, 0], coords[:, 1])
    
    return pygeos.set_coordinates(geometries.copy(), np.array(new_coords).T) 

def load_curves_maxdam(vul_curve_path,hazard_type):
    """[summary]

    Args:
        data_path ([type]): [description]

    Returns:
        [type]: [description]
    """

    if hazard_type == 'tc':
        sheet_name = 'wind_curves'
    
    elif hazard_type == 'fl':
        sheet_name = 'flooding_curves'
    
    # load curves and maximum damages as separate inputs
    curves = pd.read_excel(vul_curve_path,sheet_name=sheet_name,skiprows=11,index_col=[0])
    maxdam = pd.read_excel(vul_curve_path,sheet_name=sheet_name,index_col=[0]).iloc[:8]
    #print(curves)
    
    curves.columns = maxdam.columns

    #transpose maxdam so its easier work with the dataframe
    maxdam = maxdam.T

    #interpolate the curves to fill missing values
    curves = curves.interpolate()
    
    #print(curves)
   
    return curves,maxdam

def buffer_assets(assets,buffer_size=100):
    """[summary]

    Args:
        assets ([type]): [description]
        buffer_size (int, optional): [description]. Defaults to 100.

    Returns:
        [type]: [description]
    """    
    assets['buffered'] = pygeos.buffer(assets.geometry.values,buffer_size)
    return assets

def overlay_hazard_assets(df_ds,assets):
    """[summary]

    Args:
        df_ds ([type]): [description]
        assets ([type]): [description]

    Returns:
        [type]: [description]
    """
    #overlay 
    hazard_tree = pygeos.STRtree(df_ds.geometry.values)
    if (pygeos.get_type_id(assets.iloc[0].geometry) == 3) | (pygeos.get_type_id(assets.iloc[0].geometry) == 6):
        return  hazard_tree.query_bulk(assets.geometry,predicate='intersects')    
    else:
        return  hazard_tree.query_bulk(assets.buffered,predicate='intersects')
    
def get_damage_per_asset_per_rp(asset,df_ds,assets,curves,maxdam,return_period,country):
    """[summary]

    Args:
        asset ([type]): [description]
        df_ds ([type]): [description]
        assets ([type]): [description]
        grid_size (int, optional): [description]. Defaults to 90.

    Returns:
        [type]: [description]
    """    

    # find the exact hazard overlays:
    get_hazard_points = df_ds.iloc[asset[1]['hazard_point'].values].reset_index()
    get_hazard_points = get_hazard_points.loc[pygeos.intersects(get_hazard_points.geometry.values,assets.iloc[asset[0]].geometry)]

    asset_type = assets.iloc[asset[0]].asset
    asset_geom = assets.iloc[asset[0]].geometry

    if asset_type in ['plant','substation','generator']:
        maxdam_asset = maxdam.loc[asset_type].MaxDam/pygeos.area(asset_geom)
    else:
        maxdam_asset = maxdam.loc[asset_type].MaxDam


    hazard_intensity = curves[asset_type].index.values
    fragility_values = curves[asset_type].values
    
    if len(get_hazard_points) == 0:
        return asset[0],0
    else:
        
        if pygeos.get_type_id(asset_geom) == 1:
            get_hazard_points['overlay_meters'] = pygeos.length(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
            return asset[0],np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,fragility_values))*get_hazard_points.overlay_meters*maxdam_asset)
        
        elif  pygeos.get_type_id(asset_geom) == 3:
            get_hazard_points['overlay_m2'] = pygeos.area(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
            return asset[0],get_hazard_points.apply(lambda x: np.interp(x[return_period], hazard_intensity, fragility_values)*maxdam_asset*x.overlay_m2,axis=1).sum()     
        
        else:
            return asset[0],np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,fragility_values))*maxdam_asset)

In [5]:
load_curves_maxdam(vul_curve_path,hazard_type='tc')

(                  power_tower  power_tower.1  power_tower.2  power_tower.3  \
 Wind speed (m/s)                                                             
 0.000000                  0.0            0.0            0.0            0.0   
 1.000000                  0.0            0.0            0.0            0.0   
 1.388889                  0.0            0.0            0.0            0.0   
 2.000000                  0.0            0.0            0.0            0.0   
 2.235200                  0.0            0.0            0.0            0.0   
 ...                       ...            ...            ...            ...   
 380.000000                1.0            1.0            1.0            1.0   
 385.000000                1.0            1.0            1.0            1.0   
 390.000000                1.0            1.0            1.0            1.0   
 395.000000                1.0            1.0            1.0            1.0   
 400.000000                1.0            1.0       

In [None]:
def load_storm_data(climate_model,basin):
    
    with xr.open_dataset(os.path.join(tc_path,'STORM_FIXED_RETURN_PERIODS{}_{}.nc'.format(climate_model,basin))) as ds:
        
        # get the mean values
        df_ds = ds['mean'].to_dataframe().unstack(level=2).reset_index()

        # create geometry values and drop lat lon columns
        df_ds['geometry'] = [pygeos.points(x) for x in list(zip(df_ds['lon'],df_ds['lat']))]
        df_ds = df_ds.drop(['lat','lon'],axis=1,level=0)
        
        # interpolate wind speeds of 1, 2, and 5-yr return period
        ## rename columns to return periods (must be integer for interpolating)
        df_ds_geometry = pd.DataFrame()
        df_ds_geometry['geometry'] = df_ds['geometry']
        df_ds = df_ds.drop(['geometry'],axis=1,level=0)
        df_ds.columns = [int(x) for x in ds['rp']]
        df_ds[1] = np.nan
        df_ds[2] = np.nan
        df_ds[5] = np.nan
        df_ds[25] = np.nan
        df_ds[250] = np.nan
        df_ds = df_ds.reindex(sorted(df_ds.columns), axis=1)
        df_ds = df_ds.interpolate(method='pchip',axis=1,limit_direction='both')
        df_ds['geometry'] = df_ds_geometry['geometry']
        #df_ds = df_ds[['1','2','5','10','25','50','100','250','500','1000','geometry']]
        df_ds = df_ds[[1,2,5,10,25,50,100,250,500,1000,'geometry']]
        
        #rename columns to return periods
        #return_periods = ['1_{}{}'.format(int(x),climate_model) for x in ds['rp']]
        #df_ds.columns = ['1_{}{}'.format(int(x),climate_model) for x in [1,2,5,10,25,50,100,250,500,1000]] +['geometry']     
        df_ds['geometry'] = pygeos.buffer(df_ds.geometry,radius=0.1/2,cap_style='square').values
        df_ds['geometry'] = reproject(df_ds)
            
        # drop all non values to reduce size
        #df_ds = df_ds.loc[~df_ds['1_10000{}'.format(climate_model)].isna()].reset_index(drop=True)
        df_ds = df_ds.fillna(0)
        #df_ds = df_ds[['1','2','5','10','25','50','100','250','500','1000']]
        #df_ds = df_ds[['1_{}{}'.format(int(x),climate_model) for x in list(df_ds.columns.get_level_values(0))[:-1]]+['geometry']]

    return df_ds

def open_storm_data():
    climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
    df_ds = {}
    for climate_model in climate_models:
        #combine STORM data from different basins
        wp = load_storm_data(climate_model,'WP')
        sp = load_storm_data(climate_model,'SP')
        ni = load_storm_data(climate_model,'NI')
        si = load_storm_data(climate_model,'SI')
        df_ds_cl = pd.concat([wp,sp,ni,si],keys=['wp','sp','ni','si'])

        df_ds_cl = df_ds_cl.reset_index(drop=True)
        df_ds[climate_model] = df_ds_cl
    
    return df_ds

In [None]:
load_storm_data('_CMCC-CM2-VHR4','WP')

In [40]:
%%time
open_storm_data()

KeyboardInterrupt: 

In [8]:
def clip_flood_data(country_code):

    # load country geometry file and create geometry to clip
    ne_countries = gpd.read_file('C:\\Data\\natural_earth\\ne_10m_admin_0_countries.shp') 
    geometry = ne_countries.loc[ne_countries['ISO_A3']==country_code].geometry.values[0]
    geoms = [mapping(geometry)]
    
    #climate_model: historical, rcp4p5, rcp8p5; time_period: hist, 2030, 2050, 2080
    rps = ['0001','0002','0005','0010','0025','0050','0100','0250','0500','1000']
    climate_models = ['historical','rcp8p5']
    
    
    for rp in rps:
        #global input_file
        for climate_model in climate_models:
            if climate_model=='historical':
                #f rps=='0001':
                    input_file = os.path.join(fl_path,'global',
                                              'inuncoast_{}_nosub_hist_rp{}_0.tif'.format(climate_model,rp)) 
                #elif rps==['0002','0005','0010','0025','0050','0100','0250','0500','1000']:
                #    input_file = os.path.join(fl_path,'global',
                #                              'inuncoast_{}_nosub_hist_rp{}_0.tif'.format(climate_model,rp)) 
            elif climate_model=='rcp8p5':
                #f rps=='0001':
                    input_file = os.path.join(fl_path,'global',
                                              'inuncoast_{}_nosub_2030_rp{}_0.tif'.format(climate_model,rp))
                #elif rps==['0002','0005','0010','0025','0050','0100','0250','0500','1000']:
                #    input_file = os.path.join(fl_path,'global',
                #                              'inuncoast_{}_nosub_2030_rp{}_0.tif'.format(climate_model,rp))
            
            # load raster file and save clipped version
            with rasterio.open(input_file) as src:
                out_image, out_transform = mask(src, geoms, crop=True)
                out_meta = src.meta

                out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})

                file_path = os.path.join(fl_path,'country','_'.join([country_code]+input_file.split('_')[3:]))

                with rasterio.open(file_path, "w", **out_meta) as dest:
                    dest.write(out_image)

def load_flood_data(country_code,scenario_type):
    files = [x for x in os.listdir(os.path.join(fl_path,'country'))  if country_code in x ]
    
    rps = ['0001','0002','0005','0010','0025','0050','0100','0250','0500','1000']
    collect_df_ds = []
    
    if scenario_type=='historical':
        print('Loading historical coastal flood data ...')
        for rp in rps:
            #for file in files:
            file_path = os.path.join(fl_path,'country','{}_{}_nosub_hist_rp{}_0.tif'.format(country_code,scenario_type,rp))
            with xr.open_dataset(file_path) as ds: #, engine="rasterio"
                df_ds = ds.to_dataframe().reset_index()
                df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
                df_ds = df_ds.rename(columns={'band_data': 'rp'+rp}) #rename to return period
                df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
                df_ds = df_ds.dropna()
                df_ds = df_ds.reset_index(drop=True)
                df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=20/2,cap_style='square').values  #?????????????????????????
                df_ds['geometry'] = reproject(df_ds)
                collect_df_ds.append(df_ds)

        df_all = collect_df_ds[0].merge(collect_df_ds[1]).merge(collect_df_ds[2]).merge(collect_df_ds[3]).merge(collect_df_ds[4])\
                 .merge(collect_df_ds[5]).merge(collect_df_ds[6]).merge(collect_df_ds[7]).merge(collect_df_ds[8]).merge(collect_df_ds[9])

    elif scenario_type=='rcp8p5':
        print('Loading future coastal flood data ...')
        for rp in rps:
            #for file in files:
            file_path = os.path.join(fl_path,'country','{}_{}_nosub_2030_rp{}_0.tif'.format(country_code,scenario_type,rp))
            with xr.open_dataset(file_path) as ds: #, engine="rasterio"
                df_ds = ds.to_dataframe().reset_index()
                df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
                df_ds = df_ds.rename(columns={'band_data': 'rp'+rp}) #rename to return period
                df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
                df_ds = df_ds.dropna()
                df_ds = df_ds.reset_index(drop=True)
                df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=20/2,cap_style='square').values
                df_ds['geometry'] = reproject(df_ds)
                collect_df_ds.append(df_ds)

        df_all = collect_df_ds[0].merge(collect_df_ds[1]).merge(collect_df_ds[2]).merge(collect_df_ds[3]).merge(collect_df_ds[4])\
                 .merge(collect_df_ds[5]).merge(collect_df_ds[6]).merge(collect_df_ds[7]).merge(collect_df_ds[8]).merge(collect_df_ds[9])

    return df_all

def open_flood_data(country_code):
    scenario_types = ['historical','rcp8p5']
    df_ds = {}
    for scenario_type in scenario_types:
        #hist = load_flood_data(country_code,'historical')
        #rcp8p5 = load_flood_data(country_code,'rcp8p5')
        #df_ds_sc = pd.concat([hist,rcp8p5],keys=['historical','rcp8p5'])
        df_ds_sc = load_flood_data(country_code,scenario_type)

        df_ds[scenario_type] = df_ds_sc
    
    return df_ds

In [10]:
clip_flood_data('TWN')

In [23]:
%%time
twn_flood = open_flood_data('TWN')
#print(type(lao_flood))
twn_flood

Loading historical coastal flood data ...
Loading future coastal flood data ...
CPU times: total: 30.2 s
Wall time: 30.2 s


{'historical':          rp0001                                           geometry    rp0002  \
 0      1.764207  POLYGON ((14280899.175 4092200.019, 14280899.1...  1.908693   
 1      0.000000  POLYGON ((14280899.175 4084326.103, 14280899.1...  0.000000   
 2      0.000000  POLYGON ((14280899.175 4083201.706, 14280899.1...  0.000000   
 3      0.000000  POLYGON ((14281826.838 4093325.313, 14281826.8...  0.000000   
 4      0.000000  POLYGON ((14281826.838 4092200.019, 14281826.8...  0.000000   
 ...         ...                                                ...       ...   
 45721  0.000000  POLYGON ((14691853.629 4163314.925, 14691853.6...  0.000000   
 45722  0.000000  POLYGON ((14692781.291 4165579.974, 14692781.2...  0.000000   
 45723  0.000000  POLYGON ((14692781.291 4164447.392, 14692781.2...  0.000000   
 45724  0.000000  POLYGON ((14692781.291 4163314.925, 14692781.2...  0.000000   
 45725  0.000000  POLYGON ((14693708.954 4164447.392, 14693708.9...  0.000000   
 
          rp

# OSM data processing

In [11]:
def extract_osm_infrastructure(country_code,osm_data_path):

    # lines
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    power_lines_country = power_polyline(osm_path)
    power_lines_country['geometry'] = reproject(power_lines_country)
    power_lines_country = buffer_assets(power_lines_country.loc[power_lines_country.asset.isin(
        ['cable','minor_cable','line','minor_line'])],buffer_size=100).reset_index(drop=True)
    
    # polygons
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    power_poly_country = electricity(osm_path)
    power_poly_country['geometry'] = reproject(power_poly_country)
    
    # points
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    power_points_country = power_point(osm_path)
    power_points_country['geometry'] = reproject(power_points_country)
    power_points_country = buffer_assets(power_points_country.loc[power_points_country.asset.isin(
        ['power_tower','power_pole'])],buffer_size=100).reset_index(drop=True)
    #print(power_points_country)
    #print(type(power_points_country))

    return power_lines_country,power_poly_country,power_points_country


osm_power_infra = extract_osm_infrastructure('TWN',osm_data_path)
#print(type(osm_power_infra))

query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████████████| 2594/2594 [00:08<00:00, 323.80it/s]


query is finished, lets start the loop


extract: 100%|███████████████████████████████████████████████████████████████████████| 244/244 [00:18<00:00, 13.44it/s]


query is finished, lets start the loop


extract: 100%|█████████████████████████████████████████████████████████████| 1608621/1608621 [02:55<00:00, 9159.37it/s]


In [12]:
osm_power_infra

(          osm_id asset voltage  \
 0       52174646  line  161000   
 1       52174647  line    None   
 2       52174648  line  161000   
 3       52176335  line    None   
 4       52176336  line    None   
 ...          ...   ...     ...   
 1746  1098796452  line  161000   
 1747  1098796453  line  161000   
 1748  1098796454  line   69000   
 1749  1098796455  line   69000   
 1750  1100557068  line    None   
 
                                                geometry  \
 0     LINESTRING (13524085.736 2879026.882, 13523921...   
 1     LINESTRING (13523921.651 2879313.221, 13524074...   
 2     LINESTRING (13522366.484 2874157.801, 13522432...   
 3     LINESTRING (13525160.859 2876713.703, 13525168...   
 4     LINESTRING (13528063.994 2877032.402, 13528287...   
 ...                                                 ...   
 1746  LINESTRING (13481629.539 2880939.954, 13481633...   
 1747  LINESTRING (13481629.539 2880939.954, 13481624...   
 1748  LINESTRING (13484630.78 2882484

In [19]:
def assess_damage_osm(country_code,osm_power_infra,hazard_type):
    
    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(vul_curve_path,hazard_type)
    curves['line'] = 1 # remove this when things work!
    
    # read infrastructure data:
    power_lines,power_poly,power_points = osm_power_infra
    #print(type(power_lines))
    #print(type(osm_power_infra))
    
    if hazard_type=='tc':
        # read wind data
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        df_ds = open_storm_data()
        
        # calculate damaged lines in loop by climate_model
        damaged_lines = {}
        for climate_model in climate_models:
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],power_lines).T,
                                         columns=['asset','hazard_point'])
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                           df_ds[climate_model],
                                                                                           power_lines,
                                                                                           curves,
                                                                                           maxdam,
                                                                                           return_period,
                                                                                           country_code)])

        collect_line_damages = [(line[0],line[1][0],line[1][1]) for line in collect_line_damages]
        damaged_lines_country = power_lines.merge(pd.DataFrame(collect_line_damages,columns=['return_period','index','damage']),
                                                  left_index=True,right_on='index')
        damaged_lines_country = damaged_lines_country.drop(['buffered'],axis=1)
        damaged_lines[climate_model] = damaged_lines_country
        
        # calculate damaged polygons in loop by country_code and climate_model
        damaged_poly = {}
        for climate_model in climate_models:
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_poly = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],power_poly).T,
                                        columns=['asset','hazard_point'])
            collect_poly_damages = []
            for asset in tqdm(overlay_poly.groupby('asset'),total=len(overlay_poly.asset.unique()),
                              desc='polygon damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_poly_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                           df_ds[climate_model],
                                                                                           power_poly,
                                                                                           curves,
                                                                                           maxdam,
                                                                                           return_period,
                                                                                           country_code)])

        collect_poly_damages = [(line[0],line[1][0],line[1][1]) for line in collect_poly_damages]
        #print(collect_poly_damages[0])
        damaged_poly_country = power_poly.merge(pd.DataFrame(collect_poly_damages,columns=['return_period','index','damage']),
                                                left_index=True,right_on='index')
        damaged_poly[climate_model] = damaged_poly_country
        
        # calculate damaged points in loop by country_code and climate_model
        damaged_points = {}
        for climate_model in climate_models:
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]
            
            overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],power_points).T,
                                          columns=['asset','hazard_point'])
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_point_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                            df_ds[climate_model],
                                                                                            power_points,
                                                                                            curves,
                                                                                            maxdam,
                                                                                            return_period,
                                                                                            country_code)])

        collect_point_damages = [(line[0],line[1][0],line[1][1]) for line in collect_point_damages]
        damaged_points_country = power_points.merge(pd.DataFrame(collect_point_damages,columns=['return_period','index','damage']),
                                                    left_index=True,right_on='index')
        damaged_points_country = damaged_points_country.drop(['buffered'],axis=1)
        damaged_points[climate_model] = damaged_points_country
        
    elif hazard_type=='fl':
        # read flood data
        scenario_types = ('historical','rcp8p5')
        df_ds = open_flood_data(country_code) #['historical'].head(30) # REMOVE .HEAD(30)
        #time_periods = []
        
        #for time_period in time_periods:
        return_periods = ['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000']
        
        # calculate damaged lines in loop by country_code and climate_model
        damaged_lines = {}
        for scenario_type in scenario_types:
            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[scenario_type],power_lines).T,columns=['asset','hazard_point'])
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,scenario_type)):
                for return_period in return_periods:
                    collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                           df_ds[scenario_type],
                                                                                           power_lines,
                                                                                           curves,
                                                                                           maxdam,
                                                                                           return_period,
                                                                                           country_code)])

            #print(collect_line_damages[0])
        collect_line_damages = [(line[0],line[1][0],line[1][1]) for line in collect_line_damages]
        damaged_lines_country = power_lines.merge(pd.DataFrame(collect_line_damages,columns=['return_period','index','damage']),
                                                  left_index=True,right_on='index')
        damaged_lines = damaged_lines_country.drop(['buffered'],axis=1)
        
        # calculate damaged polygons in loop by country_code and climate_model
        damaged_poly = {}
        for scenario_type in scenario_types:
            overlay_poly = pd.DataFrame(overlay_hazard_assets(df_ds[scenario_type],power_poly).T,
                                                              columns=['asset','hazard_point'])
            collect_poly_damages = []
            for asset in tqdm(overlay_poly.groupby('asset'),total=len(overlay_poly.asset.unique()),
                              desc='polygon damage calculation for {} {} ({})'.format(country_code,hazard_type,scenario_type)):
                for return_period in return_periods:
                    collect_poly_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                           df_ds[scenario_type],
                                                                                           power_poly,
                                                                                           curves,
                                                                                           maxdam,
                                                                                           return_period,
                                                                                           country_code)])

        collect_poly_damages = [(line[0],line[1][0],line[1][1]) for line in collect_poly_damages]
        #print(collect_poly_damages[0])
        damaged_poly_country = power_poly.merge(pd.DataFrame(collect_poly_damages,columns=['return_period','index','damage']),
                                                left_index=True,right_on='index')
        damaged_poly = damaged_poly_country
        
        # calculate damaged points in loop by country_code and climate_model
        damaged_points = {}
        for scenario_type in scenario_types:
            overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[scenario_type],power_points).T,
                                          columns=['asset','hazard_point'])
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,scenario_type)):
                for return_period in return_periods:
                    collect_point_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                            df_ds[scenario_type],
                                                                                            power_points,
                                                                                            curves,
                                                                                            maxdam,
                                                                                            return_period,
                                                                                            country_code)])

                #print(collect_point_damages[1][1])
        collect_point_damages = [(line[0],line[1][0],line[1][1]) for line in collect_point_damages]
        damaged_points_country = power_points.merge(pd.DataFrame(collect_point_damages,columns=['return_period','index','damage']),
                                                    left_index=True,right_on='index')
        damaged_points = damaged_points_country.drop(['buffered'],axis=1)

    return damaged_lines,damaged_poly,damaged_points

osm_damage_infra = assess_damage_osm('TWN',osm_power_infra,'fl')

Loading historical coastal flood data ...
Loading future coastal flood data ...


polyline damage calculation for TWN fl (historical):  18%|████                  | 319/1751 [1:18:47<5:53:41, 14.82s/it]


KeyboardInterrupt: 

In [None]:
osm_damage_infra

In [None]:
def country_analysis_osm(country_code,hazard_type): #
    
    # extract infrastructure data from OSM
    osm_power_infra = extract_osm_infrastructure(country_code,osm_data_path)
    
    # assess damage to hazard_type
    osm_damage_infra = assess_damage_osm(country_code,osm_power_infra,hazard_type)

    return osm_damage_infra
    
    
osm_damage_infra = country_analysis_osm('TWN','tc') #,'line','PG'

In [None]:
#osm_damage_infra[1]['_CNRM-CM6-1-HR']
osm_damage_infra

# Government data processing

In [7]:
# load collected power grid data
def extract_pg_data(country_code,pg_type):
    files = [x for x in os.listdir(pg_data_path)  if country_code in x ]
    
    if pg_type=='line':
        for file in files: 
            file_path = os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))

            pg_data_country = gpd.read_file(file_path)
            pg_data_country = pd.DataFrame(pg_data_country.copy())
            #print(pg_data_country.head())
            pg_data_country.geometry = pygeos.from_shapely(pg_data_country.geometry)
            pg_data_country['geometry'] = reproject(pg_data_country)

        pg_data_country = buffer_assets(pg_data_country.loc[pg_data_country.asset.isin(['line'])],buffer_size=100).reset_index(drop=True)

    elif pg_type=='point':
        for file in files:
            file_path = os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))
                
            pg_data_country = gpd.read_file(file_path)
            pg_data_country = pd.DataFrame(pg_data_country.copy())
            #print(pg_data_country.head())
            pg_data_country.geometry = pygeos.from_shapely(pg_data_country.geometry)
            pg_data_country['geometry'] = reproject(pg_data_country)
            #print(pg_data_country)

        pg_data_country = buffer_assets(pg_data_country.loc[pg_data_country.asset.isin(['plant_point','substation_point','power_tower','power_pole'])],buffer_size=100).reset_index(drop=True)

    return pg_data_country

def open_pg_data(country_code):
    pg_lines = extract_pg_data(country_code,'line')
    pg_points = extract_pg_data(country_code,'point')
    #print(pg_points)
    return pg_lines,pg_points

pg_infra = open_pg_data('LAO')

In [None]:
def assess_damage_pg(country_code,pg_infra,hazard_type):
    
    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(os.path.join(vul_curve_path,'infra_vulnerability_data.xlsx'))
    #curves['line'] = 1 # remove this when things work!
    
    # read infrastructure data:
    pg_lines,pg_points = pg_infra
    #print(type(pg_points))
    #print(type(pg_infra))
    
    pg_lines.head(5)
    pg_points.head(5)
    
    if hazard_type=='tc':
        # read wind data
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM'] # !!!!!!!!!!!!!!!!!!!!!!!!!!!
        df_ds = open_storm_data()
        
        # calculate damaged lines in loop by country_code and climate_model
        damaged_lines = {}
        for climate_model in climate_models:
            return_periods = ['1_10{}'.format(climate_model),'1_50{}'.format(climate_model),
                              '1_100{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],
                                                               pg_lines).T,columns=['asset','hazard_point'])
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                           df_ds[climate_model],
                                                                                           pg_lines,
                                                                                           curves,
                                                                                           maxdam,
                                                                                           return_period,
                                                                                           country_code)])

        collect_line_damages = [(line[0],line[1][0],line[1][1]) for line in collect_line_damages]
        damaged_lines_country = pg_lines.merge(pd.DataFrame(collect_line_damages,columns=['return_period','index','damage']),
                                                  left_index=True,right_on='index')
        damaged_lines_country = damaged_lines_country.drop(['buffered'],axis=1)
        damaged_lines[climate_model] = damaged_lines_country
        
        # calculate damaged points in loop by country_code and climate_model
        damaged_points = {}
        for climate_model in climate_models:
            return_periods = ['1_10{}'.format(climate_model),'1_50{}'.format(climate_model),
                              '1_100{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],pg_points).T,
                                          columns=['asset','hazard_point'])
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_point_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                            df_ds[climate_model],
                                                                                            pg_points,
                                                                                            curves,
                                                                                            maxdam,
                                                                                            return_period,
                                                                                            country_code)])

        collect_point_damages = [(line[0],line[1][0],line[1][1]) for line in collect_point_damages]
        damaged_points_country = pg_points.merge(pd.DataFrame(collect_point_damages,columns=['return_period','index','damage']),
                                                    left_index=True,right_on='index')
        damaged_points_country = damaged_points_country.drop(['buffered'],axis=1)
        damaged_points[climate_model] = damaged_points_country
 
    elif hazard_type=='fl':
        # read flood data
        scenario_types = ('historical','rcp8p5')
        df_ds = open_flood_data(country_code) #['historical'].head(30) # REMOVE .HEAD(30)
        #time_periods = []
        
        #for time_period in time_periods:
        return_periods = ['rp0010','rp0050','rp0100','rp0500','rp1000']
        
        # calculate damaged lines in loop by country_code and climate_model
        damaged_lines = {}
        for scenario_type in scenario_types:
            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[scenario_type],pg_lines).T,columns=['asset','hazard_point'])
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,scenario_type)):
                for return_period in return_periods:
                    collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                           df_ds[scenario_type],
                                                                                           pg_lines,
                                                                                           curves,
                                                                                           maxdam,
                                                                                           return_period,
                                                                                           country_code)])
                    
        collect_line_damages = [(line[0],line[1][0],line[1][1]) for line in collect_line_damages]
        damaged_lines_country = pg_lines.merge(pd.DataFrame(collect_line_damages,columns=['return_period','index','damage']),
                                                  left_index=True,right_on='index')
        damaged_lines = damaged_lines_country.drop(['buffered'],axis=1)
        
        # calculate damaged points in loop by country_code and climate_model
        damaged_points = {}
        for scenario_type in scenario_types:
            overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[scenario_type],pg_points).T,
                                          columns=['asset','hazard_point'])
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,scenario_type)):
                for return_period in return_periods:
                    collect_point_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                            df_ds[scenario_type],
                                                                                            pg_points,
                                                                                            curves,
                                                                                            maxdam,
                                                                                            return_period,
                                                                                            country_code)])

        collect_point_damages = [(line[0],line[1][0],line[1][1]) for line in collect_point_damages]
        damaged_points_country = pg_points.merge(pd.DataFrame(collect_point_damages,columns=['return_period','index','damage']),
                                                    left_index=True,right_on='index')
        damaged_points = damaged_points_country.drop(['buffered'],axis=1)
        
    return damaged_lines,damaged_points

pg_damage_infra = assess_damage_pg('LAO',pg_infra,'tc')

In [9]:
pg_damage_infra

({'_CMCC-CM2-VHR4':         status  capacity_kV              value   id      source country  \
  0     Existing          230  transmission_line  0.0  World Bank    Laos   
  1     Existing          230  transmission_line  0.0  World Bank    Laos   
  2     Existing          230  transmission_line  0.0  World Bank    Laos   
  3     Existing          230  transmission_line  0.0  World Bank    Laos   
  4     Existing          230  transmission_line  0.0  World Bank    Laos   
  ...        ...          ...                ...  ...         ...     ...   
  2130  Existing           22               None  NaN  World Bank    None   
  2131  Existing           22               None  NaN  World Bank    None   
  2132  Existing           22               None  NaN  World Bank    None   
  2133  Existing           22               None  NaN  World Bank    None   
  2134  Existing           22               None  NaN  World Bank    None   
  
       operator undergrnd phases cables  year asset  \


In [10]:
def country_analysis_pg(country_code,hazard_type): #
    
    # extract infrastructure data from OSM
    pg_infra = open_pg_data(country_code)

    # assess damage to wind storms
    pg_damage_infra = assess_damage_pg(country_code,pg_infra,hazard_type)

    return pg_damage_infra
    
    
pg_damage_infra = country_analysis_pg('LAO','fl') #,'line','PG'

<class 'pandas.core.frame.DataFrame'>
<class 'tuple'>
Loading historical coastal flood data ...
Loading future coastal flood data ...


polyline damage calculation for LAO fl (historical):  74%|█████████████████      | 317/427 [3:20:12<1:09:28, 37.89s/it]
Exception ignored in: <function ZipFile.__del__ at 0x0000012B1188B040>
Traceback (most recent call last):
  File "C:\Users\mye500\Miniconda3\envs\pgrisk\lib\zipfile.py", line 1816, in __del__
    self.close()
KeyboardInterrupt: 

KeyboardInterrupt



In [11]:
pg_damage_infra

({'_CMCC-CM2-VHR4':         status  capacity_kV              value   id      source country  \
  0     Existing          230  transmission_line  0.0  World Bank    Laos   
  1     Existing          230  transmission_line  0.0  World Bank    Laos   
  2     Existing          230  transmission_line  0.0  World Bank    Laos   
  3     Existing          230  transmission_line  0.0  World Bank    Laos   
  4     Existing          230  transmission_line  0.0  World Bank    Laos   
  ...        ...          ...                ...  ...         ...     ...   
  2130  Existing           22               None  NaN  World Bank    None   
  2131  Existing           22               None  NaN  World Bank    None   
  2132  Existing           22               None  NaN  World Bank    None   
  2133  Existing           22               None  NaN  World Bank    None   
  2134  Existing           22               None  NaN  World Bank    None   
  
       operator undergrnd phases cables  year asset  \


In [None]:
"""
def country_analysis_pg(country_code,hazard_type): #
    
    # extract infrastructure data from gov data
    osm_power_infra = extract_pg_infra(country_code,pg_data_path)
    osm_damage_infra = assess_damage_pg(country_code,pg_data_country,hazard_type)
    
    return osm_damage_infra
    
    
osm_damage_infra = country_analysis_pg('LAO','fl') #,'line','PG'
"""

In [None]:
def clip_gridfinder(country_code):
    base_map_path = os.path.join(data_path,'base_map')

    cty_boundary_path = os.path.join(base_map_path,'gadm41_{}.gpkg'.format(country_code))
    cty_boundary = gpd.read_file(cty_boundary_path)
    #mask = pd.DataFrame(mask.copy())
    #mask.geometry = pygeos.from_shapely(mask.geometry)
    #mask['geometry'] = reproject(mask)

    gridfinder_path = r'C:\Users\mye500\OneDrive - Vrije Universiteit Amsterdam\01_Research-Projects\01_risk_assessment\PG_data\gridfinder\grid.gpkg'
    gridfinder = gpd.read_file(gridfinder_path)
    #gridfinder = pd.DataFrame(gridfinder.copy())
    #gridfinder.geometry = pygeos.from_shapely(gridfinder.geometry)
    #gridfinder['geometry'] = reproject(gridfinder)

    clipped = gpd.clip(gridfinder,cty_boundary)

    return clipped

In [None]:
clip_gridfinder('TWN')