In [1]:
import geopandas as gpd
import pandas as pd
from osgeo import ogr,gdal
import os
import xarray as xr
import rasterio
import numpy as np
import pyproj
from pygeos import from_wkb,from_wkt
import pygeos
from tqdm import tqdm
from shapely.wkb import loads
from pathlib import Path
import glob
from shapely.geometry import mapping
pd.options.mode.chained_assignment = None
from rasterio.mask import mask
import rioxarray
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [2]:
gdal.SetConfigOption("OSM_CONFIG_FILE", os.path.join('..',"osmconf.ini"))

# change paths to make it work on your own machine
data_path = os.path.join('C:\\','Data','pg_risk_analysis')
tc_path = os.path.join(data_path,'tc_netcdf')
fl_path = os.path.join(data_path,'GLOFRIS')
osm_data_path = os.path.join('C:\\','Data','country_osm')
pg_data_path = os.path.join(data_path,'pg_data')
vul_curve_path = os.path.join(data_path,'vulnerability_curves','input_vulnerability_data.xlsx')
output_path = os.path.join('C:\\','projects','pg_risk_analysis','output')

In [3]:
def query_b(geoType,keyCol,**valConstraint):
    """
    This function builds an SQL query from the values passed to the retrieve() function.
    Arguments:
         *geoType* : Type of geometry (osm layer) to search for.
         *keyCol* : A list of keys/columns that should be selected from the layer.
         ***valConstraint* : A dictionary of constraints for the values. e.g. WHERE 'value'>20 or 'value'='constraint'
    Returns:
        *string: : a SQL query string.
    """
    query = "SELECT " + "osm_id"
    for a in keyCol: query+= ","+ a  
    query += " FROM " + geoType + " WHERE "
    # If there are values in the dictionary, add constraint clauses
    if valConstraint: 
        for a in [*valConstraint]:
            # For each value of the key, add the constraint
            for b in valConstraint[a]: query += a + b
        query+= " AND "
    # Always ensures the first key/col provided is not Null.
    query+= ""+str(keyCol[0]) +" IS NOT NULL" 
    return query 


def retrieve(osm_path,geoType,keyCol,**valConstraint):
    """
    Function to extract specified geometry and keys/values from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.     
        *geoType* : Type of Geometry to retrieve. e.g. lines, multipolygons, etc.
        *keyCol* : These keys will be returned as columns in the dataframe.
        ***valConstraint: A dictionary specifiying the value constraints.  
        A key can have multiple values (as a list) for more than one constraint for key/value.  
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with all columns, geometries, and constraints specified.    
    """
    driver=ogr.GetDriverByName('OSM')
    data = driver.Open(osm_path)
    query = query_b(geoType,keyCol,**valConstraint)
    sql_lyr = data.ExecuteSQL(query)
    features =[]
    # cl = columns 
    cl = ['osm_id'] 
    for a in keyCol: cl.append(a)
    if data is not None:
        print('query is finished, lets start the loop')
        for feature in tqdm(sql_lyr,desc='extract'):
            #try:
            if feature.GetField(keyCol[0]) is not None:
                geom1 = (feature.geometry().ExportToWkt())
                #print(geom1)
                geom = from_wkt(feature.geometry().ExportToWkt()) 
                if geom is None:
                    continue
                # field will become a row in the dataframe.
                field = []
                for i in cl: field.append(feature.GetField(i))
                field.append(geom)   
                features.append(field)
            #except:
            #    print("WARNING: skipped OSM feature")   
    else:
        print("ERROR: Nonetype error when requesting SQL. Check required.")    
    cl.append('geometry')                   
    if len(features) > 0:
        return pd.DataFrame(features,columns=cl)
    else:
        print("WARNING: No features or No Memory. returning empty GeoDataFrame") 
        return pd.DataFrame(columns=['osm_id','geometry'])

def power_polyline(osm_path):
    """
    Function to extract all energy linestrings from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """
    df = retrieve(osm_path,'lines',['power','voltage'])
    
    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})
    
    #print(df) #check infra keys
    
    return df.reset_index(drop=True)

def power_polygon(osm_path): # check with joel, something was wrong here with extracting substations
    """
    Function to extract energy polygons from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """
    df = retrieve(osm_path,'multipolygons',['other_tags']) 
    
    df = df.loc[(df.other_tags.str.contains('power'))]   #keep rows containing power data         
    df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})     
    
    df['asset'].loc[df['asset'].str.contains('"power"=>"substation"', case=False)]  = 'substation' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"plant"', case=False)] = 'plant' #specify row
    
    df = df.loc[(df.asset == 'substation') | (df.asset == 'plant')]
            
    return df.reset_index(drop=True) 

def electricity(osm_path):
    """
    Function to extract building polygons from OpenStreetMap    
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with all unique building polygons.    
    """
    df = retrieve(osm_path,'multipolygons',['power'])
    
    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})
    
    #df = df[df.asset!='generator']
    df['asset'].loc[df['asset'].str.contains('"power"=>"substation"', case=False)]  = 'substation' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"plant"', case=False)] = 'plant' #specify row
    
    #print(df)  #check infra keys
    
    df = df.loc[(df.asset == 'substation') | (df.asset == 'plant')]
    
    return df.reset_index(drop=True)

def retrieve_poly_subs(osm_path, w_list, b_list):
    """
    Function to extract electricity substation polygons from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region
        for which we want to do the analysis.
        *w_list* :  white list of keywords to search in the other_tags columns
        *b_list* :  black list of keywords of rows that should not be selected
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique substation.
    """
    df = retrieve(osm_path,'multipolygons',['other_tags'])
    df = df[df.other_tags.str.contains('substation', case=False, na=False)]
    #df = df.loc[(df.other_tags.str.contains('substation'))]
    df = df[~df.other_tags.str.contains('|'.join(b_list))]
    #df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})
    df['asset']  = 'substation' #specify row
    #df = df.loc[(df.asset == 'substation')] #specify row
    return df.reset_index(drop=True)

def power_point(osm_path):
    """
    Function to extract energy points from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """   
    df = retrieve(osm_path,'points',['other_tags']) 
    df = df.loc[(df.other_tags.str.contains('power'))]  #keep rows containing power data       
    df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})     
    
    #print(df)
    
    df['asset'].loc[df['asset'].str.contains('"power"=>"tower"', case=False)]  = 'power_tower' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"pole"', case=False)] = 'power_pole' #specify row
    #df['asset'].loc[df['asset'].str.contains('"utility"=>"power"', case=False)] = 'power_tower' #specify row
    
    df = df.loc[(df.asset == 'power_tower') | (df.asset == 'power_pole')]
            
    return df.reset_index(drop=True)

In [83]:
def reproject(df_ds,current_crs="epsg:4326",approximate_crs = "epsg:3857"):
    """[summary]

    Args:
        df_ds ([type]): [description]
        current_crs (str, optional): [description]. Defaults to "epsg:3857".
        approximate_crs (str, optional): [description]. Defaults to "epsg:4326".

    Returns:
        [type]: [description]
    """    

    geometries = df_ds['geometry']
    coords = pygeos.get_coordinates(geometries)
    transformer=pyproj.Transformer.from_crs(current_crs, approximate_crs,always_xy=True)
    new_coords = transformer.transform(coords[:, 0], coords[:, 1])
    
    return pygeos.set_coordinates(geometries.copy(), np.array(new_coords).T) 

def load_curves_maxdam(vul_curve_path,hazard_type):
    """[summary]

    Args:
        data_path ([type]): [description]

    Returns:
        [type]: [description]
    """

    if hazard_type == 'tc':
        sheet_name = 'wind_curves'
    
    elif hazard_type == 'fl':
        sheet_name = 'flooding_curves'
    
    # load curves and maximum damages as separate inputs
    curves = pd.read_excel(vul_curve_path,sheet_name=sheet_name,skiprows=11,index_col=[0])
    
    if hazard_type == 'fl':
        maxdam = pd.read_excel(vul_curve_path,sheet_name=sheet_name,index_col=[0]).iloc[:8]
    elif hazard_type == 'tc':
        maxdam = pd.read_excel(vul_curve_path,sheet_name=sheet_name,index_col=[0],header=[0,1]).iloc[:8]
        maxdam = maxdam.rename({'substation_point':'substation'},level=0,axis=1)
            
    curves.columns = maxdam.columns
        
    #transpose maxdam so its easier work with the dataframe
    maxdam = maxdam.T

    #interpolate the curves to fill missing values
    curves = curves.interpolate()
       
    return curves,maxdam

def buffer_assets(assets, buffer_size=100):
    """
    Create a buffer of a specified size around the geometries in a GeoDataFrame.
    
    Args:
        assets (GeoDataFrame): A GeoDataFrame containing geometries to be buffered.
        buffer_size (int, optional): The distance in the units of the GeoDataFrame's CRS to buffer the geometries.
            Defaults to 100.
    
    Returns:
        GeoDataFrame: A new GeoDataFrame with an additional column named 'buffered' containing the buffered
            geometries.
    """
    # Create a buffer of the specified size around the geometries
    assets['buffered'] = pygeos.buffer(assets.geometry.values, buffer_size)
    
    return assets


def overlay_hazard_assets(df_ds, assets):
    """
    Overlay a set of assets with a hazard dataset and return the subset of assets that intersect with
    one or more hazard polygons or lines.
    
    Args:
        df_ds (GeoDataFrame): A GeoDataFrame containing the hazard dataset.
        assets (GeoDataFrame): A GeoDataFrame containing the assets to be overlaid with the hazard dataset.
    
    Returns:
        ndarray: A numpy array of integers representing the indices of the hazard geometries that intersect with
            the assets. If the assets have a 'buffered' column, the buffered geometries are used for the overlay.
    """
    hazard_tree = pygeos.STRtree(df_ds.geometry.values)
    if (pygeos.get_type_id(assets.iloc[0].geometry) == 3) | (pygeos.get_type_id(assets.iloc[0].geometry) == 6):
        return  hazard_tree.query_bulk(assets.geometry,predicate='intersects')    
    else:
        return  hazard_tree.query_bulk(assets.buffered,predicate='intersects')
    
def get_damage_per_asset_per_rp(asset,df_ds,assets,curves,maxdam,return_period,country):
    """
    Calculates the damage per asset per return period based on asset type, hazard curves and maximum damage

    Args:
        asset (tuple): Tuple with two dictionaries, containing the asset index and the hazard point index of the asset
        df_ds (pandas.DataFrame): A pandas DataFrame containing hazard points with a 'geometry' column
        assets (geopandas.GeoDataFrame): A GeoDataFrame containing asset geometries and asset type information
        curves (dict): A dictionary with the asset types as keys and their corresponding hazard curves as values
        maxdam (pandas.DataFrame): A pandas DataFrame containing the maximum damage for each asset type
        return_period (str): The return period for which the damage should be calculated
        country (str): The country for which the damage should be calculated

    Returns:
        list or tuple: Depending on the input, the function either returns a list of tuples with the asset index, the curve name and the calculated damage, or a tuple with None, None, None if no hazard points are found
    """
    

    # find the exact hazard overlays:
    get_hazard_points = df_ds.iloc[asset[1]['hazard_point'].values].reset_index()
    get_hazard_points = get_hazard_points.loc[pygeos.intersects(get_hazard_points.geometry.values,assets.iloc[asset[0]].geometry)]

    asset_type = assets.iloc[asset[0]].asset
    asset_geom = assets.iloc[asset[0]].geometry

    if asset_type in ['plant','substation','generator']:
        maxdam_asset = maxdam.loc[asset_type].MaxDam/pygeos.area(asset_geom)
    else:
        maxdam_asset = maxdam.loc[asset_type].MaxDam

    hazard_intensity = curves[asset_type].index.values
    
    if isinstance(curves[asset_type],pd.core.series.Series):
        fragility_values = curves[asset_type].values.flatten()
        only_one = True
        curve_name = curves[asset_type].name
    elif len(curves[asset_type].columns) == 1:
        fragility_values = curves[asset_type].values.flatten()      
        only_one = True   
        curve_name = curves[asset_type].columns[0]
    else:
        fragility_values = curves[asset_type].values#.T[0]
        maxdam_asset = maxdam_asset.values#[0]
        only_one = False

    if len(get_hazard_points) == 0:
        return None,None,None
    else:
        if only_one:    
            # run the calculation as normal when the asset just has a single curve
            if pygeos.get_type_id(asset_geom) == 1:            
                get_hazard_points['overlay_meters'] = pygeos.length(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
                return asset[0],curve_name,np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,
                                                             fragility_values))*get_hazard_points.overlay_meters*maxdam_asset)

            elif (pygeos.get_type_id(asset_geom) == 3) | (pygeos.get_type_id(asset_geom) == 6) :
                get_hazard_points['overlay_m2'] = pygeos.area(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
                return asset[0],curve_name,get_hazard_points.apply(lambda x: np.interp(x[return_period], 
                                                                                  hazard_intensity, 
                                                                                  fragility_values)*maxdam_asset*x.overlay_m2,axis=1).sum()     

            else:
                return asset[0],curve_name,np.sum((np.interp(get_hazard_points[return_period].values,
                                                             hazard_intensity,fragility_values))*maxdam_asset)
        else:
            if pygeos.get_type_id(asset_geom) == 1:            
                get_hazard_points['overlay_meters'] = pygeos.length(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
            elif (pygeos.get_type_id(asset_geom) == 3) | (pygeos.get_type_id(asset_geom) == 6) :
                get_hazard_points['overlay_m2'] = pygeos.area(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
            
            collect_all = []
            for iter_,curve_ids in enumerate(curves[asset_type].columns):
                if pygeos.get_type_id(asset_geom) == 1:                           
                    collect_all.append([asset[0],curves[asset_type].columns[iter_],np.sum((np.interp(get_hazard_points[return_period].values,
                                      hazard_intensity,
                                      fragility_values.T[iter_]))*get_hazard_points.overlay_meters*maxdam_asset[iter_])])
                
                elif (pygeos.get_type_id(asset_geom) == 3) | (pygeos.get_type_id(asset_geom) == 6) :
                    collect_all.append([asset[0],curves[asset_type].columns[iter_],get_hazard_points.apply(lambda x: np.interp(x[return_period], 
                                                              hazard_intensity, 
                                                              fragility_values.T[iter_])*maxdam_asset[iter_]*x.overlay_m2,axis=1).sum()])     

                else:
                    collect_all.append([asset[0],curves[asset_type].columns[iter_],
                                              np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,
                                                                fragility_values.T[iter_]))*maxdam_asset[iter_])])
            return collect_all


In [136]:
load_curves_maxdam(vul_curve_path,'tc')[1].loc['power_pole']

Unnamed: 0_level_0,Specific occupancy,Reference,Type vulnerability data,Unit,MaxDam,LowerDam,UpperDam,UpperDam
Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
W4_5,"Class 4, age 0","Salman and Li, 2016",curve,euro/facility,2800.665206,2100.498904,3500.831507,
W4_6,"Steel, age 0","Salman and Li, 2016",curve,euro/facility,3019.333691,2264.500268,3774.167114,
W4_7,"Class 4, age 20 + CPD","Salman and Li, 2016",curve,euro/facility,2800.665206,2100.498904,3500.831507,
W4_8,"Class 4, age 20 no CPD","Salman and Li, 2016",curve,euro/facility,2800.665206,2100.498904,3500.831507,
W4_9,"Steel, age 20","Salman and Li, 2016",curve,euro/facility,3019.333691,2264.500268,3774.167114,
W4_10,"Class 4, age 40 + CPD","Salman and Li, 2016",curve,euro/facility,2800.665206,2100.498904,3500.831507,
W4_11,"Class 4, age 40 no CPD","Salman and Li, 2016",curve,euro/facility,2800.665206,2100.498904,3500.831507,
W4_12,"Steel, age 40","Salman and Li, 2016",curve,euro/facility,3019.333691,2264.500268,3774.167114,
W4_13,"Class 4, age 60 + CPD","Salman and Li, 2016",curve,euro/facility,2800.665206,2100.498904,3500.831507,
W4_14,"Class 4, age 60 no CPD","Salman and Li, 2016",curve,euro/facility,2800.665206,2100.498904,3500.831507,


In [6]:
def load_storm_data(climate_model,basin,bbox,ne_crs):
    
    with xr.open_dataset(os.path.join(tc_path,'STORM_FIXED_RETURN_PERIODS{}_{}.nc'.format(climate_model,basin))) as ds:
        
        ds.rio.write_crs(4326, inplace=True)
        ds = ds.rio.clip_box(minx=bbox[0],miny=bbox[1],maxx=bbox[2],maxy=bbox[3])
        
        # get the mean values
        df_ds = ds['mean'].to_dataframe().unstack(level=2).reset_index()

        # create geometry values and drop lat lon columns
        df_ds['geometry'] = [pygeos.points(x) for x in list(zip(df_ds['lon'],df_ds['lat']))]
        df_ds = df_ds.drop(['lat','lon'],axis=1,level=0)
        
        # interpolate wind speeds of 1, 2, and 5-yr return period
        ## rename columns to return periods (must be integer for interpolating)
        df_ds_geometry = pd.DataFrame()
        df_ds_geometry['geometry'] = df_ds['geometry']
        df_ds = df_ds.drop(['geometry'],axis=1,level=0)
        df_ds = df_ds['mean']
        df_ds.columns = [int(x) for x in ds['mean']['rp']]
        df_ds[1] = np.nan
        df_ds[2] = np.nan
        df_ds[5] = np.nan
        df_ds[25] = np.nan
        df_ds[250] = np.nan
        df_ds = df_ds.reindex(sorted(df_ds.columns), axis=1)
        df_ds = df_ds.interpolate(method='linear',axis=1,limit_direction='both')
        df_ds['geometry'] = df_ds_geometry['geometry']
        #df_ds = df_ds[['1','2','5','10','25','50','100','250','500','1000','geometry']]
        df_ds = df_ds[[1,2,5,10,25,50,100,250,500,1000,'geometry']]
        
        #rename columns to return periods
        #return_periods = ['1_{}{}'.format(int(x),climate_model) for x in ds['rp']]
        df_ds.columns = ['1_{}{}'.format(int(x),climate_model) for x in [1,2,5,10,25,50,100,250,500,1000]] +['geometry']     
        df_ds['geometry'] = pygeos.buffer(df_ds.geometry,radius=0.1/2,cap_style='square').values
        df_ds['geometry'] = reproject(df_ds)
            
        # drop all non values to reduce size
        #df_ds = df_ds.loc[~df_ds['1_10000{}'.format(climate_model)].isna()].reset_index(drop=True)
        df_ds = df_ds.fillna(0)
        #df_ds = df_ds[['1','2','5','10','25','50','100','250','500','1000']]
        #df_ds = df_ds[['1_{}{}'.format(int(x),climate_model) for x in list(df_ds.columns.get_level_values(0))[:-1]]+['geometry']]

    return df_ds

def open_storm_data(country_code):
    climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
    df_ds = {}
    
    country_basin = {
    "BRN": ["WP"],
    "KHM": ["WP"],
    "CHN": ["WP", "NI"],
    "IDN": ["SI", "SP", "NI", "WP"],
    "JPN": ["WP"],
    "LAO": ["WP"],
    "MYS": ["WP", "NI"],
    "MNG": ["WP", "NI"],
    "MMR": ["NI", "WP"],
    "PRK": ["WP"],
    "PHL": ["WP"],
    "SGP": ["WP"],
    "KOR": ["WP"],
    "TWN": ["WP"],
    "THA": ["WP", "NI"],
    "VNM": ["WP"] }
    
    # load country geometry file and create geometry to clip
    ne_countries = gpd.read_file('C:\\Data\\natural_earth\\ne_10m_admin_0_countries.shp') 
    ne_crs = ne_countries.crs
    bbox = ne_countries.loc[ne_countries['ISO_A3']==country_code].geometry.buffer(1).values[0].bounds

    for climate_model in climate_models:
        concat_prep = []
        #combine STORM data from different basins
        if "WP" in country_basin[country_code]:
            WP = load_storm_data(climate_model,'WP',bbox,ne_crs)
            concat_prep.append(WP)
        if "SP" in country_basin[country_code]:
            SP = load_storm_data(climate_model,'SP',bbox,ne_crs)
            concat_prep.append(SP)
        if "NI" in country_basin[country_code]:            
            NI = load_storm_data(climate_model,'NI',bbox,ne_crs)
            concat_prep.append(NI)            
        if "SI" in country_basin[country_code]:       
            SI = load_storm_data(climate_model,'SI',bbox,ne_crs)
            concat_prep.append(SI)            
                   
        df_ds_cl = pd.concat(concat_prep,keys=country_basin[country_code])#,sp,ni,si,'sp','ni','si'])

        df_ds_cl = df_ds_cl.reset_index(drop=True)
        
        df_ds[climate_model] = df_ds_cl
    
    return df_ds

In [432]:
%%time
open_storm_data('TWN')['']

CPU times: total: 1.73 s
Wall time: 1.72 s


Unnamed: 0,1_1,1_2,1_5,1_10,1_25,1_50,1_100,1_250,1_500,1_1000,geometry
0,31.484970,31.484970,31.484970,31.484970,34.882271,37.150791,38.786932,41.029938,42.933264,43.897242,"POLYGON ((13057776.27 2391878.588, 13057776.27..."
1,31.561684,31.561684,31.561684,31.561684,34.941517,37.417290,38.952814,40.987155,42.433489,43.846372,"POLYGON ((13068908.219 2391878.588, 13068908.2..."
2,31.559660,31.559660,31.559660,31.559660,34.989208,37.441990,39.295692,41.087890,42.636647,43.937314,"POLYGON ((13080040.168 2391878.588, 13080040.1..."
3,31.649051,31.649051,31.649051,31.649051,35.093386,37.464786,39.427878,41.296611,42.939086,44.289626,"POLYGON ((13091172.117 2391878.588, 13091172.1..."
4,31.749391,31.749391,31.749391,31.749391,35.266336,37.634156,39.579647,41.514516,42.741313,44.298240,"POLYGON ((13102304.066 2391878.588, 13102304.0..."
...,...,...,...,...,...,...,...,...,...,...,...
3181,35.069860,35.069860,35.069860,35.069860,39.243768,42.045388,44.128518,46.371252,48.097373,49.527067,"POLYGON ((13658901.52 3036284.923, 13658901.52..."
3182,35.159120,35.159120,35.159120,35.159120,39.261418,41.821312,44.056902,46.617187,48.180678,49.415934,"POLYGON ((13670033.469 3036284.923, 13670033.4..."
3183,35.238116,35.238116,35.238116,35.238116,39.250911,41.894078,44.101615,46.562302,47.930412,48.967779,"POLYGON ((13681165.418 3036284.923, 13681165.4..."
3184,35.309186,35.309186,35.309186,35.309186,39.248515,41.935010,44.177309,46.750433,47.926717,48.968906,"POLYGON ((13692297.368 3036284.923, 13692297.3..."


In [7]:
def clip_flood_data(country_code):

    # load country geometry file and create geometry to clip
    ne_countries = gpd.read_file('C:\\Data\\natural_earth\\ne_10m_admin_0_countries.shp') 
    geometry = ne_countries.loc[ne_countries['ISO_A3']==country_code].geometry.values[0]
    geoms = [mapping(geometry)]
    
    #climate_model: historical, rcp4p5, rcp8p5; time_period: hist, 2030, 2050, 2080
    rps = ['0001','0002','0005','0010','0025','0050','0100','0250','0500','1000']
    climate_models = ['historical','rcp8p5']
    
    
    for rp in rps:
        #global input_file
        for climate_model in climate_models:
            if climate_model=='historical':
                #f rps=='0001':
                    input_file = os.path.join(fl_path,'global',
                                              'inuncoast_{}_nosub_hist_rp{}_0.tif'.format(climate_model,rp)) 
                #elif rps==['0002','0005','0010','0025','0050','0100','0250','0500','1000']:
                #    input_file = os.path.join(fl_path,'global',
                #                              'inuncoast_{}_nosub_hist_rp{}_0.tif'.format(climate_model,rp)) 
            elif climate_model=='rcp8p5':
                #f rps=='0001':
                    input_file = os.path.join(fl_path,'global',
                                              'inuncoast_{}_nosub_2030_rp{}_0.tif'.format(climate_model,rp))
                #elif rps==['0002','0005','0010','0025','0050','0100','0250','0500','1000']:
                #    input_file = os.path.join(fl_path,'global',
                #                              'inuncoast_{}_nosub_2030_rp{}_0.tif'.format(climate_model,rp))
            
            # load raster file and save clipped version
            with rasterio.open(input_file) as src:
                out_image, out_transform = mask(src, geoms, crop=True)
                out_meta = src.meta

                out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})

                file_path = os.path.join(fl_path,'country','_'.join([country_code]+input_file.split('_')[3:]))

                with rasterio.open(file_path, "w", **out_meta) as dest:
                    dest.write(out_image)

def load_flood_data(country_code,scenario_type):
    files = [x for x in os.listdir(os.path.join(fl_path,'country'))  if country_code in x ]
    
    rps = ['0001','0002','0005','0010','0025','0050','0100','0250','0500','1000']
    collect_df_ds = []
    
    if scenario_type=='historical':
        print('Loading historical coastal flood data ...')
        for rp in rps:
            #for file in files:
            file_path = os.path.join(fl_path,'country','{}_{}_nosub_hist_rp{}_0.tif'.format(country_code,scenario_type,rp))
            with xr.open_dataset(file_path) as ds: #, engine="rasterio"
                df_ds = ds.to_dataframe().reset_index()
                df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
                df_ds = df_ds.rename(columns={'band_data': 'rp'+rp}) #rename to return period
                df_ds['rp'+rp] = (df_ds['rp'+rp]*100)         
                df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
                df_ds = df_ds.dropna()
                df_ds = df_ds.reset_index(drop=True)
                df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=100/2,cap_style='square').values  #?????????????????????????
                df_ds['geometry'] = reproject(df_ds)
                collect_df_ds.append(df_ds)

        df_all = collect_df_ds[0].merge(collect_df_ds[1]).merge(collect_df_ds[2]).merge(collect_df_ds[3]).merge(collect_df_ds[4])\
                 .merge(collect_df_ds[5]).merge(collect_df_ds[6]).merge(collect_df_ds[7]).merge(collect_df_ds[8]).merge(collect_df_ds[9])
        df_all = df_all.loc[df_all['rp1000']>0].reset_index(drop=True)

    elif scenario_type=='rcp8p5':
        print('Loading future coastal flood data ...')
        for rp in rps:
            #for file in files:
            file_path = os.path.join(fl_path,'country','{}_{}_nosub_2030_rp{}_0.tif'.format(country_code,scenario_type,rp))
            with xr.open_dataset(file_path) as ds: #, engine="rasterio"
                df_ds = ds.to_dataframe().reset_index()
                df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
                df_ds = df_ds.rename(columns={'band_data': 'rp'+rp}) #rename to return period
                df_ds['rp'+rp] = (df_ds['rp'+rp]*100)
                df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
                df_ds = df_ds.dropna()
                df_ds = df_ds.reset_index(drop=True)
                df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=100/2,cap_style='square').values
                df_ds['geometry'] = reproject(df_ds)
                collect_df_ds.append(df_ds)

        df_all = collect_df_ds[0].merge(collect_df_ds[1]).merge(collect_df_ds[2]).merge(collect_df_ds[3]).merge(collect_df_ds[4])\
                 .merge(collect_df_ds[5]).merge(collect_df_ds[6]).merge(collect_df_ds[7]).merge(collect_df_ds[8]).merge(collect_df_ds[9])

        df_all = df_all.loc[df_all['rp1000']>0].reset_index(drop=True)
    return df_all

def open_flood_data(country_code):
    scenario_types = ['historical','rcp8p5']
    df_ds = {}
    for scenario_type in scenario_types:
        #hist = load_flood_data(country_code,'historical')
        #rcp8p5 = load_flood_data(country_code,'rcp8p5')
        #df_ds_sc = pd.concat([hist,rcp8p5],keys=['historical','rcp8p5'])
        df_ds_sc = load_flood_data(country_code,scenario_type)

        df_ds[scenario_type] = df_ds_sc
    
    return df_ds

In [42]:
%%time
twn_flood = open_flood_data('TWN')
#print(type(lao_flood))

Loading historical coastal flood data ...
Loading future coastal flood data ...
CPU times: total: 17.9 s
Wall time: 17.9 s


# OSM data processing

In [5]:
def extract_osm_infrastructure(country_code,osm_data_path):

    # lines
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    power_lines_country = power_polyline(osm_path)
    power_lines_country['geometry'] = reproject(power_lines_country)
    power_lines_country = buffer_assets(power_lines_country.loc[power_lines_country.asset.isin(
        ['cable','minor_cable','line','minor_line'])],buffer_size=100).reset_index(drop=True)
    
    # polygons
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    power_poly_country = electricity(osm_path)
    power_poly_country['geometry'] = reproject(power_poly_country)
    
    # points
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    power_points_country = power_point(osm_path)
    power_points_country['geometry'] = reproject(power_points_country)
    power_points_country = buffer_assets(power_points_country.loc[power_points_country.asset.isin(
        ['power_tower','power_pole'])],buffer_size=100).reset_index(drop=True)
    #print(power_points_country)
    #print(type(power_points_country))

    return power_lines_country,power_poly_country,power_points_country


#print(type(osm_power_infra))

In [139]:
osm_power_infra = extract_osm_infrastructure('LAO',osm_data_path)

query is finished, lets start the loop


extract: 100%|██████████████████████████████████████████████████████████████████████| 412/412 [00:01<00:00, 258.47it/s]


query is finished, lets start the loop


extract: 100%|█████████████████████████████████████████████████████████████████████████| 14/14 [00:04<00:00,  3.24it/s]


query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████████| 42729/42729 [00:02<00:00, 16033.53it/s]


In [137]:
def assess_damage_osm(country_code,osm_power_infra,hazard_type):
    
    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(vul_curve_path,hazard_type)
    
    # read infrastructure data:
    power_lines,power_poly,power_points = osm_power_infra
    #print(type(power_lines))
    #print(type(osm_power_infra))
    
    if hazard_type=='tc':
        # read wind data
        climate_models = ['']#,'_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        df_ds = open_storm_data(country_code)
        
        # remove assets that will not have any damage
        power_lines = power_lines.loc[power_lines.asset != 'cable'].reset_index(drop=True)
        power_poly = power_poly.loc[power_poly.asset != 'plant'].reset_index(drop=True)

    elif hazard_type=='fl':
        # read flood data
        climate_models = ['historical']#,'rcp8p5']
        df_ds = open_flood_data(country_code) 
        
    #calculate damaged lines in loop by climate_model
    damaged_lines = {}
    for climate_model in climate_models:
        
        if hazard_type == 'tc':
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]
        elif hazard_type == 'fl':
            return_periods = ['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000']  

        overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],power_lines).T,
                                     columns=['asset','hazard_point'])
        collect_line_damages = []
        for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                          desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
            for return_period in return_periods:
                collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                       df_ds[climate_model],
                                                                                       power_lines,
                                                                                       curves,
                                                                                       maxdam,
                                                                                       return_period,
                                                                                       country_code)])

        get_asset_type_line = dict(zip(power_lines.index,power_lines.asset))

        if hazard_type == 'tc':
            results = pd.concat([pd.DataFrame(line[1],columns=pd.MultiIndex.from_product([[line[0]],
                                                                                ['asset','curve','damage']])).stack(level=0) 
                       for line in collect_line_damages]).reset_index(level=1).reset_index(drop=True)
        elif hazard_type == 'fl':
            results = pd.concat([pd.DataFrame([line[1]],columns=pd.MultiIndex.from_product([[line[0]],
                                                                                        ['asset','curve','damage']])).stack(level=0) 
                               for line in collect_line_damages]).reset_index(level=1).reset_index(drop=True) 
            
        results.columns = ['rp','asset','curve','damage']
        results['asset_type'] = results.asset.apply(lambda x : get_asset_type_line[x])

        damaged_lines[climate_model] = results.groupby(['rp','curve','asset_type']).sum()['damage'].reset_index()

    # calculate damaged polygons in loop by country_code and climate_model
    damaged_poly = {}
    for climate_model in climate_models:
        if hazard_type == 'tc':
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]
        elif hazard_type == 'fl':
            return_periods = ['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000']  

        overlay_poly = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],power_poly).T,
                                    columns=['asset','hazard_point'])
        collect_poly_damages = []
        for asset in tqdm(overlay_poly.groupby('asset'),total=len(overlay_poly.asset.unique()),
                          desc='polygon damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
            for return_period in return_periods:
                collect_poly_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                       df_ds[climate_model],
                                                                                       power_poly,
                                                                                       curves,
                                                                                       maxdam,
                                                                                       return_period,
                                                                                       country_code)])
        get_asset_type_poly = dict(zip(power_poly.index,power_poly.asset))

        if hazard_type == 'tc':
            results = pd.concat([pd.DataFrame(line[1],columns=pd.MultiIndex.from_product([[line[0]],
                                                                                ['asset','curve','damage']])).stack(level=0) 
                       for line in collect_poly_damages]).reset_index(level=1).reset_index(drop=True)
        elif hazard_type == 'fl':
            results = pd.concat([pd.DataFrame([line[1]],columns=pd.MultiIndex.from_product([[line[0]],
                                                                                        ['asset','curve','damage']])).stack(level=0) 
                               for line in collect_poly_damages]).reset_index(level=1).reset_index(drop=True) 

        results.columns = ['rp','asset','curve','damage']
        results['asset_type'] = results.asset.apply(lambda x : get_asset_type_poly[x])

        damaged_poly[climate_model] = results.groupby(['rp','curve','asset_type']).sum()['damage'].reset_index()

    # calculate damaged points in loop by country_code and climate_model
    damaged_points = {}
    for climate_model in climate_models:
        if hazard_type == 'tc':
            return_periods = ['1_1{}'.format(climate_model),'1_2{}'.format(climate_model),'1_5{}'.format(climate_model),'1_10{}'.format(climate_model),
                              '1_25{}'.format(climate_model),'1_50{}'.format(climate_model),'1_100{}'.format(climate_model),
                              '1_250{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]
        elif hazard_type == 'fl':
            return_periods = ['rp0001','rp0002','rp0005','rp0010','rp0025','rp0050','rp0100','rp0250','rp0500','rp1000']  

        overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],power_points).T,
                                      columns=['asset','hazard_point'])
        collect_point_damages = []
        for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                          desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
            for return_period in return_periods:
                collect_point_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                        df_ds[climate_model],
                                                                                        power_points,
                                                                                        curves,
                                                                                        maxdam,
                                                                                        return_period,
                                                                                        country_code)])

        get_asset_type_point = dict(zip(power_points.index,power_points.asset))

        if hazard_type == 'tc':
            results = pd.concat([pd.DataFrame(line[1],columns=pd.MultiIndex.from_product([[line[0]],
                                                                                ['asset','curve','damage']])).stack(level=0) 
                       for line in collect_point_damages]).reset_index(level=1).reset_index(drop=True)
        elif hazard_type == 'fl':
            results = pd.concat([pd.DataFrame([line[1]],columns=pd.MultiIndex.from_product([[line[0]],
                                                                                        ['asset','curve','damage']])).stack(level=0) 
                               for line in collect_point_damages]).reset_index(level=1).reset_index(drop=True) 

        results.columns = ['rp','asset','curve','damage']
        results['asset_type'] = results.asset.apply(lambda x : get_asset_type_point[x])

        damaged_points[climate_model] = results.groupby(['rp','curve','asset_type']).sum()['damage'].reset_index()

    return damaged_lines,damaged_poly,damaged_points


In [141]:
osm_damage_infra = assess_damage_osm('LAO',osm_power_infra,'tc')

Loading historical coastal flood data ...
Loading future coastal flood data ...


polyline damage calculation for LAO fl (historical): 0it [00:00, ?it/s]


ValueError: No objects to concatenate

In [175]:
def country_analysis_osm(country_code,hazard_type): #
    
    # extract infrastructure data from OSM
    osm_power_infra = extract_osm_infrastructure(country_code,osm_data_path)
    
    # assess damage to hazard_type
    osm_damage_infra = assess_damage_osm(country_code,osm_power_infra,hazard_type)

    return osm_damage_infra

In [375]:
%%time
osm_damage_infra = country_analysis_osm('TWN','tc') #,'line','PG'

query is finished, lets start the loop


extract: 100%|████████████████████████████████████████████████████████████████████| 2643/2643 [00:04<00:00, 628.88it/s]


query is finished, lets start the loop


KeyboardInterrupt: 

In [None]:
#osm_damage_infra[1]['_CNRM-CM6-1-HR']
osm_damage_infra

# Government data processing

In [7]:
# load collected power grid data
def extract_pg_data(country_code,pg_type):
    files = [x for x in os.listdir(pg_data_path)  if country_code in x ]
    
    if pg_type=='line':
        for file in files: 
            file_path = os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))

            pg_data_country = gpd.read_file(file_path)
            pg_data_country = pd.DataFrame(pg_data_country.copy())
            #print(pg_data_country.head())
            pg_data_country.geometry = pygeos.from_shapely(pg_data_country.geometry)
            pg_data_country['geometry'] = reproject(pg_data_country)

        pg_data_country = buffer_assets(pg_data_country.loc[pg_data_country.asset.isin(['line'])],buffer_size=100).reset_index(drop=True)

    elif pg_type=='point':
        for file in files:
            file_path = os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type))
                
            pg_data_country = gpd.read_file(file_path)
            pg_data_country = pd.DataFrame(pg_data_country.copy())
            #print(pg_data_country.head())
            pg_data_country.geometry = pygeos.from_shapely(pg_data_country.geometry)
            pg_data_country['geometry'] = reproject(pg_data_country)
            #print(pg_data_country)

        pg_data_country = buffer_assets(pg_data_country.loc[pg_data_country.asset.isin(['plant_point','substation_point','power_tower','power_pole'])],buffer_size=100).reset_index(drop=True)

    return pg_data_country

def open_pg_data(country_code):
    pg_lines = extract_pg_data(country_code,'line')
    pg_points = extract_pg_data(country_code,'point')
    #print(pg_points)
    return pg_lines,pg_points

pg_infra = open_pg_data('LAO')

In [None]:
def assess_damage_pg(country_code,pg_infra,hazard_type):
    
    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(os.path.join(vul_curve_path,'infra_vulnerability_data.xlsx'))
    #curves['line'] = 1 # remove this when things work!
    
    # read infrastructure data:
    pg_lines,pg_points = pg_infra
    #print(type(pg_points))
    #print(type(pg_infra))
    
    pg_lines.head(5)
    pg_points.head(5)
    
    if hazard_type=='tc':
        # read wind data
        climate_models = ['','_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM'] # !!!!!!!!!!!!!!!!!!!!!!!!!!!
        df_ds = open_storm_data()
        
        # calculate damaged lines in loop by country_code and climate_model
        damaged_lines = {}
        for climate_model in climate_models:
            return_periods = ['1_10{}'.format(climate_model),'1_50{}'.format(climate_model),
                              '1_100{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],
                                                               pg_lines).T,columns=['asset','hazard_point'])
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                           df_ds[climate_model],
                                                                                           pg_lines,
                                                                                           curves,
                                                                                           maxdam,
                                                                                           return_period,
                                                                                           country_code)])

        collect_line_damages = [(line[0],line[1][0],line[1][1]) for line in collect_line_damages]
        damaged_lines_country = pg_lines.merge(pd.DataFrame(collect_line_damages,columns=['return_period','index','damage']),
                                                  left_index=True,right_on='index')
        damaged_lines_country = damaged_lines_country.drop(['buffered'],axis=1)
        damaged_lines[climate_model] = damaged_lines_country
        
        # calculate damaged points in loop by country_code and climate_model
        damaged_points = {}
        for climate_model in climate_models:
            return_periods = ['1_10{}'.format(climate_model),'1_50{}'.format(climate_model),
                              '1_100{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],pg_points).T,
                                          columns=['asset','hazard_point'])
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,climate_model)):
                for return_period in return_periods:
                    collect_point_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                            df_ds[climate_model],
                                                                                            pg_points,
                                                                                            curves,
                                                                                            maxdam,
                                                                                            return_period,
                                                                                            country_code)])

        collect_point_damages = [(line[0],line[1][0],line[1][1]) for line in collect_point_damages]
        damaged_points_country = pg_points.merge(pd.DataFrame(collect_point_damages,columns=['return_period','index','damage']),
                                                    left_index=True,right_on='index')
        damaged_points_country = damaged_points_country.drop(['buffered'],axis=1)
        damaged_points[climate_model] = damaged_points_country
 
    elif hazard_type=='fl':
        # read flood data
        scenario_types = ('historical','rcp8p5')
        df_ds = open_flood_data(country_code) #['historical'].head(30) # REMOVE .HEAD(30)
        #time_periods = []
        
        #for time_period in time_periods:
        return_periods = ['rp0010','rp0050','rp0100','rp0500','rp1000']
        
        # calculate damaged lines in loop by country_code and climate_model
        damaged_lines = {}
        for scenario_type in scenario_types:
            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[scenario_type],pg_lines).T,columns=['asset','hazard_point'])
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {} ({})'.format(country_code,hazard_type,scenario_type)):
                for return_period in return_periods:
                    collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                           df_ds[scenario_type],
                                                                                           pg_lines,
                                                                                           curves,
                                                                                           maxdam,
                                                                                           return_period,
                                                                                           country_code)])
                    
        collect_line_damages = [(line[0],line[1][0],line[1][1]) for line in collect_line_damages]
        damaged_lines_country = pg_lines.merge(pd.DataFrame(collect_line_damages,columns=['return_period','index','damage']),
                                                  left_index=True,right_on='index')
        damaged_lines = damaged_lines_country.drop(['buffered'],axis=1)
        
        # calculate damaged points in loop by country_code and climate_model
        damaged_points = {}
        for scenario_type in scenario_types:
            overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[scenario_type],pg_points).T,
                                          columns=['asset','hazard_point'])
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {} ({})'.format(country_code,hazard_type,scenario_type)):
                for return_period in return_periods:
                    collect_point_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                            df_ds[scenario_type],
                                                                                            pg_points,
                                                                                            curves,
                                                                                            maxdam,
                                                                                            return_period,
                                                                                            country_code)])

        collect_point_damages = [(line[0],line[1][0],line[1][1]) for line in collect_point_damages]
        damaged_points_country = pg_points.merge(pd.DataFrame(collect_point_damages,columns=['return_period','index','damage']),
                                                    left_index=True,right_on='index')
        damaged_points = damaged_points_country.drop(['buffered'],axis=1)
        
    return damaged_lines,damaged_points

pg_damage_infra = assess_damage_pg('LAO',pg_infra,'tc')

In [45]:
pg_damage_infra

NameError: name 'pg_damage_infra' is not defined

In [10]:
def country_analysis_pg(country_code,hazard_type): #
    
    # extract infrastructure data from OSM
    pg_infra = open_pg_data(country_code)

    # assess damage to wind storms
    pg_damage_infra = assess_damage_pg(country_code,pg_infra,hazard_type)

    return pg_damage_infra
    
    
pg_damage_infra = country_analysis_pg('LAO','fl') #,'line','PG'

<class 'pandas.core.frame.DataFrame'>
<class 'tuple'>
Loading historical coastal flood data ...
Loading future coastal flood data ...


polyline damage calculation for LAO fl (historical):  74%|█████████████████      | 317/427 [3:20:12<1:09:28, 37.89s/it]
Exception ignored in: <function ZipFile.__del__ at 0x0000012B1188B040>
Traceback (most recent call last):
  File "C:\Users\mye500\Miniconda3\envs\pgrisk\lib\zipfile.py", line 1816, in __del__
    self.close()
KeyboardInterrupt: 

KeyboardInterrupt



In [11]:
pg_damage_infra

({'_CMCC-CM2-VHR4':         status  capacity_kV              value   id      source country  \
  0     Existing          230  transmission_line  0.0  World Bank    Laos   
  1     Existing          230  transmission_line  0.0  World Bank    Laos   
  2     Existing          230  transmission_line  0.0  World Bank    Laos   
  3     Existing          230  transmission_line  0.0  World Bank    Laos   
  4     Existing          230  transmission_line  0.0  World Bank    Laos   
  ...        ...          ...                ...  ...         ...     ...   
  2130  Existing           22               None  NaN  World Bank    None   
  2131  Existing           22               None  NaN  World Bank    None   
  2132  Existing           22               None  NaN  World Bank    None   
  2133  Existing           22               None  NaN  World Bank    None   
  2134  Existing           22               None  NaN  World Bank    None   
  
       operator undergrnd phases cables  year asset  \


In [None]:
"""
def country_analysis_pg(country_code,hazard_type): #
    
    # extract infrastructure data from gov data
    osm_power_infra = extract_pg_infra(country_code,pg_data_path)
    osm_damage_infra = assess_damage_pg(country_code,pg_data_country,hazard_type)
    
    return osm_damage_infra
    
    
osm_damage_infra = country_analysis_pg('LAO','fl') #,'line','PG'
"""

In [None]:
def clip_gridfinder(country_code):
    base_map_path = os.path.join(data_path,'base_map')

    cty_boundary_path = os.path.join(base_map_path,'gadm41_{}.gpkg'.format(country_code))
    cty_boundary = gpd.read_file(cty_boundary_path)
    #mask = pd.DataFrame(mask.copy())
    #mask.geometry = pygeos.from_shapely(mask.geometry)
    #mask['geometry'] = reproject(mask)

    gridfinder_path = r'C:\Users\mye500\OneDrive - Vrije Universiteit Amsterdam\01_Research-Projects\01_risk_assessment\PG_data\gridfinder\grid.gpkg'
    gridfinder = gpd.read_file(gridfinder_path)
    #gridfinder = pd.DataFrame(gridfinder.copy())
    #gridfinder.geometry = pygeos.from_shapely(gridfinder.geometry)
    #gridfinder['geometry'] = reproject(gridfinder)

    clipped = gpd.clip(gridfinder,cty_boundary)

    return clipped

In [None]:
clip_gridfinder('TWN')