In [36]:
import geopandas as gpd
import pandas as pd
from osgeo import ogr,gdal
import os
import xarray as xr
import rasterio
import numpy as np
import pyproj
from pygeos import from_wkb,from_wkt
import pygeos
from tqdm import tqdm
from shapely.wkb import loads
from pathlib import Path
import glob
from shapely.geometry import mapping
pd.options.mode.chained_assignment = None
from rasterio.mask import mask
#import rioxarray

In [2]:
gdal.SetConfigOption("OSM_CONFIG_FILE", os.path.join('..',"osmconf.ini"))

# change paths to make it work on your own machine
data_path = os.path.join('C:\\','data','pg_risk_analysis')
tc_path = os.path.join(data_path,'tc_netcdf')
fl_path = os.path.join(data_path,'GLOFRIS')
osm_data_path = os.path.join('C:\\','data','country_osm')
pg_data_path = os.path.join(data_path,'pg_data')

In [3]:
def query_b(geoType,keyCol,**valConstraint):
    """
    This function builds an SQL query from the values passed to the retrieve() function.
    Arguments:
         *geoType* : Type of geometry (osm layer) to search for.
         *keyCol* : A list of keys/columns that should be selected from the layer.
         ***valConstraint* : A dictionary of constraints for the values. e.g. WHERE 'value'>20 or 'value'='constraint'
    Returns:
        *string: : a SQL query string.
    """
    query = "SELECT " + "osm_id"
    for a in keyCol: query+= ","+ a  
    query += " FROM " + geoType + " WHERE "
    # If there are values in the dictionary, add constraint clauses
    if valConstraint: 
        for a in [*valConstraint]:
            # For each value of the key, add the constraint
            for b in valConstraint[a]: query += a + b
        query+= " AND "
    # Always ensures the first key/col provided is not Null.
    query+= ""+str(keyCol[0]) +" IS NOT NULL" 
    return query 


def retrieve(osm_path,geoType,keyCol,**valConstraint):
    """
    Function to extract specified geometry and keys/values from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.     
        *geoType* : Type of Geometry to retrieve. e.g. lines, multipolygons, etc.
        *keyCol* : These keys will be returned as columns in the dataframe.
        ***valConstraint: A dictionary specifiying the value constraints.  
        A key can have multiple values (as a list) for more than one constraint for key/value.  
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with all columns, geometries, and constraints specified.    
    """
    driver=ogr.GetDriverByName('OSM')
    data = driver.Open(osm_path)
    query = query_b(geoType,keyCol,**valConstraint)
    sql_lyr = data.ExecuteSQL(query)
    features =[]
    # cl = columns 
    cl = ['osm_id'] 
    for a in keyCol: cl.append(a)
    if data is not None:
        print('query is finished, lets start the loop')
        for feature in tqdm(sql_lyr,desc='extract'):
            #try:
            if feature.GetField(keyCol[0]) is not None:
                geom1 = (feature.geometry().ExportToWkt())
                #print(geom1)
                geom = from_wkt(feature.geometry().ExportToWkt()) 
                if geom is None:
                    continue
                # field will become a row in the dataframe.
                field = []
                for i in cl: field.append(feature.GetField(i))
                field.append(geom)   
                features.append(field)
            #except:
            #    print("WARNING: skipped OSM feature")   
    else:
        print("ERROR: Nonetype error when requesting SQL. Check required.")    
    cl.append('geometry')                   
    if len(features) > 0:
        return pd.DataFrame(features,columns=cl)
    else:
        print("WARNING: No features or No Memory. returning empty GeoDataFrame") 
        return pd.DataFrame(columns=['osm_id','geometry'])

def power_polyline(osm_path):
    """
    Function to extract all energy linestrings from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """
    df = retrieve(osm_path,'lines',['power','voltage'])
    
    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})
    
    #print(df) #check infra keys
    
    return df.reset_index(drop=True)

def power_polygon(osm_path): # check with joel, something was wrong here with extracting substations
    """
    Function to extract energy polygons from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """
    df = retrieve(osm_path,'multipolygons',['other_tags']) 
    
    df = df.loc[(df.other_tags.str.contains('power'))]   #keep rows containing power data         
    df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})     
    
    df['asset'].loc[df['asset'].str.contains('"power"=>"substation"', case=False)]  = 'substation' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"plant"', case=False)] = 'plant' #specify row
    
    df = df.loc[(df.asset == 'substation') | (df.asset == 'plant')]
            
    return df.reset_index(drop=True) 

def electricity(osm_path):
    """
    Function to extract building polygons from OpenStreetMap    
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with all unique building polygons.    
    """
    df = retrieve(osm_path,'multipolygons',['power'])
    
    df = df.reset_index(drop=True).rename(columns={'power': 'asset'})
    
    #df = df[df.asset!='generator']
    df['asset'].loc[df['asset'].str.contains('"power"=>"substation"', case=False)]  = 'substation' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"plant"', case=False)] = 'plant' #specify row
    
    #print(df)  #check infra keys
    
    df = df.loc[(df.asset == 'substation') | (df.asset == 'plant')]
    
    return df.reset_index(drop=True)

def retrieve_poly_subs(osm_path, w_list, b_list):
    """
    Function to extract electricity substation polygons from OpenStreetMap
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region
        for which we want to do the analysis.
        *w_list* :  white list of keywords to search in the other_tags columns
        *b_list* :  black list of keywords of rows that should not be selected
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique substation.
    """
    df = retrieve(osm_path,'multipolygons',['other_tags'])
    df = df[df.other_tags.str.contains('substation', case=False, na=False)]
    #df = df.loc[(df.other_tags.str.contains('substation'))]
    df = df[~df.other_tags.str.contains('|'.join(b_list))]
    #df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})
    df['asset']  = 'substation' #specify row
    #df = df.loc[(df.asset == 'substation')] #specify row
    return df.reset_index(drop=True)

def power_point(osm_path):
    """
    Function to extract energy points from OpenStreetMap  
    Arguments:
        *osm_path* : file path to the .osm.pbf file of the region 
        for which we want to do the analysis.        
    Returns:
        *GeoDataFrame* : a geopandas GeoDataFrame with specified unique energy linestrings.
    """   
    df = retrieve(osm_path,'points',['other_tags']) 
    df = df.loc[(df.other_tags.str.contains('power'))]  #keep rows containing power data       
    df = df.reset_index(drop=True).rename(columns={'other_tags': 'asset'})     
    
    #print(df)
    
    df['asset'].loc[df['asset'].str.contains('"power"=>"tower"', case=False)]  = 'power_tower' #specify row
    df['asset'].loc[df['asset'].str.contains('"power"=>"pole"', case=False)] = 'power_pole' #specify row
    #df['asset'].loc[df['asset'].str.contains('"utility"=>"power"', case=False)] = 'power_tower' #specify row
    
    df = df.loc[(df.asset == 'power_tower') | (df.asset == 'power_pole')]
            
    return df.reset_index(drop=True)

In [4]:
def reproject(df_ds,current_crs="epsg:4326",approximate_crs = "epsg:3857"):
    """[summary]

    Args:
        df_ds ([type]): [description]
        current_crs (str, optional): [description]. Defaults to "epsg:3857".
        approximate_crs (str, optional): [description]. Defaults to "epsg:4326".

    Returns:
        [type]: [description]
    """    

    geometries = df_ds['geometry']
    coords = pygeos.get_coordinates(geometries)
    transformer=pyproj.Transformer.from_crs(current_crs, approximate_crs,always_xy=True)
    new_coords = transformer.transform(coords[:, 0], coords[:, 1])
    
    return pygeos.set_coordinates(geometries.copy(), np.array(new_coords).T) 

def load_curves_maxdam(data_path,hazard='wind'): 
    """[summary]

    Args:
        data_path ([type]): [description]

    Returns:
        [type]: [description]
    """
    
    if hazard == 'wind':
        sheet_name = 'flooding_curves'
    elif hazard == 'flood':
        sheet_name = 'flooding_curves'
    
    # load curves and maximum damages as separate inputs
    curves = pd.read_excel(data_path,sheet_name=sheet_name,skiprows=8,index_col=[0])
    maxdam=pd.read_excel(data_path,sheet_name=sheet_name,index_col=[0]).iloc[:5]
    
    curves.columns = maxdam.columns

    #transpose maxdam so its easier work with the dataframe
    maxdam = maxdam.T

    #interpolate the curves to fill missing values
    curves = curves.interpolate()
   
    return curves,maxdam

def buffer_assets(assets,buffer_size=100):
    """[summary]

    Args:
        assets ([type]): [description]
        buffer_size (int, optional): [description]. Defaults to 100.

    Returns:
        [type]: [description]
    """    
    assets['buffered'] = pygeos.buffer(assets.geometry.values,buffer_size)
    return assets

def overlay_hazard_assets(df_ds,assets):
    """[summary]

    Args:
        df_ds ([type]): [description]
        assets ([type]): [description]

    Returns:
        [type]: [description]
    """
    #overlay 
    hazard_tree = pygeos.STRtree(df_ds.geometry.values)
    if (pygeos.get_type_id(assets.iloc[0].geometry) == 3) | (pygeos.get_type_id(assets.iloc[0].geometry) == 6):
        return  hazard_tree.query_bulk(assets.geometry,predicate='intersects')    
    else:
        return  hazard_tree.query_bulk(assets.buffered,predicate='intersects')
    
def get_damage_per_asset_per_rp(asset,df_ds,assets,curves,maxdam,return_period,country):
    """[summary]

    Args:
        asset ([type]): [description]
        df_ds ([type]): [description]
        assets ([type]): [description]
        grid_size (int, optional): [description]. Defaults to 90.

    Returns:
        [type]: [description]
    """    

    # find the exact hazard overlays:
    get_hazard_points = df_ds.iloc[asset[1]['hazard_point'].values].reset_index()
    get_hazard_points = get_hazard_points.loc[pygeos.intersects(get_hazard_points.geometry.values,assets.iloc[asset[0]].geometry)]

    asset_type = assets.iloc[asset[0]].asset
    asset_geom = assets.iloc[asset[0]].geometry

    if asset_type in ['plant','substation','generator']:
        maxdam_asset = maxdam.loc[asset_type].MaxDam/pygeos.area(asset_geom)
    else:
        maxdam_asset = maxdam.loc[asset_type].MaxDam


    hazard_intensity = curves[asset_type].index.values
    fragility_values = curves[asset_type].values
    
    if len(get_hazard_points) == 0:
        return asset[0],0
    else:
        
        if pygeos.get_type_id(asset_geom) == 1:
            get_hazard_points['overlay_meters'] = pygeos.length(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
            return asset[0],np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,fragility_values))*get_hazard_points.overlay_meters*maxdam_asset)
        
        elif  pygeos.get_type_id(asset_geom) == 3:
            get_hazard_points['overlay_m2'] = pygeos.area(pygeos.intersection(get_hazard_points.geometry.values,asset_geom))
            return asset[0],get_hazard_points.apply(lambda x: np.interp(x[return_period], hazard_intensity, fragility_values)*maxdam_asset*x.overlay_m2,axis=1).sum()     
        
        else:
            return asset[0],np.sum((np.interp(get_hazard_points[return_period].values,hazard_intensity,fragility_values))*maxdam_asset)

In [5]:
def open_storm_data(climate_model):
    
    with xr.open_dataset(os.path.join(tc_path,'STORM_FIXED_RETURN_PERIODS{}_WP.nc'.format(climate_model))) as ds:
        """
        TC climate model:
            CMCC-CM2-VHR4
            CNRM-CM6-1-HR
            EC-Earth3P-HR
            HadGEM3-GC31-HM
        """
        
        # get the mean values
        df_ds = ds['mean'].to_dataframe().unstack(level=2).reset_index()

        # create geometry values and drop lat lon columns
        df_ds['geometry'] = [pygeos.points(x) for x in list(zip(df_ds['lon'],df_ds['lat']))]
        df_ds = df_ds.drop(['lat','lon'],axis=1,level=0)
        #print(df_ds)
        
        #rename columns to return periods
        return_periods = ['1_{}{}'.format(int(x),climate_model) for x in ds['rp']]
        df_ds.columns = ['1_{}{}'.format(int(x),climate_model) for x in list(df_ds.columns.get_level_values(1))[:-1]]+['geometry']     
        df_ds['geometry'] = pygeos.buffer(df_ds.geometry,radius=0.1/2,cap_style='square').values
        df_ds['geometry'] = reproject(df_ds)
            
        # drop all non values to reduce size
        #if climate_model == '':
        #    df_ds = df_ds.loc[~df_ds['1_10000'].isna()].reset_index(drop=True)
        
        df_ds = df_ds.loc[~df_ds['1_10000{}'.format(climate_model)].isna()].reset_index(drop=True)
        df_ds = df_ds.fillna(0)
        df_ds = df_ds[['1_{}{}'.format(int(x),climate_model) for x in [10,50,100,500,1000]]+['geometry']]
        #print(df_ds)

    return df_ds

# load hazard data 
def extract_wind_data():
    climate_models = ['_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
    df_ds = {}
    for climate_model in climate_models:
        df_ds_cl = open_storm_data(climate_model)
        df_ds[climate_model] = df_ds_cl
    
    return df_ds

In [6]:
open_storm_data('_CMCC-CM2-VHR4')

Unnamed: 0,1_10_CMCC-CM2-VHR4,1_50_CMCC-CM2-VHR4,1_100_CMCC-CM2-VHR4,1_500_CMCC-CM2-VHR4,1_1000_CMCC-CM2-VHR4,geometry
0,19.212739,19.212739,19.212739,19.212739,19.212739,"POLYGON ((11143081.028 568480.588, 11143081.02..."
1,19.759801,19.759801,19.759801,19.759801,19.759801,"POLYGON ((11154212.977 568480.588, 11154212.97..."
2,19.745395,19.745395,19.745395,19.745395,19.745395,"POLYGON ((11165344.927 568480.588, 11165344.92..."
3,18.615344,18.615344,18.615344,18.615344,18.615344,"POLYGON ((11176476.876 568480.588, 11176476.87..."
4,19.845578,19.845578,19.845578,19.845578,19.845578,"POLYGON ((11187608.825 568480.588, 11187608.82..."
...,...,...,...,...,...,...
384730,18.237884,18.237884,18.237884,19.609745,23.423505,"POLYGON ((20004112.496 8399737.89, 20004112.49..."
384731,18.080329,18.080329,18.080329,19.692856,23.656502,"POLYGON ((20015244.445 8399737.89, 20015244.44..."
384732,18.070724,18.070724,18.070724,19.649922,23.449598,"POLYGON ((20026376.394 8399737.89, 20026376.39..."
384733,18.192747,18.192747,18.192747,19.393939,23.184443,"POLYGON ((20037508.343 8399737.89, 20037508.34..."


In [48]:
def clip_flood_data(country_code,time_period='HIST'):
     
    # load country geometry file and create geometry to clip
    ne_countries = gpd.read_file('C:\\Data\\natural_earth\\ne_10m_admin_0_countries.shp') 
    geometry = ne_countries.loc[ne_countries['ISO_A3']==country_code].geometry.values[0]
    geoms = [mapping(geometry)]
    
    rps = ['0010','0050','0100','0500','1000']
    for rp in rps: 
        input_file = os.path.join(fl_path,time_period,'global',
                                  'inuncoast_historical_nosub_hist_rp{}_0.tif'.format(rp))

        # load raster file and save clipped version
        with rasterio.open(input_file) as src:
            out_image, out_transform = mask(src, geoms, crop=True) #rasterio.mask.mask(src, geoms, crop=True)
            out_meta = src.meta

            out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform})

            file_path = os.path.join(fl_path,time_period,'country',
                                            '_'.join([country_code]+input_file.split('_')[3:]))
            
            with rasterio.open(file_path, "w", **out_meta) as dest:
                dest.write(out_image) 

def open_flood_data(country_code,time_period='HIST'):
    """
    # THIS STILL NEEDS TO BE TESTED WITH GLOFRIS DATA
    with xr.open_dataset(os.path.join(fl_path)) as ds: #, engine="rasterio"  | ,'HIST/inuncoast_historical_nosub_hist_rp0500_0.nc'
        df_ds = ds.to_dataframe().reset_index()
        df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
        df_ds = df_ds.rename(columns={'band_data': 'hazard_intensity'})
        df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
        df_ds = df_ds.dropna()
        df_ds = df_ds.reset_index(drop=True)
        df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=20/2,cap_style='square').values
        df_ds['geometry'] = reproject(df_ds)
    """    
        
    files = [x for x in os.listdir(os.path.join(fl_path,time_period,'country'))  if country_code in x ]

    collect_df_ds = [] 
    for file in files: 

        file_path = os.path.join(fl_path,time_period,'country',file)

        with xr.open_dataset(file_path) as ds: #, engine="rasterio"
            df_ds = ds.to_dataframe().reset_index()
            df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
            df_ds = df_ds.rename(columns={'band_data': file.split('_')[4]}) #rename to return period
            df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
            df_ds = df_ds.dropna()
            df_ds = df_ds.reset_index(drop=True)
            df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=20/2,cap_style='square').values
            df_ds['geometry'] = reproject(df_ds)
            collect_df_ds.append(df_ds)

    df_all = collect_df_ds[0].merge(collect_df_ds[1]).merge(collect_df_ds[2]).merge(collect_df_ds[3]).merge(collect_df_ds[4])
    
    return df_all

In [49]:
open_flood_data('LAO')

Unnamed: 0,rp0010,geometry,rp0050,rp0100,rp0500,rp1000
0,0.0,"POLYGON ((12256739.768 3550231.461, 12256739.7...",0.0,0.0,0.0,0.0
1,0.0,"POLYGON ((12256739.768 3549156.386, 12256739.7...",0.0,0.0,0.0,0.0
2,0.0,"POLYGON ((12256739.768 3548081.403, 12256739.7...",0.0,0.0,0.0,0.0
3,0.0,"POLYGON ((12256739.768 3547006.51, 12256739.76...",0.0,0.0,0.0,0.0
4,0.0,"POLYGON ((12256739.768 3545931.71, 12256739.76...",0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
280983,0.0,"POLYGON ((13097201.923 2900842.81, 13097201.92...",0.0,0.0,0.0,0.0
280984,0.0,"POLYGON ((13098129.585 2909047.237, 13098129.5...",0.0,0.0,0.0,0.0
280985,0.0,"POLYGON ((13098129.585 2908021.438, 13098129.5...",0.0,0.0,0.0,0.0
280986,0.0,"POLYGON ((13098129.585 2906995.709, 13098129.5...",0.0,0.0,0.0,0.0


In [11]:
# can be deleted
%%time
fl_path = 'C:\Data\pg_risk_analysis\GLOFRIS'
time_period = 'HIST'
country_code = 'LAO'

files = [x for x in os.listdir(os.path.join(fl_path,time_period,'country'))  if country_code in x ]

collect_df_ds = [] 
for file in files: 

    file_path = os.path.join(fl_path,time_period,'country',file)

    with xr.open_dataset(file_path) as ds: #, engine="rasterio"
        df_ds = ds.to_dataframe().reset_index()
        df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
        df_ds = df_ds.rename(columns={'band_data': file.split('_')[4]}) #rename to return period
        df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
        df_ds = df_ds.dropna()
        df_ds = df_ds.reset_index(drop=True)
        df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=20/2,cap_style='square').values
        df_ds['geometry'] = reproject(df_ds)
        collect_df_ds.append(df_ds)
        
df_all = collect_df_ds[0].merge(collect_df_ds[1]).merge(collect_df_ds[2]).merge(collect_df_ds[3])

CPU times: total: 37.3 s
Wall time: 37.4 s


In [41]:
df_all

Unnamed: 0,rp0010,geometry,rp0050,rp0100,rp0500
0,0.0,"POLYGON ((12256739.768 3550231.461, 12256739.7...",0.0,0.0,0.0
1,0.0,"POLYGON ((12256739.768 3549156.386, 12256739.7...",0.0,0.0,0.0
2,0.0,"POLYGON ((12256739.768 3548081.403, 12256739.7...",0.0,0.0,0.0
3,0.0,"POLYGON ((12256739.768 3547006.51, 12256739.76...",0.0,0.0,0.0
4,0.0,"POLYGON ((12256739.768 3545931.71, 12256739.76...",0.0,0.0,0.0
...,...,...,...,...,...
280983,0.0,"POLYGON ((13097201.923 2900842.81, 13097201.92...",0.0,0.0,0.0
280984,0.0,"POLYGON ((13098129.585 2909047.237, 13098129.5...",0.0,0.0,0.0
280985,0.0,"POLYGON ((13098129.585 2908021.438, 13098129.5...",0.0,0.0,0.0
280986,0.0,"POLYGON ((13098129.585 2906995.709, 13098129.5...",0.0,0.0,0.0


In [42]:
# can be deleted
with xr.open_dataset(file_path) as ds: #, engine="rasterio"
    df_ds = ds.to_dataframe().reset_index()
    df_ds['geometry'] = pygeos.points(df_ds.x,y=df_ds.y)
    df_ds = df_ds.rename(columns={'band_data': 'hazard_intensity'})
    df_ds = df_ds.drop(['band','x', 'y','spatial_ref'], axis=1)
    df_ds = df_ds.dropna()
    df_ds = df_ds.reset_index(drop=True)
    df_ds.geometry= pygeos.buffer(df_ds.geometry,radius=20/2,cap_style='square').values
    df_ds['geometry'] = reproject(df_ds)

In [43]:
df_ds

Unnamed: 0,hazard_intensity,geometry
0,0.0,"POLYGON ((12256739.768 3550231.461, 12256739.7..."
1,0.0,"POLYGON ((12256739.768 3549156.386, 12256739.7..."
2,0.0,"POLYGON ((12256739.768 3548081.403, 12256739.7..."
3,0.0,"POLYGON ((12256739.768 3547006.51, 12256739.76..."
4,0.0,"POLYGON ((12256739.768 3545931.71, 12256739.76..."
...,...,...
280983,0.0,"POLYGON ((13097201.923 2900842.81, 13097201.92..."
280984,0.0,"POLYGON ((13098129.585 2909047.237, 13098129.5..."
280985,0.0,"POLYGON ((13098129.585 2908021.438, 13098129.5..."
280986,0.0,"POLYGON ((13098129.585 2906995.709, 13098129.5..."


# OSM data processing

In [27]:
def extract_osm_infrastructure(country_code,osm_data_path):

    # lines
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    power_lines_country = power_polyline(osm_path)
    power_lines_country['geometry'] = reproject(power_lines_country)
    power_lines_country = buffer_assets(power_lines_country.loc[power_lines_country.asset.isin(
        ['cable','minor_cable','line','minor_line'])],buffer_size=100).reset_index(drop=True)
    
    # polygons
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    power_poly_country = electricity(osm_path)
    power_poly_country['geometry'] = reproject(power_poly_country)
    
    # points
    osm_path = os.path.join(osm_data_path,'{}.osm.pbf'.format(country_code))
    power_points_country = power_point(osm_path)
    power_points_country['geometry'] = reproject(power_points_country)
    power_points_country = buffer_assets(power_points_country.loc[power_points_country.asset.isin(
        ['power_tower','power_pole'])],buffer_size=100).reset_index(drop=True)

    return power_lines_country,power_poly_country,power_points_country


ctry_power_infra = extract_osm_infrastructure('LAO',osm_data_path)

query is finished, lets start the loop


extract: 100%|██████████████████████████████████████████████████████████████████████| 447/447 [00:02<00:00, 159.36it/s]


query is finished, lets start the loop


extract: 100%|█████████████████████████████████████████████████████████████████████████| 25/25 [00:08<00:00,  3.05it/s]


query is finished, lets start the loop


extract: 100%|█████████████████████████████████████████████████████████████████| 45489/45489 [00:04<00:00, 9699.07it/s]


In [12]:
type(ctry_power_infra)

tuple

In [100]:
def assess_damage_osm(country_code,ctry_power_infra,hazard_type='tc'):
    
    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(data_path=os.path.join('..','data','infra_vulnerability_data.xlsx'))
    curves['line'] = 1 # remove this when things work!
    
    # read infrastructure data:
    power_lines,power_poly,power_points = ctry_power_infra
    
    if hazard_type=='tc':
        # read wind data
        climate_models = ['_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        df_ds = extract_wind_data()
        """    
        # calculate damaged lines in loop by country_code and climate_model
        damaged_lines = {}
        for climate_model in climate_models:
            return_periods = ['1_10{}'.format(climate_model),'1_50{}'.format(climate_model),
                              '1_100{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],
                                                               power_lines).T,columns=['asset','hazard_point'])
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {}'.format(country_code,climate_model)):
                for return_period in return_periods:
                    collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,df_ds[climate_model],
                                                                                           power_lines,
                                                                                           curves,
                                                                                           maxdam,
                                                                                           return_period,
                                                                                           country_code)])

        collect_line_damages = [(line[0],line[1][0],line[1][1]) for line in collect_line_damages]
        damaged_lines_country = power_lines.merge(pd.DataFrame(collect_line_damages,
                                                                                 columns=['return_period','index','damage']),
                                                                    left_index=True,right_on='index')
        damaged_lines_country = damaged_lines_country.drop(['buffered'],axis=1)
        damaged_lines[climate_model] = damaged_lines_country
        """
        # calculate damaged polygons in loop by country_code and climate_model
        damaged_poly = {}
        for climate_model in climate_models:
            return_periods = ['1_10{}'.format(climate_model),'1_50{}'.format(climate_model),
                              '1_100{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_poly = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],power_poly).T,
                                                              columns=['asset','hazard_point'])
            collect_poly_damages = []
            for asset in tqdm(overlay_poly.groupby('asset'),total=len(overlay_poly.asset.unique()),desc='polygon damage calculation for {} {}'.format(country_code,climate_model)):
                for return_period in return_periods:
                    collect_poly_damages.append([return_period,get_damage_per_asset_per_rp(asset,df_ds[climate_model],
                                                                                           power_poly,
                                                                                           curves,maxdam,
                                                                                           return_period,
                                                                                           country_code)])

            collect_poly_damages = [(line[0],line[1][0],line[1][1]) for line in collect_poly_damages]
            print(collect_poly_damages[0])
            damaged_poly_country = power_poly.merge(pd.DataFrame(collect_poly_damages,columns=['return_period','index','damage']),left_index=True,right_on='index')
            damaged_poly[climate_model] = damaged_poly_country
        """
        # calculate damaged points in loop by country_code and climate_model
        damaged_points = {}
        for climate_model in climate_models:
            return_periods = ['1_10{}'.format(climate_model),'1_50{}'.format(climate_model),
                              '1_100{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],power_points).T,
                                          columns=['asset','hazard_point'])
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),desc='point damage calculation for {} {}'.format(country_code,climate_model)):
                for return_period in return_periods:
                    collect_point_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                            df_ds[climate_model],
                                                                                            power_points,
                                                                                            curves,
                                                                                            maxdam,
                                                                                            return_period,
                                                                                            country_code)])

            collect_point_damages = [(line[0],line[1][0],line[1][1]) for line in collect_point_damages]
            damaged_points_country = power_points.merge(pd.DataFrame(collect_point_damages,columns=['return_period','index','damage']),left_index=True,right_on='index')
            damaged_points_country = damaged_points_country.drop(['buffered'],axis=1)
            damaged_points[climate_model] = damaged_points_country
            """
        
    elif hazard_type=='fl':
        # read flood data
        df_ds = open_flood_data(country_code).head(50) # REMOVE .HEAD()
        #time_periods = []
        
        #for time_period in time_periods:
        return_periods = ['rp0010','rp0050','rp0100','rp0500','rp1000']
        
        # calculate damaged lines in loop by country_code and climate_model
        damaged_lines = {}
        overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds,power_lines).T,columns=['asset','hazard_point'])
        collect_line_damages = []
        for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                          desc='polyline damage calculation for {}'.format(country_code)):
            for return_period in return_periods:
                collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,df_ds,
                                                                                       power_lines,
                                                                                       curves,
                                                                                       maxdam,
                                                                                       return_period,
                                                                                       country_code)])

        print(collect_line_damages[0])
        collect_line_damages = [(line[0],line[1][0],line[1][1]) for line in collect_line_damages]
        damaged_lines = power_lines.merge(pd.DataFrame(collect_line_damages,
                                                                                 columns=['return_period','index','damage']),
                                                                    left_index=True,right_on='index')
        damaged_lines = damaged_lines_country.drop(['buffered'],axis=1)
        """
        # calculate damaged polygons in loop by country_code and climate_model
        damaged_poly = {}
        overlay_poly = pd.DataFrame(overlay_hazard_assets(df_ds,power_poly).T,
                                                          columns=['asset','hazard_point'])
        collect_poly_damages = []
        for asset in tqdm(overlay_poly.groupby('asset'),total=len(overlay_poly.asset.unique()),desc='polygon damage calculation for {}'.format(country_code)):
            for return_period in return_periods:
                collect_poly_damages.append([return_period,get_damage_per_asset_per_rp(asset,df_ds,
                                                                                       power_poly,
                                                                                       curves,maxdam,
                                                                                       return_period,
                                                                                       country_code)])

            collect_poly_damages = [(line[0],line[1][0],line[1][1]) for line in collect_poly_damages]
            print(collect_poly_damages[0])
            damaged_poly = power_poly.merge(pd.DataFrame(collect_poly_damages,columns=['return_period','index','damage']),left_index=True,right_on='index')
        
        # calculate damaged points in loop by country_code and climate_model
        damaged_points = {}
        overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds,power_points).T,
                                      columns=['asset','hazard_point'])
        collect_point_damages = []
        for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),desc='point damage calculation for {}'.format(country_code)):
            for return_period in return_periods:
                collect_point_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                        df_ds,
                                                                                        power_points,
                                                                                        curves,
                                                                                        maxdam,
                                                                                        return_period,
                                                                                        country_code)])

            print(collect_point_damages[1][1])
            collect_point_damages = [(line[0],line[1][0],line[1][1]) for line in collect_point_damages]
            damaged_points = power_points.merge(pd.DataFrame(collect_point_damages,columns=['return_period','index','damage']),left_index=True,right_on='index')
            damaged_points = damaged_points_country.drop(['buffered'],axis=1)
            """

    #return damaged_lines,damaged_poly,damaged_points
    return damaged_lines

ctry_damage_infra = assess_damage_osm('LAO',ctry_power_infra,'fl')

polyline damage calculation for LAO: 100%|███████████████████████████████████████████| 345/345 [00:06<00:00, 51.59it/s]


['rp0010', (0, 1201767797.788254)]


In [None]:
def country_analysis_osm(country_code,exposure_data='OSM',hazard_type='tc'): #
    
    if exposure_data == 'OSM':
        # extract infrastructure data from OSM
        ctry_power_infra = extract_osm_infrastructure(country_code,osm_data_path)

        # extract wind data
        # df_ds = extract_wind_data()
    
        # assess damage to wind storms
        #climate_models = ['_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        ctry_damage_infra = assess_damage_osm(country_code,ctry_power_infra,hazard_type)
    
        return ctry_damage_infra
    """
    elif exposure_data == 'PG':
        # extract power grid data
        ctry_power_infra = extract_pg_data(country_code,pg_type)

        # extract wind data
        df_ds = extract_wind_data()
    
        # assess damage to wind storms
        climate_models = ['_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
        ctry_damage_infra = assess_damage_infrastructure(country_code,ctry_power_infra,climate_models)
    
        return ctry_damage_infra
        """

ctry_damage_infra = country_analysis_osm('LAO') #,'line','PG'

In [115]:
ctry_damage_infra[1]['_CNRM-CM6-1-HR']

Unnamed: 0,osm_id,asset,geometry,return_period,index,damage
0,,plant,"MULTIPOLYGON (((11440952.95 2125594.573, 11440...",1_10_CNRM-CM6-1-HR,0,2989.453138
1,,plant,"MULTIPOLYGON (((11440952.95 2125594.573, 11440...",1_50_CNRM-CM6-1-HR,0,3593.237462
2,,plant,"MULTIPOLYGON (((11440952.95 2125594.573, 11440...",1_100_CNRM-CM6-1-HR,0,3867.059570
3,,plant,"MULTIPOLYGON (((11440952.95 2125594.573, 11440...",1_500_CNRM-CM6-1-HR,0,4374.063048
4,,plant,"MULTIPOLYGON (((11440952.95 2125594.573, 11440...",1_1000_CNRM-CM6-1-HR,0,4639.178446
...,...,...,...,...,...,...
105,,plant,"MULTIPOLYGON (((11832832.657 1731959.498, 1183...",1_10_CNRM-CM6-1-HR,21,1931.199196
106,,plant,"MULTIPOLYGON (((11832832.657 1731959.498, 1183...",1_50_CNRM-CM6-1-HR,21,2670.295005
107,,plant,"MULTIPOLYGON (((11832832.657 1731959.498, 1183...",1_100_CNRM-CM6-1-HR,21,2890.193758
108,,plant,"MULTIPOLYGON (((11832832.657 1731959.498, 1183...",1_500_CNRM-CM6-1-HR,21,3190.366191


# Government data processing

In [121]:
# load collected power grid data
def extract_pg_data(country_code,pg_type):

    pg_path = os.path.join(pg_data_path,'{}_{}.gpkg'.format(country_code,pg_type)) #e.g.,LAO_line
    pg_data_country = gpd.read_file(os.path.join(pg_path))
    
    pg_data_country = pd.DataFrame(pg_data_country.copy())
    #print(pg_data_country.head())
    pg_data_country.geometry = pygeos.from_shapely(pg_data_country.geometry)
    pg_data_country['geometry'] = reproject(pg_data_country)
    
    if pg_type == 'line':
        pg_data_country = buffer_assets(pg_data_country.loc[pg_data_country.asset.isin(['line'])],buffer_size=100).reset_index(drop=True)
        return pg_data_country
    
    elif pg_type == 'point':
        pg_data_country = buffer_assets(pg_data_country.loc[pg_data_country.asset.isin(['point'])],buffer_size=100).reset_index(drop=True)
        return pg_data_country
    
    return pg_data_country

In [125]:
def assess_damage_pg(country_code,pg_data_country,pg_type='line'):
    
    # load curves and maxdam
    curves,maxdam = load_curves_maxdam(data_path=os.path.join('..','data','infra_vulnerability_data.xlsx'))
    curves['line'] = 1 # remove this when things work!
    
    # read wind data
    climate_models = ['_CMCC-CM2-VHR4','_CNRM-CM6-1-HR','_EC-Earth3P-HR','_HadGEM3-GC31-HM']
    df_ds = extract_wind_data()

    if pg_type=='line':        
        # calculate damaged lines in loop by country_code and climate_model
        damaged_lines = {}
        for climate_model in climate_models:
            return_periods = ['1_10{}'.format(climate_model),'1_50{}'.format(climate_model),
                              '1_100{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],pg_data_country).T,
                                         columns=['asset','hazard_point'])
            collect_line_damages = []
            for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),
                              desc='polyline damage calculation for {} {}'.format(country_code,climate_model)):
                for return_period in return_periods:
                    collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                           df_ds[climate_model],
                                                                                           pg_data_country,
                                                                                           curves,
                                                                                           maxdam,
                                                                                           return_period,
                                                                                           country_code)])

            collect_line_damages = [(line[0],line[1][0],line[1][1]) for line in collect_line_damages]
            damaged_lines_country = pg_data_country.merge(pd.DataFrame(collect_line_damages,columns=['return_period','index','damage']),
                                                          left_index=True,right_on='index')
            damaged_lines_country = damaged_lines_country.drop(['buffered'],axis=1)
            damaged_lines[climate_model] = damaged_lines_country
            
    elif pg_type=='point':
        # calculate damaged points in loop by country_code and climate_model
        damaged_points = {}
        for climate_model in climate_models:
            return_periods = ['1_10{}'.format(climate_model),'1_50{}'.format(climate_model),
                              '1_100{}'.format(climate_model),'1_500{}'.format(climate_model),'1_1000{}'.format(climate_model)]

            overlay_points = pd.DataFrame(overlay_hazard_assets(df_ds[climate_model],pg_data_country).T,
                                          columns=['asset','hazard_point'])
            collect_point_damages = []
            for asset in tqdm(overlay_points.groupby('asset'),total=len(overlay_points.asset.unique()),
                              desc='point damage calculation for {} {}'.format(country_code,climate_model)):
                for return_period in return_periods:
                    collect_point_damages.append([return_period,get_damage_per_asset_per_rp(asset,
                                                                                            df_ds[climate_model],
                                                                                            pg_data_country,
                                                                                            curves,
                                                                                            maxdam,
                                                                                            return_period,
                                                                                            country_code)])

            collect_point_damages = [(line[0],line[1][0],line[1][1]) for line in collect_point_damages]
            damaged_points_country = pg_data_country.merge(pd.DataFrame(collect_point_damages,columns=['return_period','index','damage']),
                                                        left_index=True,right_on='index')
            damaged_points_country = damaged_points_country.drop(['buffered'],axis=1)
            damaged_points[climate_model] = damaged_points_country
   
    return damaged_lines,damaged_points

In [126]:
pg_data_country = extract_pg_data('LAO','point')
pg_data_country.head(5)

Unnamed: 0,id,status,source,country,name,type,capacit_MW,operator,year,asset,value,involt_kV,outvolt_kV,capaci_kVA,units,layer,path,geometry,buffered
0,0,Existing,World Bank,Laos,,Hydro,,,,point,plant,,,,,lao33762powerstations,C:/Users/mye500/OneDrive - Vrije Universiteit ...,POINT (11361572.355 2244469.157),"POLYGON ((11361672.355 2244469.157, 11361670.4..."
1,1,Existing,World Bank,Laos,,Hydro,,,,point,plant,,,,,lao33762powerstations,C:/Users/mye500/OneDrive - Vrije Universiteit ...,POINT (11419824.439 2099840.655),"POLYGON ((11419924.439 2099840.655, 11419922.5..."
2,2,Existing,World Bank,Laos,,Hydro,,,,point,plant,,,,,lao33762powerstations,C:/Users/mye500/OneDrive - Vrije Universiteit ...,POINT (11453022.972 2103393.117),"POLYGON ((11453122.972 2103393.117, 11453121.0..."
3,3,Existing,World Bank,Laos,,Hydro,,,,point,plant,,,,,lao33762powerstations,C:/Users/mye500/OneDrive - Vrije Universiteit ...,POINT (11637642.554 2064169.509),"POLYGON ((11637742.554 2064169.509, 11637740.6..."
4,4,Existing,World Bank,Laos,,Hydro,,,,point,plant,,,,,lao33762powerstations,C:/Users/mye500/OneDrive - Vrije Universiteit ...,POINT (11699554.725 2082341.258),"POLYGON ((11699654.725 2082341.258, 11699652.8..."


In [129]:
assess_damage_infra = assess_damage_pg('LAO',pg_data_country,'point')
assess_damage_infra.head(5)

point damage calculation for LAO _CMCC-CM2-VHR4:   0%|                                          | 0/37 [00:00<?, ?it/s]


KeyError: 'point'

In [19]:
osm_data_path = os.path.join('C:\\','data','country_osm')
filelist = []
country_codes = []
for i in os.listdir(osm_data_path):
    osm_path = os.path.join(osm_data_path,i)
    if os.path.isfile(osm_path):
        filelist.append(i)
        country_codes.append(os.path.splitext(os.path.splitext(i)[0])[0])
        osm_path = os.path.join(osm_data_path,i)
        #print(osm_path)
print(country_codes)
country_codes = tuple(country_codes)

['CHN', 'IDN', 'JPN', 'KHM', 'KOR', 'LAO', 'LUX', 'MMR', 'MNG', 'PHL', 'PRK', 'THA', 'TWN', 'VNM']


In [None]:
# lines
overlay_lines = pd.DataFrame(overlay_hazard_assets(df_ds,pg_data).T,columns=['asset','hazard_point'])
#print(overlay_lines.asset.unique())

collect_line_damages = []
for asset in tqdm(overlay_lines.groupby('asset'),total=len(overlay_lines.asset.unique()),desc='polyline damage calculation for {}'.format(country_code)):
    for return_period in return_periods:
        collect_line_damages.append([return_period,get_damage_per_asset_per_rp(asset,df_ds,pg_data,curves,maxdam,return_period,country_code)])

collect_line_damages = [(line[0],line[1][0],line[1][1]) for line in collect_line_damages]
damaged_lines = pg_data.merge(pd.DataFrame(collect_line_damages,columns=['return_period','index','damage']),left_index=True,right_on='index')
damaged_lines = damaged_lines.drop(['buffered'],axis=1)
damaged_lines