## Add modelled impact to the gridded KGV dataset

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd 
import xarray as xr
import sys
import matplotlib.pyplot as plt
import datetime as dt
from scipy import sparse
import cartopy.crs as ccrs
from climada.engine import Impact, ImpactCalc
from climada import CONFIG
sys.path.append(str(CONFIG.local_data.func_dir))
import scClim as sc

# Input-data and output directories, taken from the local_data section of the CLIMADA CONFIG
data_dir = str(CONFIG.local_data.data_dir)
out_dir = str(CONFIG.local_data.out_dir)

Could not find bayes_opt. Module Calib_opt will not work.


In [None]:
# Model inputs: hazard, exposure and impact functions
haz_var = 'MESHS'
event_def_version = 7
years = np.arange(2002, 2021 + 1)  # 2002..2021 inclusive

# Extent handed to sc.hazard_from_radar for both hazard sets
# (a fresh list is passed on every call, as in the two separate literals before)
radar_extent = (5.8, 10.6, 45.7, 47.9)

# Hazard: radar-based files for the chosen variable, years and event definition
paths = sc.E.get_hazard_files_TS(haz_var, years, event_def_version, data_dir)
haz = sc.hazard_from_radar(paths, extent=list(radar_extent), varname=haz_var)

# Exposure: both exposure sets are read from the same gridded KGV file
kgv_exposure_file = data_dir+'/KGV/ds_building_dmg_v7_1000m.nc'
exp = sc.read_xr_exposure(kgv_exposure_file, 'value_exposure')
exp_PAA = sc.read_xr_exposure(kgv_exposure_file, 'n_count_exposure')

# Impact functions: the same CSV is read twice, once with PAA_only=True
impf_csv = data_dir + '/out_files/paa_mdd_smooth_%s%s_v%d.csv'%('KGV',haz_var,event_def_version)
imp_fun_set = sc.impf_from_csv(impf_csv, smooth=False, emanuel_fit=True, plot=False)
imp_fun_set_PAA = sc.impf_from_csv(impf_csv, PAA_only=True, smooth=False,
                                   emanuel_fit=True, plot=False)

###############################################################################
# Impacts based on crowd-sourced data
# (despite its name, paths_crowd holds the opened dataset, not a list of paths)
paths_crowd = xr.open_dataset(data_dir+'/crowd-source/Reports_min100_2017-22.nc')
haz_crowd = sc.hazard_from_radar(paths_crowd, extent=list(radar_extent), varname='crowd')


In [3]:
# Observed impacts: 'value' and 'n_count' are read from the gridded KGV file
kgv_damage_file = data_dir+'/KGV/ds_building_dmg_v7_1000m.nc'
imp_measured = sc.read_xr_impact(kgv_damage_file, 'value')
imp_measuredPAA = sc.read_xr_impact(kgv_damage_file, 'n_count', unit='')
exp_str = 'KGV'


In [None]:
# Calculate modelled impacts (damage value and number of affected buildings),
# keeping the per-event impact matrices
imp = ImpactCalc(exp, imp_fun_set, haz).impact(save_mat=True)
imp_PAA = ImpactCalc(exp_PAA, imp_fun_set_PAA, haz).impact(save_mat=True)

# Scale impact to account for changing exposure: one factor per calendar year,
# looked up by the year of each event date
scale_factor_year = pd.read_csv(data_dir+'/out_files/constants/KGV_scale_factor.csv',index_col=0)
year = np.array([dt.datetime.fromordinal(int(d)).year for d in imp.date])
scale_arr = scale_factor_year.loc[year,'scale_factor'].values
scale_arrPAA = scale_factor_year.loc[year,'scale_factorPAA'].values

# Row-wise scaling (1d scale factor * 2d impact matrix).
# Multiplying from the left with a sparse diagonal matrix scales every event
# row without materialising the full dense events x grid-cells matrix.
atEvent = scale_arr*imp.at_event
impMat = sparse.csr_matrix(sparse.diags(scale_arr) @ imp.imp_mat)
atEventPAA = scale_arrPAA*imp_PAA.at_event
impMatPAA = sparse.csr_matrix(sparse.diags(scale_arrPAA) @ imp_PAA.imp_mat)
# NOTE(review): atEvent/impMat/atEventPAA/impMatPAA are not referenced by the
# later cells of this notebook, which use the unscaled imp.imp_mat and
# imp_PAA.imp_mat - confirm whether the scaled values were meant to be written.

In [20]:
# Open the gridded KGV dataset that the following cells extend
kgv_nc_path = data_dir+'/KGV/ds_building_dmg_v7_1000m.nc'
ds_KGV = xr.open_dataset(kgv_nc_path)

In [21]:
# Extend ds_KGV with additional dates: collect the dates with non-zero modelled impact
imp_dates = [dt.datetime.fromordinal(int(d)) for d in imp.date[imp.at_event>0]]
impPAA_dates = [dt.datetime.fromordinal(int(d)) for d in imp_PAA.date[imp_PAA.at_event>0]]
# Every date with imp>0 must also have imp_PAA>0, so the PAA dates cover both
assert set(imp_dates) <= set(impPAA_dates)

# Select dates that are not yet in ds_KGV (compared at day resolution).
# The day strings of ds_KGV.date are computed once and kept in a set instead of
# being recomputed for every candidate date.
known_days = set(ds_KGV.date.dt.strftime('%Y-%m-%d').values)
add_dates = np.array([d for d in impPAA_dates
                      if d.strftime('%Y-%m-%d') not in known_days])
print(len(add_dates))


140


In [26]:
# Append the new dates to ds_KGV.date.
# Variables holding observations; they stay NaN on the added dates.
observed_vars = ['value','Versicherungssumme','n_count','PAA','MDR']
ds_KGV_add = ds_KGV[observed_vars].copy(deep=True)

# Cut the copy down to len(add_dates) entries along date, then relabel them with add_dates
n_new_dates = len(add_dates)
ds_KGV_add = ds_KGV_add.isel(date=slice(0, n_new_dates))
ds_KGV_add['date'] = xr.DataArray(add_dates, dims='date')

# No impact is observed on these dates (only modelled), so overwrite the copied values with NaN
for var_name in observed_vars:
    ds_KGV_add[var_name] = np.nan

# Merge the placeholder entries back into ds_KGV
ds_KGV = xr.merge([ds_KGV, ds_KGV_add])


## add modelled damage

In [34]:
# Sanity check: columns 0 and 1 of imp.coord_exp, reshaped to the (chy, chx) grid,
# must reproduce ds_KGV.lat and ds_KGV.lon
grid_shape = (len(ds_KGV.chy), len(ds_KGV.chx))

lat = np.reshape(imp.coord_exp[:, 0], grid_shape)
lon = np.reshape(imp.coord_exp[:, 1], grid_shape)
latOrig = ds_KGV.lat.values
lonOrig = ds_KGV.lon.values

np.testing.assert_array_almost_equal(lat, latOrig)
np.testing.assert_array_almost_equal(lon, lonOrig)

In [None]:
# Boolean mask over imp events: True where the event day is present in ds_KGV.date.
# The day strings of ds_KGV.date are computed once and kept in a set instead of
# being recomputed for every event.
ds_days = set(ds_KGV.date.dt.strftime('%Y-%m-%d').values)
sel_dates = np.array([dt.datetime.fromordinal(int(d)).strftime('%Y-%m-%d') in ds_days
                      for d in imp.date], dtype=bool)
                      #Note that sel_dates actually corresponds to all dates now
print(sum(sel_dates))

# Keep the selected events and reshape their impact matrix to (event, chy, chx)
imp_sel = imp.select(event_ids=imp.event_id[sel_dates])
impResh = np.reshape(imp_sel.imp_mat.toarray(),
                     (imp_sel.imp_mat.shape[0], len(ds_KGV.chy), len(ds_KGV.chx)))
# NOTE(review): the (dims, data) assignment is positional - it relies on the
# selected events matching ds_KGV.date in number and order.
ds_KGV['imp_MESHS'] = (('date','chy','chx'), impResh)

## add modelled PAA

In [None]:
# imp and imp_PAA must share event dates and exposure coordinates, so that the
# existing sel_dates mask and the (chy, chx) reshape apply to imp_PAA as well
np.testing.assert_array_equal(imp.date, imp_PAA.date)
np.testing.assert_array_equal(imp.coord_exp, imp_PAA.coord_exp)

# Select the same events and reshape the matrix to (event, chy, chx)
paa_event_ids = imp_PAA.event_id[sel_dates]
impPAA_sel = imp_PAA.select(event_ids=paa_event_ids)
paa_grid_shape = (impPAA_sel.imp_mat.shape[0], len(ds_KGV.chy), len(ds_KGV.chx))
impPAAResh = np.reshape(impPAA_sel.imp_mat.toarray(), paa_grid_shape)
ds_KGV['n_buildings_MESHS'] = (('date','chy','chx'), impPAAResh)

## add crowd-sourced modelled damages

In [None]:
# Crowd-sourced modelled damage: load the stored impact and its sparse impact matrix
impC = Impact.from_csv(data_dir+'/KGV/imp_modelled_crowd.csv')
impC.imp_mat = Impact.read_sparse_csr(data_dir+'/KGV/imp_modelled_crowd.npz')

# Boolean mask over impC events: True where the event day is present in ds_KGV.date.
# The day strings of ds_KGV.date are computed once and kept in a set instead of
# being recomputed for every event.
ds_days_crowd = set(ds_KGV.date.dt.strftime('%Y-%m-%d').values)
sel_dates = np.array([dt.datetime.fromordinal(int(d)).strftime('%Y-%m-%d') in ds_days_crowd
                      for d in impC.date], dtype=bool)
impC_sel = impC.select(event_ids=impC.event_id[sel_dates])
impC_dates = np.array([dt.datetime.fromordinal(int(d)) for d in impC_sel.date])

# Reshape to (event, chy, chx) and attach explicit date coordinates before
# assigning the variable to ds_KGV
impReshC= np.reshape(impC_sel.imp_mat.toarray(),
                     (impC_sel.imp_mat.shape[0],len(ds_KGV.chy),len(ds_KGV.chx)))
crowd_xr = xr.Dataset({'imp_crowd': (('date','chy','chx'),impReshC)},
                      coords={'date':impC_dates,'chy':ds_KGV.chy,'chx':ds_KGV.chx})
ds_KGV['imp_crowd'] =crowd_xr.imp_crowd

#PAA
impC_PAA = Impact.from_csv(data_dir+'/KGV/imp_modelledPAA_crowd.csv')
impC_PAA.imp_mat = Impact.read_sparse_csr(data_dir+'/KGV/imp_modelledPAA_crowd.npz')
# Both crowd impacts must share dates and coordinates so that sel_dates and
# impC_dates can be reused for the PAA impact
np.testing.assert_array_equal(impC.date,impC_PAA.date)
np.testing.assert_array_equal(impC.coord_exp,impC_PAA.coord_exp)
impC_PAA_sel = impC_PAA.select(event_ids=impC_PAA.event_id[sel_dates])
impC_PAAResh= np.reshape(impC_PAA_sel.imp_mat.toarray(),
                         (impC_PAA_sel.imp_mat.shape[0],len(ds_KGV.chy),len(ds_KGV.chx)))

crowdPAA_xr = xr.Dataset({'imp_crowdPAA': (('date','chy','chx'),impC_PAAResh)},
                         coords={'date':impC_dates,'chy':ds_KGV.chy,'chx':ds_KGV.chx})
ds_KGV['n_buildings_crowd'] =crowdPAA_xr.imp_crowdPAA

In [43]:
# Modelled PAA: modelled number of buildings divided by n_count_exposure
ds_KGV['PAA_MESHS'] = ds_KGV['n_buildings_MESHS'] / ds_KGV['n_count_exposure']
ds_KGV['PAA_crowd'] = ds_KGV['n_buildings_crowd'] / ds_KGV['n_count_exposure']

# Variables written with zlib compression (level 5)
compressed_vars = [
    "value",
    "Versicherungssumme",
    "n_count",
    "PAA",
    "MDR",
    "Baujahr_exposure",
    "value_exposure",
    "n_count_exposure",
    "imp_MESHS",
    "n_buildings_MESHS",
    "imp_crowd",
    "n_buildings_crowd",
    "PAA_MESHS",
    "PAA_crowd",
    #"MESHS",  #takes over 12min to compress
]
nc_encoding = {var_name: {'zlib':True,'complevel':5} for var_name in compressed_vars}

# Save as netcdf
ds_KGV.to_netcdf(data_dir+'/KGV/ds_building_dmg_v7_1000m_wModImp.nc', encoding=nc_encoding)