In [None]:
import pandas as pd
from datetime import datetime, timedelta
import xarray as xr
import numpy as np
from os.path import join, isfile
from copy import deepcopy

In [None]:
import sys
import os
# Add the sibling '3-postprocess' folder (resolved relative to the current
# working directory) to the module search path.
sys.path.append(os.path.abspath('../3-postprocess/'))

In [None]:
# Root of the project tree. Defaults to the original scratch location, but can
# be redirected with the COMPOUND_HOTSPOTS_ROOT environment variable so the
# notebook also runs where the data lives somewhere else.
root = os.environ.get('COMPOUND_HOTSPOTS_ROOT', r'/scratch/compound_hotspots')
# Folder with the post-processed results this notebook reads from and writes to.
ddir = join(root, 'data', '4-postprocessed')

## 1. Combine river-mouth attributes with river-mouth reanalysis statistics

In [None]:
# Target file for the combined attribute table; the same path with a '.csv'
# extension is used for the CSV copy written at the end of this section.
fn_attrs_out = join(ddir, 'rivmth_mean_attrs.nc')

In [None]:
# Open the river-mouth reanalysis time series from its zarr store.
fn_rivmth_ts = join(ddir, 'rivmth_reanalysis.zarr')
ds = xr.open_zarr(fn_rivmth_ts)

In [None]:
# Columns of the coupling table that are carried over into the attribute table,
# in the order in which they are kept.
ccols = [
    'dist', 'dist2coast', 'rivwth', 'uparea', 'rivhgt', 'elevtn',
    'gtsm_egm_offset', 'elvp10', 'elvp90', 'elvavg',
    'mean_drain_length', 'mean_drain_slope',
]
# Read the full table indexed by its 'index' column, then keep only `ccols`.
fn_csv_coupling = join(root, 'src', '1-prepare', r'cmf_gtsm_75km_update191203.csv')
coupling_table = pd.read_csv(fn_csv_coupling, index_col='index')
coupling = coupling_table.loc[:, ccols]

In [None]:
# combine data
def _tag_vars(dset, tag):
    """Rename every data variable to '<text before its first underscore>_<tag>'."""
    mapping = {}
    for name in dset.data_vars.keys():
        mapping[name] = '{}_{}'.format(name.split('_')[0], tag)
    return dset.rename(mapping)


# Ensemble-mean drivers of the 'surge' scenario (WSE is treated separately below).
ds_drivers = ds.drop('WSE').mean('ensemble').sel(scen='surge').drop('scen')
ds_drivers['Htiderange'] = ds_drivers['Htide_day_max'] - ds_drivers['Htide_day_min']

# Annual maxima, averaged over the years.
amax_vars = ['Q', 'Hseas_day_mean', 'Hsurge_day_max', 'Hskewsurge_day', 'Htiderange']
ds_drivers_amax = _tag_vars(
    ds_drivers[amax_vars].resample(time='A').max().mean('time'), 'amax'
)
# Annual minima, averaged over the years.
ds_drivers_amin = _tag_vars(
    ds_drivers[['Hseas_day_mean']].resample(time='A').min().mean('time'), 'amin'
)
# Mean over the full time axis.
ds_drivers_mean = _tag_vars(ds_drivers[['Q', 'Htiderange']].mean('time'), 'mean')

# Standard deviation of WSE over time for the 'surge' scenario, averaged over the ensemble.
da_wse = ds['WSE'].sel(scen='surge').drop('scen')
da_wse_std = da_wse.std('time').reset_coords(drop=True).mean('ensemble')
da_wse_std.name = 'wse_std'

# Merge everything into one float32 table.
merged_attrs = xr.merge([
    coupling[ccols].to_xarray(),
    ds_drivers_amax,
    ds_drivers_amin,
    ds_drivers_mean,
    da_wse_std,
])
attrs = merged_attrs.astype(np.float32).to_dataframe()

In [None]:
# Add annual-maximum statistics to the attribute table.
#
# Every xarray result is converted with `.to_series()` before it is stored, so
# that pandas aligns the values on the 'index' labels of `attrs` instead of
# relying on both objects happening to share the same row order.
# Note: 'Q_amax', 'Q_mean' and 'Hskewsurge_amax' already exist in `attrs` (from
# the 'surge'-scenario drivers merged above) and are overwritten here.

# Q annual maxima ('msl' scenario): mean and coefficient of variation over the
# years, each averaged over the ensemble.
da_Q = ds.sel(scen='msl')['Q']
da_Q_am = da_Q.groupby('time.year').max('time')
da_Q_am_avg = da_Q_am.mean('year')
da_Q_am_std = da_Q_am.std('year')
da_Q_am_cv = da_Q_am_std / da_Q_am_avg
attrs['Q_amax'] = da_Q_am_avg.mean('ensemble').to_series()
attrs['Q_amax_cv'] = da_Q_am_cv.mean('ensemble').to_series()

# long term average Q
attrs['Q_mean'] = da_Q.mean('time').mean('ensemble').to_series()

# Skew-surge annual maxima: mean and coefficient of variation over the years.
# NOTE(review): assumes 'Hskewsurge_day' has no 'scen'/'ensemble' dimension,
# i.e. only 'index' remains after reducing 'year' - TODO confirm.
da_Hskewsurge = ds['Hskewsurge_day']
da_Hskewsurge_am = da_Hskewsurge.groupby('time.year').max('time')
da_Hskewsurge_am_avg = da_Hskewsurge_am.mean('year')
attrs['Hskewsurge_amax'] = da_Hskewsurge_am_avg.to_series()
# the ratio is computed in xarray (not DataArray / pandas Series) so both operands align by label
attrs['Hskewsurge_amax_cv'] = (da_Hskewsurge_am.std('year') / da_Hskewsurge_am_avg).to_series()

In [None]:
# Location 3354 is excluded as an outlier (original note: "weird outlier ?
# incorrect coupled ?"); the remaining table is written as NetCDF and as CSV.
outlier_index = 3354
attrs = attrs.loc[attrs.index != outlier_index]
fn_attrs_csv = fn_attrs_out.replace('.nc','.csv')
attrs.to_xarray().to_netcdf(fn_attrs_out)
attrs.to_csv(fn_attrs_csv)

## 2. compute ensemble mean / significance results

In [None]:
# read rp, peaks and impact results
# `model` is the label inserted into the '..._ensemble-<model>.nc' output file names below.
model='mean'
# NOTE: `attrs` is re-bound here to the xarray Dataset read back from file; it
# is no longer the DataFrame built in section 1.
attrs = xr.open_dataset(fn_attrs_out)

### dominant driver

In [None]:
# `wdw` selects which drivers file is read; it is part of the file name ('wdw<value>').
wdw = 1
fn_drivers = join(ddir, f'rivmth_drivers_wdw{wdw}.nc')
# Keep only the locations that are present in `attrs`.
ds_drivers = xr.open_dataset(fn_drivers).sel(index=attrs.index)#.sel(ensemble=[model])

In [None]:
import scipy
import scipy.stats

def spearmanr(da0, da1, dim='time'):
    """Spearman rank correlation of two xarray objects along ``dim``.

    ``scipy.stats.spearmanr`` is evaluated on every pair of 1-D slices taken
    along ``dim``; the remaining dimensions are looped over by
    ``xr.apply_ufunc`` (``vectorize=True``).

    Parameters
    ----------
    da0, da1 : xarray objects
        Inputs to correlate; both must contain ``dim``.
    dim : str, optional
        Name of the dimension that is reduced. Default is ``'time'``.

    Returns
    -------
    tuple
        ``(r, p)``: the correlation coefficient and the associated p-value,
        each without ``dim`` and without the helper ``'stats'`` coordinate.
    """
    def _rank_corr(x, y):
        # the scipy result is packed into a length-2 array: [correlation, p-value]
        return np.asarray(scipy.stats.spearmanr(x, y))

    stats = xr.apply_ufunc(
        _rank_corr,
        da0,
        da1,
        input_core_dims=[[dim], [dim]],
        output_core_dims=[['stats']],
        vectorize=True,
        dask='parallelized',
        output_dtypes=[float],
        output_sizes={'stats': 2},  # length of the new 'stats' dimension: (r, p)
    )
    # label the two entries so they can be selected by name
    stats['stats'] = xr.Variable('stats', ['r', 'p'])
    rho = stats.sel(stats='r').drop('stats')
    pval = stats.sel(stats='p').drop('stats')
    return rho, pval

In [None]:
drivers = ['Hskewsurge','Q']

# Spearman rank correlation (r, p) between 'h' and each driver along 'year'.
ds_spear = ds_drivers.coords.to_dataset().drop(['year'])
for d in drivers:
    ds_spear[f'{d}_r'], ds_spear[f'{d}_p'] = spearmanr(ds_drivers['h'], ds_drivers[d], dim='year')

# Per ensemble member: a driver is flagged when r >= 0 and p <= 0.05.
ds_spear['H'] = np.logical_and(ds_spear[f'{drivers[0]}_r']>=0.0, ds_spear[f'{drivers[0]}_p']<=0.05)
ds_spear['Q'] = np.logical_and(ds_spear[f'{drivers[1]}_r']>=0.0, ds_spear[f'{drivers[1]}_p']<=0.05)
ds_spear['insign'] = np.logical_and(~ds_spear['H'], ~ds_spear['Q'])
ds_spear['compound'] = np.logical_and(ds_spear['H'], ds_spear['Q'])

# Ensemble level: a flag holds when at least half of the members (rounded up) agree.
N = ds_spear['ensemble'].size
N2 = int(np.ceil(N/2))
Hsign1 = ds_spear['H'].sum('ensemble') >= N2
Qsign1 = ds_spear['Q'].sum('ensemble') >= N2
compound1 = ds_spear['compound'].sum('ensemble') >= N2
insign1 = ds_spear['insign'].sum('ensemble') >= N2  # kept for inspection; not used in the classes below

ds_corr_stats = ds_drivers.coords.to_dataset().drop(['year', 'ensemble'])
ds_corr_stats['driver_H_sign'] = Hsign1
ds_corr_stats['driver_Q_sign'] = Qsign1
ds_corr_stats['driver_H_r'] = ds_spear[f'{drivers[0]}_r'].mean('ensemble')
ds_corr_stats['driver_Q_r'] = ds_spear[f'{drivers[1]}_r'].mean('ensemble')
# if N>1:
#     ds_corr_stats[f'driver_compound_(N={N})'] = ds_spear['compound'].sum('ensemble') >= N
# Mutually exclusive classes: compound, H only, Q only, and "insign" for everything else.
ds_corr_stats['driver_compound'] = compound1
ds_corr_stats['driver_H'] = np.logical_and(np.logical_and(Hsign1, ~Qsign1), ~compound1)
ds_corr_stats['driver_Q'] = np.logical_and(np.logical_and(Qsign1, ~Hsign1), ~compound1)
# FIX: 'insign' is the complement of the three classes above. The previous
# expression used driver_Q without negation, which made it a copy of driver_Q.
ds_corr_stats['driver_insign'] = np.logical_and(np.logical_and(~ds_corr_stats['driver_Q'], ~ds_corr_stats['driver_H']), ~compound1)

# Share of locations per class in percent; because the classes partition the
# locations, the printed total should be 100.
corr_sum = (ds_corr_stats.drop(['driver_H_r', 'driver_Q_r', 'driver_H_sign', 'driver_Q_sign']).sum()/ds_corr_stats.index.size*100).expand_dims('index').to_dataframe()
print(corr_sum.values.sum())
corr_sum.T

In [None]:
# to file
# per-member Spearman results (r, p and the boolean class flags)
fn_spear_out = join(ddir, f'rivmth_drivers_wdw{wdw}_spearmanrank.nc')
ds_spear.to_netcdf(fn_spear_out)

# ensemble-level driver classification
fn_drivers_out = join(ddir, f'rivmth_drivers_wdw{wdw}_ensemble-{model}.nc')
ds_corr_stats.to_netcdf(fn_drivers_out)

#### > data drivers correlations section 3.1

In [None]:
dlst = ['compound', 'H', 'Q', 'insign'] # drivers
r_names = [f'{drv}_r' for drv in drivers]

# Mean correlation over the locations of each class, per ensemble member.
member_means = [ds_spear[r_names].where(ds_spear[cls]).mean('index') for cls in dlst]
da_d = xr.concat(member_means, dim='driver').rename({f'{drivers[0]}_r': 'H_r' })
da_d['driver'] = xr.Variable('driver', list(dlst))

# The same for the ensemble-level classification, added as an extra 'ensemble' entry.
ens_r = ds_corr_stats[['driver_H_r', 'driver_Q_r']]
ens_means = [ens_r.where(ds_corr_stats[f'driver_{cls}']).mean('index') for cls in dlst]
da_d_all = xr.concat(ens_means, dim='driver').expand_dims('ensemble')
da_d_all = da_d_all.rename({name: name.replace('driver_','') for name in da_d_all.data_vars.keys()})
da_d_all['driver'] = xr.Variable('driver', list(dlst))
da_d_all['ensemble'] = xr.Variable('ensemble', ['_N3'])

# One table: rows per driver class, columns per ensemble entry.
xr.concat([da_d, da_d_all], dim='ensemble').to_dataframe().unstack()

In [None]:
def _member_count(cls):
    """Number of flagged locations per ensemble member for class ``cls``.

    For every class except 'compound', locations flagged as compound are
    masked out before counting.
    """
    flags = ds_spear[f'{cls}']
    if cls == 'compound':
        return flags.sum('index')
    return flags.where(~ds_spear['compound']).sum('index')


# Counts per member and per ensemble-level class, shown as a percentage of all locations.
da_d = xr.concat([_member_count(cls) for cls in dlst], dim='driver')
ens_counts = [ds_corr_stats[f'driver_{cls}'].sum('index') for cls in dlst]
da_d_all = xr.concat(ens_counts, dim='driver').expand_dims('ensemble')
da_d_all['driver'] = xr.Variable('driver', list(dlst))
da_d_all['ensemble'] = xr.Variable('ensemble', ['_N3'])
xr.concat([da_d, da_d_all], dim='ensemble').to_dataframe().unstack() / ds_spear.index.size * 100

### Scenario analysis of extreme value statistics for water surface elevation at the river mouth

In [None]:
# Extreme-value results for water surface elevation, restricted to the locations in `attrs`.
fn_wse_ev = join(ddir, f'rivmth_wse_ev.nc')
ds_rp = xr.open_dataset(fn_wse_ev).sel(index=attrs.index)#.sel(ensemble=[model])

In [None]:
# difference in rp
# `alpha` and `1-alpha` select the two confidence bounds from 'extreme_values_ci'.
alpha=0.025

da_wse_rps_ci = ds_rp['extreme_values_ci']
da_wse_rps = ds_rp['extreme_values']

# Pairwise differences between scenarios; the last two name tokens
# ('<a>_<b>') identify the scenarios and are parsed again further down.
diff_surge_seas = (da_wse_rps.sel(scen='surge') - da_wse_rps.sel(scen='seas'))
diff_surge_seas.name = 'diff_h_surge_seas'
diff_seas_tide = (da_wse_rps.sel(scen='seas') - da_wse_rps.sel(scen='tide'))
diff_seas_tide.name = 'diff_h_seas_tide'
diff_surge_tide = (da_wse_rps.sel(scen='surge') - da_wse_rps.sel(scen='tide'))
diff_surge_tide.name = 'diff_h_surge_tide'
ds_diff = xr.merge([
    diff_surge_seas,
    diff_seas_tide,
    diff_surge_tide,
])

dim = 'ensemble'
N = ds_diff[dim].size
# ensemble-mean difference
ds_diff_mean = ds_diff.mean(dim)
# True where all N members agree on the sign of the difference.
# NumPy ufuncs are applied directly to the xarray objects; the `xr.ufuncs`
# wrappers used before are deprecated / not available in every xarray release.
ds_diff_dir = np.abs(np.sign(ds_diff).sum(dim)) == N
# ds_diff_sign = np.abs(ds_diff_mean / ds_diff.std(dim)) > (2 / np.sqrt(N-1))

# Per member: the confidence intervals of the two scenarios do not overlap,
# i.e. the `alpha` bound of the larger one exceeds the `1-alpha` bound of the smaller one.
ds_lst = []
for var in list(ds_diff.data_vars.keys()):
    v2,v3 = var.split('_')[-2:]
    da_sign = xr.where(
        np.sign(ds_diff[var])>0,
        da_wse_rps_ci.sel(scen=v2, alpha=alpha) > da_wse_rps_ci.sel(scen=v3, alpha=1-alpha),
        da_wse_rps_ci.sel(scen=v3, alpha=alpha) > da_wse_rps_ci.sel(scen=v2, alpha=1-alpha),
    )#.drop('alpha')
    da_sign.name = var
    ds_lst.append(da_sign)
# True where the intervals are separated in every member.
ds_diff_sign2 = xr.merge(ds_lst).sum(dim) >= N

# Output: mean difference, '<var>_sign' (all members agree on direction) and
# '<var>_sign_ci' (agreement on direction and separated confidence intervals).
ds_diff_h_stats = xr.merge([
    ds_diff_mean,
    ds_diff_dir.rename({v:f'{v}_sign' for v in ds_diff_dir.data_vars.keys()}),
    np.logical_and(ds_diff_sign2, ds_diff_dir).rename({v:f'{v}_sign_ci' for v in ds_diff_dir.data_vars.keys()}),
])


In [None]:
# to file
# ensemble summary of the scenario differences and their significance flags
fn_wse_ev_out = join(ddir, f'rivmth_wse_ev_ensemble-{model}.nc')
ds_diff_h_stats.to_netcdf(fn_wse_ev_out)

#### > data $\Delta$h section 3.2

In [None]:
# input to e.g. TABLE 3 in manuscript
rp = 10 # return period
param = 'h'
# Scenario pair to tabulate. Alternatives: 'surge_tide' (total surge) and
# 'surge_seas' (daily); edit this line to switch.
scen = 'seas_tide' # seasonal

df = ds_diff_h_stats.sel(T=rp).to_dataframe()

# Class 0: flag not set; 1: flag set and positive difference; 2: flag set and negative difference.
flag_pos = np.logical_and(df[f'diff_{param}_{scen}_sign'], df[f'diff_{param}_{scen}']>0)
flag_neg = np.logical_and(df[f'diff_{param}_{scen}_sign'], df[f'diff_{param}_{scen}']<0)
df['diff_class'] = flag_pos * 1 + flag_neg * 2

df1 = df[['diff_class', f'diff_{param}_{scen}']]
# overall mean difference
print(df1[f'diff_{param}_{scen}'].mean())
# percentage of locations per class
print(df1.groupby('diff_class')['diff_class'].count()/df.index.size*100)
# mean difference per class
df1.groupby('diff_class').mean()

$\Delta$H per dominant driver

In [None]:
# Join the difference statistics with the driver classification at return period `rp`.
ds_stats = xr.merge([ds_diff_h_stats, ds_corr_stats]).sel(T=rp)
df = ds_stats.to_dataframe()

# Encode the driver class as one integer column.
driver_codes = {'driver_H': 1, 'driver_Q': 2, 'driver_compound': 3, 'driver_insign': 4}
df['driver'] = sum(df[col] * code for col, code in driver_codes.items())

# NOTE(review): `df1` (rows with the surge-tide flag set) is not used by the
# table below, which is computed from the full `df` - confirm this is intended.
df1 = df.loc[df['diff_h_surge_tide_sign'] == 1]

diff_cols = ['diff_h_surge_tide', 'diff_h_surge_seas', 'diff_h_seas_tide']
df[['driver'] + diff_cols].groupby('driver').mean()

### flood impact

In [None]:
# Affected-population results, restricted to the locations in `attrs`.
fn_impact = join(ddir, 'rivmth_pop_affected.nc')
ds_impact = xr.open_dataset(fn_impact).sel(index=attrs.index)#.sel(ensemble=[model])

In [None]:
# take only locations with significant dH
sign_dH = (ds_diff_h_stats['diff_h_surge_tide_sign']>=1).sel(T=ds_impact['T'])

# Anchor point of the curve: zero people affected at T=1 (i.e. p=1).
T0 = xr.DataArray(
    data=np.zeros((ds_impact.ensemble.size,1,ds_impact.index.size)),
    dims=('ensemble', 'T', 'index'),
    coords={'ensemble':ds_impact.ensemble, 'T': [1], 'index': ds_impact.index},
)


def _as_function_of_p(da):
    """Prepend the T=1 anchor, attach p = 1/T and make 'p' the dimension.

    The order along 'T' is reversed before 'T' is swapped for 'p'.
    """
    curve = xr.concat([T0, da], dim='T')
    curve['p'] = xr.Variable('T', 1/curve['T'].values)
    reversed_T = curve['T'].values[::-1]
    return curve.sel(T=reversed_T).swap_dims({'T':'p'})


# integrate using trapezoidal rule to get expected annual mean
pop_affected_dH_dp = _as_function_of_p(ds_impact['people_affected_dH'].where(sign_dH))
ds_impact['people_affected_dH_dp'] = pop_affected_dH_dp.integrate('p')
pop_affected_all_dp = _as_function_of_p(ds_impact['people_affected_all'])
ds_impact['people_affected_all_dp'] = pop_affected_all_dp.integrate('p')

# ensemble mean of all impact variables
ds_impact_stats = ds_impact.mean('ensemble')

In [None]:
# Write the ensemble-mean impact statistics to file.
fn_impact_out = join(ddir, f'rivmth_pop_affected_ensemble-{model}.nc')
ds_impact_stats.to_netcdf(fn_impact_out)

#### > data population exposed section 3.3

In [None]:
# 'people_affected_all' summed over the locations where `sign_dH` holds, divided by 1e6.
pop_flagged = ds_impact['people_affected_all'].where(sign_dH)
pop_flagged.sum('index').to_dataframe().unstack()/1e6

In [None]:
# Integrated population figures summed over all locations, per ensemble member.
cols = ['people_affected_dH_dp', 'people_affected_all_dp']
df = ds_impact[cols].sum('index').to_dataframe()
# Extra row 'mean': the ensemble-mean statistics summed over all locations.
for col in cols:
    df.loc['mean',col] = ds_impact_stats[col].sum('index').values
# Scale by 1e6 and add the dH share of the total in percent.
df = df.div(1e6)
df['perc'] = df['people_affected_dH_dp'].div(df['people_affected_all_dp']).mul(100)
df