In [1]:
import xarray as xr
import pandas as pd
import numpy as np
import geopandas as gp
from os.path import join, basename
from datetime import date, datetime
import os

In [2]:
import sys
import os
sys.path.append(os.path.abspath('../3-postprocess/'))
import xstats as xs 


In [3]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from plot_tools import *
import cartopy.crs as ccrs
import seaborn as sns

In [4]:
# Project root. Defaults to the original cluster location; set the
# COMPOUND_HOTSPOTS_ROOT environment variable to run the notebook elsewhere.
root = os.environ.get('COMPOUND_HOTSPOTS_ROOT', r'/scratch/compound_hotspots')
ddir = join(root, 'data', '4-postprocessed')  # input files read by this notebook
fdir = join(root, 'reports', 'figures')  # destination of the saved figures

In [5]:
from string import ascii_uppercase as letters  # panel labels: A, B, C, ...

# --- figure export settings and seaborn theme ---
rc = {
    'savefig.bbox': 'tight',
    'savefig.format': 'png',
    'savefig.dpi': 300,
}
context = 'paper'
sns.set(
    context=context,
    style='whitegrid',
    font_scale=1.2 if context == 'paper' else 1.,
    rc=rc,
)

# --- shared plotting constants ---
s = 30  # default marker size
crs = ccrs.Robinson()
crs_sub = ccrs.PlateCarree()

# --- colour maps ---
cmap_div = sns.diverging_palette(220, 10, s=75, l=40, sep=1, as_cmap=True)
# Diverging map built from two sub-ranges of the turbo colour table.
cmap_turbo_div = ListedColormap(
    [
        interpolate_cmap(google_turbo_data, frac)
        for frac in np.hstack((np.linspace(0.1, .35, 50), np.linspace(0.6, 0.9, 50)))
    ]
)

# --- keyword-argument presets reused by the plotting cells ---
bmap_kwargs = {}
# optional basemap features (disabled):
#     features=['land', 'rivers'],
#     feat_colors = [cfeature.COLORS['land_alt1'], cfeature.COLORS['water']],
plot_kwargs = {
    'edgecolor': (0.5, 0.5, 0.5, 0.8),
    'linewidth': 0.5,
    'legend': False,
    'zorder': 2,
}
box_kwargs = {
    'whis': [5, 95],
    'boxprops': {'linewidth': 1.},
    'medianprops': {'linewidth': 1.5},
    'showfliers': False,
    'flierprops': {'markersize': 2},
}

# --- example locations (river-mouth index -> river name) ---
locs = [1930, 1618, 809]
riv_names = {
    809: 'Mattapone',
    1695: 'Weser',
    1930: 'Dal',
    1618: 'Volta',
    2884: 'Ataran',
}

In [6]:
# Import the submodule explicitly: a bare ``import scipy`` does not guarantee
# that ``scipy.stats`` is loaded on every SciPy version.
import scipy.stats


def spearmanr(da0, da1, dim='time'):
    """Spearman rank correlation between two xarray objects along ``dim``.

    ``scipy.stats.spearmanr`` is applied to every 1-D slice along ``dim`` via
    ``xr.apply_ufunc`` (vectorized, dask-parallelized).

    Parameters
    ----------
    da0, da1 : xarray objects
        Inputs that both contain the dimension ``dim``.
    dim : str, default 'time'
        Core dimension that is reduced by the correlation.

    Returns
    -------
    (r, p) : tuple
        Correlation coefficient and p-value, each without ``dim`` and without
        the temporary ``stats`` coordinate.
    """
    def _spearmanr(a, b):
        # scipy returns (correlation, pvalue); pack both into one length-2 array
        return np.asarray(scipy.stats.spearmanr(a, b))

    # apply_ufunc parameters
    kwargs = dict(
        input_core_dims=[[dim], [dim]],
        output_core_dims=[['stats']],
        dask='parallelized',
        output_dtypes=[float],
        output_sizes={'stats': 2},  # <dim> is replaced by a 'stats' dimension holding (r, p)
        vectorize=True
    )
    da_out = xr.apply_ufunc(_spearmanr, da0, da1, **kwargs)
    da_out['stats'] = xr.Variable('stats', ['r', 'p'])
    return da_out.sel(stats='r').drop('stats'), da_out.sel(stats='p').drop('stats')

In [7]:
from scipy.stats import rankdata
from scipy.stats import ttest_ind
from scipy.interpolate import interp1d

def weibull(peaks, nyears=None):
    """Empirical return periods of ``peaks`` from Weibull plotting positions.

    Non-finite values are removed first, so the result holds one return period
    per finite peak, in the order those peaks appear in ``peaks``.

    Parameters
    ----------
    peaks : numpy.ndarray
        Peak values; NaN/inf entries are ignored.
    nyears : float, optional
        Record length used to convert to an event frequency
        (number of finite peaks / nyears). If None, a frequency of one is used.

    Returns
    -------
    numpy.ndarray
        Return period ``1 / (1 - P) / freq`` with ``P = rank / (n + 1)``.
    """
    finite_peaks = peaks[np.isfinite(peaks)]
    n_peaks = finite_peaks.size
    # 'ordinal' ranking: ties receive distinct ranks in order of appearance
    non_exceedance = rankdata(finite_peaks, 'ordinal') / (n_peaks + 1)
    if nyears is None:
        events_per_year = 1.
    else:
        events_per_year = n_peaks / nyears
    return 1 / (1 - non_exceedance) / events_per_year

def _interp_ev(peaks, vals, nyears=None):
    peaks = peaks[np.isfinite(peaks)]
    peaks.sort()
    peaks_rank = np.arange(peaks.size)+1
    P = peaks_rank/(peaks.size+1)
    freq = 1. if nyears is None else peaks.size / nyears
    rp = 1/(1-P)/freq
    kwargs = dict(
        kind='linear', bounds_error=False, assume_sorted=True,
        fill_value=(rp.min(), rp.max())
    )
    rp_out = interp1d(peaks, rp, **kwargs)(vals)
    return rp_out

In [8]:
# River-mouth attribute table; index 3354 is excluded here as in the dataset cells.
attrs_fn = join(ddir, 'rivmth_mean_attrs.csv')
attrs = pd.read_csv(attrs_fn, index_col='index')
attrs = attrs.rename(columns={'rivmth_lat': 'lat', 'rivmth_lon': 'lon'})
attrs = attrs.drop(3354)

# Derived and unit-converted columns (insertion order kept).
uparea_floor = np.maximum(attrs['uparea'].values, 0.001)  # avoid log10(0)
attrs['uparea_log10'] = np.log10(uparea_floor)
attrs['Hseasrange'] = attrs['Hseas_amax'] - attrs['Hseas_amin']
attrs['mean_drain_length'] = attrs['mean_drain_length'] / 1e3  # [km]
attrs['mean_drain_slope'] = attrs['mean_drain_slope'] * 1e3  # [m/km]
attrs['uparea_100'] = attrs['uparea'] / 1e2
# same quantities scaled by 1e2 (the *_cm columns)
attrs['Hseasrange_cm'] = attrs['Hseasrange'] * 1e2
attrs['Hseas_amax_cm'] = attrs['Hseas_amax'] * 1e2
attrs['Hsurge_amax_cm'] = attrs['Hsurge_amax'] * 1e2
# discharge scalings
attrs['Q_amax_10'] = attrs['Q_amax'] / 10
attrs['Q_amax_norm'] = attrs['Q_amax'] / attrs['Q_mean']
attrs['Hskewsurge_amax_cm'] = attrs['Hskewsurge_amax'] * 1e2


In [9]:
# Annual-maximum statistics from the reanalysis time series ('msl' scenario),
# restricted to the locations present in attrs.
fn_rivmth_ts = join(ddir, 'rivmth_reanalysis.zarr')
ds = xr.open_zarr(fn_rivmth_ts).sel(scen='msl', index=attrs.index)

# discharge: mean, standard deviation and coefficient of variation of annual maxima
da_Q = ds['Q']
da_Q_am = da_Q.groupby('time.year').max('time')
da_Q_am_avg = da_Q_am.mean('year')
da_Q_am_std = da_Q_am.std('year')
da_Q_am_cv = da_Q_am_std / da_Q_am_avg

# ensemble-mean values are stored per location
attrs['Q_amax'] = da_Q_am_avg.mean('ensemble')
attrs['Q_amax_cv'] = da_Q_am_cv.mean('ensemble')
attrs['Q_mean'] = da_Q.mean('time').mean('ensemble')

# skew surge: mean of the annual maxima
da_Hskewsurge = ds['Hskewsurge_day']
da_Hskewsurge_am = da_Hskewsurge.groupby('time.year').max('time')
attrs['Hskewsurge_amax'] = da_Hskewsurge_am.mean('year')

# The base columns above were just (re)computed; refresh the columns derived
# from them so they do not keep values based on the previous base columns.
attrs['Q_amax_10'] = attrs['Q_amax']/10
attrs['Q_amax_norm'] = attrs['Q_amax']/attrs['Q_mean']
attrs['Hskewsurge_amax_cm'] = attrs['Hskewsurge_amax']*1e2


## compile data

In [10]:
# read rp, peaks and impact results
wdw = 1
model = 'mean'

# input files
fn_rivmth_ts = join(ddir, 'rivmth_reanalysis.zarr')
fn_peaks_wdw = join(ddir, f'rivmth_h_am_wdw{wdw}.nc')
fn_peaks_rp_ci = join(ddir, 'rivmth_swe_am_ci.nc')
fn_impact = join(ddir, 'rivmth_pop_affected.nc')

# open each dataset and exclude index 3354 (also dropped from attrs)
ds_rp, ds_impact, ds_peaks = (
    xr.open_dataset(fn).drop_sel(index=3354)  # .sel(ensemble=[model])
    for fn in (fn_peaks_rp_ci, fn_impact, fn_peaks_wdw)
)


In [13]:
# Display the impact dataset to inspect its variables and dimensions.
ds_impact

In [None]:
# difference in rp
# Differences in water surface elevation (WSE) return levels between the
# sea-boundary scenarios, with two consistency flags per difference:
#   *_sign    : all ensemble members agree on the direction of the difference
#   *_sign_ci : additionally, in all members the confidence bounds do not overlap
alpha=0.025  # confidence bounds are read at alpha and 1-alpha

da_wse_rps_ci = ds_rp['WSE_ev_ci']
da_wse_rps = ds_rp['WSE_ev']

# diff
diff_surge_seas = (da_wse_rps.sel(scen='surge') - da_wse_rps.sel(scen='seas'))
diff_surge_seas.name = 'diff_h_surge_seas'
diff_seas_tide = (da_wse_rps.sel(scen='seas') - da_wse_rps.sel(scen='tide'))
diff_seas_tide.name = 'diff_h_seas_tide'
diff_surge_tide = (da_wse_rps.sel(scen='surge') - da_wse_rps.sel(scen='tide'))
diff_surge_tide.name = 'diff_h_surge_tide'
ds_diff = xr.merge([
    diff_surge_seas,
    diff_seas_tide,
    diff_surge_tide,
])

dim = 'ensemble'
# ensemble mean and agreement on the direction of the difference
N = ds_diff[dim].size
ds_diff_mean = ds_diff.mean(dim)
# |sum of signs| == N only if every member has the same non-zero sign.
# NumPy ufuncs dispatch on xarray objects; the deprecated xr.ufuncs wrappers
# are not shipped with every xarray release.
ds_diff_dir = np.fabs(np.sign(ds_diff).sum(dim)) == N

ds_lst = []
for var in list(ds_diff.data_vars.keys()):
    # variable names end in '<scenA>_<scenB>' (difference = scenA - scenB)
    v2,v3 = var.split('_')[-2:]
    # the lower bound of the larger scenario must exceed the upper bound of the smaller one
    da_sign = xr.where(
        np.sign(ds_diff[var])>0,
        da_wse_rps_ci.sel(scen=v2, alpha=alpha) > da_wse_rps_ci.sel(scen=v3, alpha=1-alpha),
        da_wse_rps_ci.sel(scen=v3, alpha=alpha) > da_wse_rps_ci.sel(scen=v2, alpha=1-alpha),
    )#.drop('alpha')
    da_sign.name = var
    ds_lst.append(da_sign)
# True where the bounds are separated in every ensemble member
ds_diff_sign2 = xr.merge(ds_lst).sum('ensemble') >= N

ds_diff_h_stats = xr.merge([
    ds_diff_mean,
    ds_diff_dir.rename({v:f'{v}_sign' for v in ds_diff_dir.data_vars.keys()}),
    np.logical_and(ds_diff_sign2, ds_diff_dir).rename({v:f'{v}_sign_ci' for v in ds_diff_dir.data_vars.keys()}),
])

In [None]:
# difference in scale and loc parameters
# Same comparison as for the return levels, applied to the fitted
# distribution parameters stored in ds_rp['params'].
da_scale = ds_rp['params'].sel(par='scale').drop('par')
da_loc = ds_rp['params'].sel(par='loc').drop('par')

# diff
offset = attrs['gtsm_egm_offset'].to_xarray()  # not used in this cell
dscale_surge_tide = (da_scale.sel(scen='surge') - da_scale.sel(scen='tide'))
dscale_surge_tide.name = 'diff_scale_surge_tide'
dloc_surge_tide = (da_loc.sel(scen='surge') - da_loc.sel(scen='tide'))
dloc_surge_tide.name = 'diff_loc_surge_tide'
dscale_surge_seas = (da_scale.sel(scen='surge') - da_scale.sel(scen='seas'))
dscale_surge_seas.name = 'diff_scale_surge_seas'
dloc_surge_seas = (da_loc.sel(scen='surge') - da_loc.sel(scen='seas'))
dloc_surge_seas.name = 'diff_loc_surge_seas'
dscale_seas_tide = (da_scale.sel(scen='seas') - da_scale.sel(scen='tide'))
dscale_seas_tide.name = 'diff_scale_seas_tide'
dloc_seas_tide = (da_loc.sel(scen='seas') - da_loc.sel(scen='tide'))
dloc_seas_tide.name = 'diff_loc_seas_tide'

# NOTE: this rebinds ds_diff, N, ds_diff_mean and ds_diff_dir from the return-level cell.
ds_diff = xr.merge([
    dscale_surge_tide,
    dloc_surge_tide,
    dscale_surge_seas,
    dloc_surge_seas,
    dscale_seas_tide,
    dloc_seas_tide,
])

dim = 'ensemble'
# ensemble mean and agreement on the direction of the difference
N = ds_diff[dim].size
ds_diff_mean = ds_diff.mean(dim)
# |sum of signs| == N only if every member has the same non-zero sign.
# NumPy ufuncs dispatch on xarray objects; the deprecated xr.ufuncs wrappers
# are not shipped with every xarray release.
ds_diff_dir = np.fabs(np.sign(ds_diff).sum(dim)) == N

ds_diff_par_stats = xr.merge([
    ds_diff_mean,
    ds_diff_dir.rename({v:f'{v}_sign' for v in ds_diff_dir.data_vars.keys()}),
])

In [None]:
drivers = ['Hskewsurge', 'Q']

# Rank correlation between the annual-maximum water level 'h' and each driver,
# per location and ensemble member.
ds_spear = ds_peaks.coords.to_dataset().drop(['year'])
for d in drivers:
    r_and_p = spearmanr(ds_peaks['h'], ds_peaks[d], dim='year')
    ds_spear[f'{d}_r'] = r_and_p[0]
    ds_spear[f'{d}_p'] = r_and_p[1]

# A driver counts as significant when r >= 0 and p <= 0.05.
for flag, d in zip(('H', 'Q'), drivers):
    ds_spear[flag] = np.logical_and(ds_spear[f'{d}_r'] >= 0.0, ds_spear[f'{d}_p'] <= 0.05)
ds_spear['insign'] = np.logical_and(~ds_spear['H'], ~ds_spear['Q'])
ds_spear['compound'] = np.logical_and(ds_spear['H'], ds_spear['Q'])

# Ensemble vote: a flag holds when at least half of the members (rounded up) agree.
N = ds_spear['ensemble'].size
N2 = int(np.ceil(N / 2))
Hsign1, Qsign1, compound1, insign1 = (
    ds_spear[name].sum('ensemble') >= N2
    for name in ('H', 'Q', 'compound', 'insign')
)

# Per-location summary; the four driver_* classes below are mutually exclusive.
ds_corr_stats = ds_peaks.coords.to_dataset().drop(['year', 'ensemble'])
ds_corr_stats['driver_H_sign'] = Hsign1
ds_corr_stats['driver_Q_sign'] = Qsign1
ds_corr_stats['driver_H_r'] = ds_spear[f'{drivers[0]}_r'].mean('ensemble')
ds_corr_stats['driver_Q_r'] = ds_spear[f'{drivers[1]}_r'].mean('ensemble')
ds_corr_stats['driver_compound'] = compound1
not_compound = ~compound1
ds_corr_stats['driver_H'] = np.logical_and(np.logical_and(Hsign1, ~Qsign1), not_compound)
ds_corr_stats['driver_Q'] = np.logical_and(np.logical_and(Qsign1, ~Hsign1), not_compound)
ds_corr_stats['driver_insign'] = np.logical_and(
    np.logical_and(~ds_corr_stats['driver_Q'], ~ds_corr_stats['driver_H']),
    not_compound,
)

# Percentage of locations per class; the printed total is a check that the classes sum to 100.
class_counts = ds_corr_stats.drop(['driver_H_r', 'driver_Q_r', 'driver_H_sign', 'driver_Q_sign']).sum()
corr_sum = (class_counts / ds_corr_stats.index.size * 100).expand_dims('index').to_dataframe()
print(corr_sum.values.sum())
corr_sum.T

In [None]:
# Zero-impact anchor at T=1 (p=1) that is prepended to each impact curve.
T0 = xr.DataArray(
    data=np.zeros((ds_impact.ensemble.size, 1, ds_impact.index.size)),
    dims=('ensemble', 'T', 'index'),
    coords={'ensemble': ds_impact.ensemble, 'T': [1], 'index': ds_impact.index},
)


def _as_probability_curve(da_T):
    """Prepend T0 to ``da_T`` and re-index it by p = 1/T, with T in reversed order."""
    curve = xr.concat([T0, da_T], dim='T')
    curve['p'] = xr.Variable('T', 1/curve['T'].values)
    return curve.sel(T=curve['T'].values[::-1]).swap_dims({'T':'p'})


# Integrate each impact curve over p and store the result in ds_impact.
pop_affected_dH_dp = _as_probability_curve(ds_impact['people_affected_dH'])
ds_impact['people_affected_dH_dp'] = pop_affected_dH_dp.integrate('p')
pop_affected_all_dp = _as_probability_curve(ds_impact['people_affected_all'])
ds_impact['people_affected_all_dp'] = pop_affected_all_dp.integrate('p')
ds_impact

In [None]:
rp = 10  # return period [years] at which the per-location statistics are sampled

# Combine difference statistics, ensemble-mean impacts and driver classes.
ds_stats = xr.merge([
    ds_diff_h_stats,
    # ds_diff_par_stats,
    ds_impact.mean('ensemble'),
    ds_corr_stats,
])

# Join the T=rp slice to the attribute table and convert to a GeoDataFrame.
gdf = pandas2geopandas(
    pd.concat(
        [attrs, ds_stats.sel(T=rp).reset_coords(drop=True).to_dataframe()],
        axis=1,
    )
)


def _percent(numerator, denominator):
    """Percentage of two gdf columns; the denominator is floored at 1."""
    return gdf[numerator] / np.maximum(gdf[denominator], 1) * 100


gdf['people_affected_dH_perc'] = _percent('people_affected_dH', 'people_affected_all')
gdf['people_affected_dH_dp_perc'] = _percent('people_affected_dH_dp', 'people_affected_all_dp')
# capped at 100 %
gdf['people_affected_dH_dp_percLECZ'] = np.minimum(_percent('people_affected_dH_dp', 'people_lecz'), 100)
gdf['people_affected_dH_dp_percALL'] = _percent('people_affected_dH_dp', 'people_all')

In [None]:
param = 'h'
scen = 'surge_tide'

# Classify each location: 0 = no consistent sign, 1 = consistent increase, 2 = consistent decrease.
diff_col = f'diff_{param}_{scen}'
consistent = gdf[f'{diff_col}_sign']
increase = np.logical_and(consistent, gdf[diff_col] > 0)
decrease = np.logical_and(consistent, gdf[diff_col] < 0)
gdf['diff_class'] = increase * 1 + decrease * 2

gdf1 = gdf[['diff_class', diff_col]]
# share of locations per class [%]
print(gdf1.groupby('diff_class')['diff_class'].count() / gdf.index.size * 100)
# mean difference per class
gdf1.groupby('diff_class').mean()

In [None]:
# Mean correlation coefficients per driver class: per ensemble member (da_d)
# and for the ensemble-vote classification (da_d_all, labelled '_N3').
dlst = ['compound', 'H', 'Q', 'insign']

r_names = [f'{drv}_r' for drv in drivers]
member_means = [
    ds_spear[r_names].where(ds_spear[cls]).mean('index')
    for cls in dlst
]
da_d = xr.concat(member_means, dim='driver').rename({f'{drivers[0]}_r': 'H_r'})
da_d['driver'] = xr.Variable('driver', list(dlst))

vote_means = [
    ds_corr_stats[['driver_H_r', 'driver_Q_r']].where(ds_corr_stats[f'driver_{cls}']).mean('index')
    for cls in dlst
]
da_d_all = xr.concat(vote_means, dim='driver').expand_dims('ensemble')
# strip the 'driver_' prefix so the variable names match da_d
da_d_all = da_d_all.rename({name: name.replace('driver_', '') for name in da_d_all.data_vars.keys()})
da_d_all['driver'] = xr.Variable('driver', list(dlst))
da_d_all['ensemble'] = xr.Variable('ensemble', ['_N3'])

xr.concat([da_d, da_d_all], dim='ensemble').to_dataframe().unstack()

In [None]:
# Share of locations [%] per driver class: per ensemble member (da_d) and for
# the ensemble-vote classification (da_d_all, labelled '_N3').
member_counts = []
for cls in dlst:
    flagged = ds_spear[f'{cls}']
    if cls != 'compound':
        # exclude compound locations from the single-driver and insignificant classes
        flagged = flagged.where(~ds_spear['compound'])
    member_counts.append(flagged.sum('index'))
da_d = xr.concat(member_counts, dim='driver')

vote_counts = [ds_corr_stats[f'driver_{cls}'].sum('index') for cls in dlst]
da_d_all = xr.concat(vote_counts, dim='driver').expand_dims('ensemble')
da_d_all['driver'] = xr.Variable('driver', list(dlst))
da_d_all['ensemble'] = xr.Variable('ensemble', ['_N3'])

xr.concat([da_d, da_d_all], dim='ensemble').to_dataframe().unstack() / ds_spear.index.size * 100

In [None]:
# Encode the driver classes as one integer per location:
# 1 = H only, 2 = Q only, 3 = compound, 4 = insignificant.
gdf['driver'] = gdf['driver_H']+gdf['driver_Q']*2+gdf[f'driver_compound']*3+gdf[f'driver_insign']*4
# NOTE(review): gdf1 (locations with a consistent surge-tide sign) is assigned here,
# but every statistic below is computed on the full gdf — confirm which was intended.
gdf1 = gdf[gdf['diff_h_surge_tide_sign']==1]
# NOTE(review): only the last expression of a cell is displayed; the next three
# statements are evaluated and their results discarded.
# mean water-level differences per driver class
gdf[['driver', 'diff_h_surge_tide', 'diff_h_surge_seas', 'diff_h_seas_tide']].groupby('driver').mean()
# summed people_affected_dH per driver class
gdf[['driver', 'people_affected_dH']].groupby('driver').sum() #/ gdf[['driver', 'people_lecz']].groupby('driver').sum().values
# people_affected_all per driver class as a fraction of the total over all classes
gdf[['driver', 'people_affected_all']].groupby('driver').sum()/ gdf[['driver', 'people_affected_all']].groupby('driver').sum().values.sum()
# displayed result: people_affected_dH per driver class as a fraction of total people_affected_all
gdf[['driver', 'people_affected_dH']].groupby('driver').sum()/ gdf[['driver', 'people_affected_all']].groupby('driver').sum().values.sum()

In [None]:
# Restrict the impact totals to locations with a consistent surge-tide sign,
# evaluated at the return periods available in ds_impact.
consistent_sign = ds_stats['diff_h_surge_tide_sign'] == 1
sign_dH = consistent_sign.sel(T=ds_impact['T'])

pop_affected_dH, pop_affected_all = (
    ds_impact[name].where(sign_dH).sum('index')
    for name in ('people_affected_dH', 'people_affected_all')
)
# totals over all locations (not used further in this cell)
pop_lecz, pop_all = (
    ds_impact[name].sum('index').expand_dims('index')
    for name in ('people_lecz', 'people_all')
)

# percentage of people_affected_dH relative to people_affected_all, plus the row mean
share_dH = pop_affected_dH / pop_affected_all * 100
df = share_dH.to_series().unstack(0)
df['mean'] = df.mean(axis=1)
df

In [None]:
# Percentage of the p-integrated people_affected_dH relative to the
# p-integrated people_affected_all, summed over all locations.
total_dH_dp = ds_impact['people_affected_dH_dp'].sum('index')
total_all_dp = ds_impact['people_affected_all_dp'].sum('index')
df = (total_dH_dp / total_all_dp * 100).to_series()
df['mean'] = df.mean()  # appended as an extra entry labelled 'mean'
df

## location plots

In [None]:
# Figure: return-level curves per sea-boundary scenario for the example locations,
# one panel per entry in locs plus an inset overview map.

# Colour, marker and legend-label lookups keyed by scenario.
scen_cmap = {
    'surge': np.asarray(plt.cm.tab10.colors[0]), 
    'seas': np.asarray(plt.cm.tab10.colors[3]), 
    'tide': np.asarray(plt.cm.tab10.colors[4])
}
scen_mmap  = {'surge': 'o', 'seas': '^', 'tide': 'd'}
scen_nmap  = {'seas': 'seasonal'}  # legend label overrides; other scenarios use their own key

plt.close('all')
# The per-location plots select a single ensemble member: 'nerc' is used when model is 'mean'.
model2=model if model != 'mean' else 'nerc'
xlim = [0.9,40]
rps2 = [1, 2, 4, 8, 16, 32]  # x-axis tick positions
shape=(1,len(locs))  # one row, one panel per location
fig = plt.figure(figsize=(len(locs)*4, 3.5))
grid = plt.GridSpec(*shape, hspace=0.1, wspace=0.2)
axes =[]
for irow in range(shape[0]):
    for icol in range(shape[1]):
        axes.append(fig.add_subplot(grid[irow, icol]))
    
# Inset map positioned relative to the last panel; the selected locations are drawn in red.
posn = axes[-1].get_position()
axg = fig.add_axes([posn.x1+0.01, posn.y0-0.2, posn.width*0.8, posn.height], projection=crs_sub) # new ax   
basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, features=['land'])
gdf.loc[locs,:].plot(ax=axg, marker='o', color='red', markersize=50, legend=True)
# Reference level subtracted from all plotted values; set to zero, so values are unchanged.
# NOTE: this adds a 'z0' column to the shared attrs table.
attrs['z0'] = 0 #attrs['elevtn']-attrs['rivhgt']

for i, loc in enumerate(locs):
    name = riv_names.get(loc, 'unknown')
    gdf_loc = gdf.loc[loc,:]    
    irow = i // shape[1]
    icol = i % shape[1]
    ax = axes[i]
    
    ds_ev_loc = ds_rp.sel(index=loc)
    z0 = attrs.loc[loc, 'z0']
    T = ds_ev_loc.T.values
    # Scenarios are drawn in reversed list order: tide, seas, surge.
    for scen in ['surge', 'seas', 'tide'][::-1]:
        _ds = ds_ev_loc.sel(scen=scen, ensemble=model2)
        peaks_am = _ds['WSE_am'].values-z0
        # empirical return periods of the WSE_am values
        Tpeaks_am = weibull(peaks_am)
        # confidence bounds at alpha and 1-alpha (alpha is set in the return-level difference cell)
        ev_ci = _ds['WSE_ev_ci'].sel(alpha=[alpha, 1-alpha]).values - z0
        ev = _ds['WSE_ev'].values - z0
        ax.scatter(Tpeaks_am, peaks_am, c=scen_cmap[scen], marker=scen_mmap[scen], label=scen_nmap.get(scen, scen), zorder=2)
        ax.plot(T, ev, color=scen_cmap[scen], zorder=1)
        ax.plot(T, ev_ci[0,:], color=scen_cmap[scen], linestyle='-.', alpha=0.4, zorder=0)
        ax.plot(T, ev_ci[1,:], color=scen_cmap[scen], linestyle='-.', alpha=0.4, zorder=0)
    ax.set_xscale('log')
    # With shape = (1, n) irow is always 0, so the else branch is not reached here.
    if irow == 0:
        ax.set_xticks(rps2)
        ax.set_xticklabels(rps2)
        ax.set_xlabel('return period [years]')
    else:
        ax.set_xticks(rps2)
        ax.set_xticklabels([])
    if icol == 0:
        ax.set_ylabel('water surface elevation [m+EGM96]')
    ax.set_xlim(xlim)
    ax.grid(False)

    # Panel title placed just above the axes, and the matching letter on the inset map.
    l = letters[i]
    ymin, ymax = ax.get_ylim()
    ax.text(xlim[0], ymax+(ymax-ymin)*0.01, f'{l}. {name} River', fontsize='large')
    x, y = gdf_loc.geometry.coords[0]
    axg.text(x+10, y, letters[i], transform=crs_sub)
    
    # NOTE(review): repeats the ax.grid(False) call made a few lines above.
    ax.grid(False)

# The legend is attached to the last panel and placed to its right.
ax.legend(loc='lower left', bbox_to_anchor=(1.05, 0.5), title='sea boundary')

fn_fig = join(fdir, f'locs_rivmth_peaks_gumb_ci{alpha}_{model2}.png')
print(basename(fn_fig))
plt.savefig(fn_fig)

In [None]:
from scipy.stats import rankdata
from scipy.interpolate import interp1d
from peaks import get_peaks
# Figure: for each example location, (top) return periods of both drivers at the
# annual water-level maxima, coloured by the return period of the water level,
# and (bottom) rank-rank plot of each driver against the water level.

# Rename the daily variables to short names; the original 'Hsurge' and 'Htide'
# variables are dropped first so the renamed ones can take their place.
rm = {'Hskewsurge_day':'Hskewsurge', 'Htot_day_max': 'Htot', 'Htide_day_max': 'Htide', 'Hsurge_day_max': 'Hsurge'}
# NOTE: rebinds ds (previously the 'msl' scenario) to the 'surge' scenario.
ds = xr.open_zarr(fn_rivmth_ts).sel(scen='surge').drop(['Hsurge', 'Htide']).rename(rm)
# NOTE(review): ds1 is not used anywhere in this cell.
ds1 = ds.sel(time=slice('01-01-2000', '31-12-2014'))

rps = np.array([0.2, 1, 2, 4, 8, 16, 32])  # axis ticks; rps[0] is also the lower axis limit
# cmap = ListedColormap(google_turbo_data[50:])
# cmap.set_over('black')
cmap = plt.cm.viridis_r
# colour classes are bounded by the return periods >= 1
norm = BoundaryNorm(rps[rps>=1], cmap.N)

# Marker, colour and label lookups per driver.
mmap = {
    'Hsurge': '^',
    'Hskewsurge': '^',
    'Q': 's',
}
cmmap = {
    'Hsurge': plt.cm.tab10.colors[0],
    'Hskewsurge': plt.cm.tab10.colors[0],
    'Q': plt.cm.tab10.colors[2],
}
labs = {
    'Hsurge': '$H_{surge}$',
    'Hskewsurge': '$H_{SS}$',
    'Q': 'Q',
}


plt.close('all')
xlim = [0.1,40]
shape=(2,len(locs))  # two rows, one column per location
fig = plt.figure(figsize=(len(locs)*4, 8.5))
grid = plt.GridSpec(*shape, hspace=0.15, wspace=0.15)

kwargs = dict(cmap=cmap, norm=norm, linewidth=0.5, edgecolor='k', s=35)

for i, loc in enumerate(locs):
    name = riv_names.get(loc,'unknown')
    gdf_loc = gdf.loc[loc,:]    
    # with fewer locations than columns irow is always 0; irow+1 addresses the second row
    irow = i // shape[1]
    icol = i % shape[1]
    
    # get data
    ts_loc = ds.sel(index=loc, ensemble=model2).reset_coords(drop=True)
    df_loc = ds_peaks.sel(index=loc, ensemble=model2).reset_coords(drop=True).to_dataframe()
#     df_loc[~np.isfinite(df_loc)] = 0.9

    # plot rp
    ax = fig.add_subplot(grid[irow, icol])
    # colour: empirical return period of the water level 'h'
    c = weibull(df_loc['h'].values)
    # driver peaks: peaks returned by get_peaks that exceed the 75th percentile of the series
    ythresh = np.percentile(ts_loc[drivers[0]], 75)
    xthresh = np.percentile(ts_loc[drivers[1]], 75)
    ypeaks = get_peaks(ts_loc[drivers[0]], min_dist=30)
    ypeaks = ypeaks.where(ypeaks>ythresh).dropna('time').values #.groupby('time.year').max('time').values
    xpeaks = get_peaks(ts_loc[drivers[1]], min_dist=45)
    xpeaks = xpeaks.where(xpeaks>xthresh).dropna('time').values #.groupby('time.year').max('time').values
    # Return period of each driver value in df_loc, floored just above the lower axis limit.
    # NOTE(review): nyears=35 is hard-coded — presumably the record length; confirm.
    xx = np.maximum(rps[0]*1.1,_interp_ev(xpeaks, df_loc[f'{drivers[1]}'].values, 35))
    yy = np.maximum(rps[0]*1.1,_interp_ev(ypeaks, df_loc[f'{drivers[0]}'].values, 35))
    im = ax.scatter(x=xx, y=yy, c=c, **kwargs)
    
    # plot rank correlation
    ax1 = fig.add_subplot(grid[irow+1, icol])
    for v in drivers:
        ax1.plot(rankdata(df_loc[v]), rankdata(df_loc['h']), color=cmmap[v], label=labs[v], marker=mmap[v], 
                 linewidth=0, markeredgewidth=0.7, markeredgecolor='grey', markersize=5 if mmap[v]=='s' else 6)
    # 1:1 reference line
    ax1.plot([0, 41],[0, 41],'--k',zorder=-1)
    
    # labels
    ax.set_xscale('log')
    ax.set_yscale('log')
    # y labels and tick labels only on the first column
    if i == 0:
        ax.set_yticks(rps)
        ax.set_yticklabels(rps)
        ax.set_ylabel(labs[drivers[0]]+' return period [years]')
        ax1.set_ylabel('riverine water level rank [-]')
    else:
        ax.set_yticklabels([])
        ax.set_ylabel("")
        ax1.set_yticklabels([])
        ax1.set_ylabel("")
    ax.set_xticks(rps)
    ax.set_xticklabels(rps)
    ax.set_xlabel(labs[drivers[1]]+' return period [years]')
    ax1.set_xlabel('driver rank [-]')
    ax.set_title(f'{letters[i]}. {name} River')
    
    # axis
    ax.set_xlim([rps[0], 38])
    ax.set_ylim([rps[0], 38])
    ax.set_aspect('equal')
    ax.grid(False)
    ax1.set_xlim([0.5, 41])
    ax1.set_ylim([0.5, 41])
    ax1.set_aspect('equal')
    ax1.grid(False)
    
    # text 
    # correlation coefficient (p-value) of both drivers for the selected ensemble member
    Hr = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[0]}_r'].values)
    Qr = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[1]}_r'].values)
    Hp = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[0]}_p'].values)
    Qp = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[1]}_p'].values)
    ax1.text(1, 35.5, labs[drivers[0]]+f': {Hr:.2f} ({Hp:.2f})\n{labs[drivers[1]]}: {Qr:.2f} ({Qp:.2f})')

# make colorbar (once, after the loop): created at a dummy position, then moved next
# to the last top-row panel
pad, shrink, fraction = 0.02, 1.0, 0.04
cax = fig.add_axes([1, 1, 0.1, 0.1]) # new ax
cbar = fig.colorbar(im, extend='max', cax=cax)
cbar.ax.set_ylabel("riverine water level\n return period [years]", rotation='vertical')
posn = ax.get_position()
cax.set_position([posn.x1+pad, posn.y0+posn.height*(1-shrink)/2., posn.width*fraction, posn.height*shrink])

# legend attached to the last bottom-row panel
ax1.legend(loc='lower left', bbox_to_anchor=(1.05, 0.5), title='driver')

# inset overview map with the lettered locations
posn = ax1.get_position()
axg = fig.add_axes([posn.x1+0.01, posn.y0-0.1, posn.width*0.8, posn.height], projection=crs_sub) # new ax   
basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, features=['land'])
gdf.loc[locs,:].plot(ax=axg, marker='o', color='red', markersize=50, legend=True)
for i, loc in enumerate(locs):
    x, y = gdf.loc[loc,:].geometry.coords[0]
    axg.text(x+10, y, letters[i], transform=crs_sub)

fn_fig = join(fdir, f'locs_rp_spearmanr_{drivers[0]}_{model2}_wdw{wdw}.png')
print(basename(fn_fig))
plt.savefig(fn_fig)

In [None]:
%matplotlib inline
# Diagnostic figure per location: time series of WSE, Q and skew surge with the
# dates of the annual water-level maxima marked, next to the driver return-period
# scatter and the rank-rank plot. One PNG is written per location.
# This cell reuses cmmap, labs, mmap and get_peaks from the previous figure cell.
rm = {'Hskewsurge_day':'Hskewsurge', 'Htot_day_max': 'Htot', 'Htide_day_max': 'Htide', 'Hsurge_day_max': 'Hsurge'}
ds = xr.open_zarr(fn_rivmth_ts).sel(scen='surge').drop(['Hsurge', 'Htide']).rename(rm)
# ds1 = ds.sel(time=slice('01-09-2003', '31-10-2003'))
rps = np.array([0.25, 0.5, 1, 2, 4, 8, 16, 32])  # axis ticks; rps[0] is the floor for plotted return periods
cmap = plt.cm.viridis_r
norm = BoundaryNorm(rps[rps>=1], cmap.N)
kwargs = dict(cmap=cmap, norm=norm, linewidth=0.5, edgecolor='k', s=35)
# Alternative selection (disabled): the ten locations with the largest Q_mean per driver class.
# for loc in gdf[gdf['driver']==1].sort_values(by='Q_mean').tail(10).index.tolist() +\
#             gdf[gdf['driver']==2].sort_values(by='Q_mean').tail(10).index.tolist() +\
#             gdf[gdf['driver']==3].sort_values(by='Q_mean').tail(10).index.tolist() +\
#             gdf[gdf['driver']==4].sort_values(by='Q_mean').tail(10).index.tolist():
for loc in locs:

    # a fresh figure per location; earlier figures are closed first
    plt.close('all')
    fig = plt.figure(figsize=(20, 8))
    grid = plt.GridSpec(6,5, hspace=0.15, wspace=0.25)

    name = riv_names.get(loc,'unknown')
    gdf_loc = gdf.loc[loc,:]    
    
    # get data
    ts_loc = ds.sel(index=loc, ensemble=model2).reset_coords(drop=True)
    t = ts_loc.time.values
    # x-axis range: first time step in the year 2000 up to the last time step
    tlim = t[ts_loc.time.dt.year==2000][0], t[-1]
    df_loc = ds_peaks.sel(index=loc, ensemble=model2).reset_coords(drop=True).to_dataframe()
    # dates of the annual maxima, built from the year index and the 'dayofyear' column
    tam = pd.to_datetime([f'{yr:04d}{doy:02.0f}' for yr,doy in zip(df_loc.index, df_loc.dayofyear)], format = "%Y%j")
    # driver peaks above the 75th percentile, re-indexed to the full time axis (NaN elsewhere)
    Hthresh = np.percentile(ts_loc[drivers[0]], 75)
    Hpeaks = get_peaks(ts_loc[drivers[0]], min_dist=30).reindex(time=t)
    Hpeaks = Hpeaks.where(Hpeaks>Hthresh)
    Qthresh = np.percentile(ts_loc[drivers[1]], 75)
    Qpeaks = get_peaks(ts_loc[drivers[1]], min_dist=45).reindex(time=t) #.groupby('time.year').max('time').values
    Qpeaks = Qpeaks.where(Qpeaks>Qthresh)
#     Tthresh = np.percentile(ts_loc['Htot'], 75)
#     Tpeaks = get_peaks(ts_loc['Htot'], min_dist=45).reindex(time=t) #.groupby('time.year').max('time').values
#     Tpeaks = Tpeaks.where(Tpeaks>Tthresh)
    
    #plot ts
    # top panel: water surface elevation with the annual maxima marked
    ax0 = fig.add_subplot(grid[:2, :-1])
    ax0.plot(t, ts_loc['WSE'].values, color=plt.cm.tab10.colors[2])
    ax0.plot(tam, ts_loc['WSE'].to_series().loc[tam], '.k')
    ymin, ymax = ax0.get_ylim()
    ax0.vlines(x=tam, ymin=ymin, ymax=ymax, zorder=-1, color='k', linewidth=1, linestyle='-')
    ax0.set_xticklabels([])
    ax0.set_xlim(tlim)
    ax0.set_ylim([ymin, ymax])
    ax0.set_ylabel('WSE')
    
    # middle panel: discharge with its selected peaks; vertical lines at the annual-maximum dates
    ax0 = fig.add_subplot(grid[2:4, :-1])
    ax0.plot(t, ts_loc['Q'].values, color=cmmap['Q'])
    ax0.plot(t, Qpeaks.values, '.k')
    ymin, ymax = ax0.get_ylim()
    ax0.vlines(x=tam, ymin=ymin, ymax=ymax, zorder=-1, color='k', linewidth=1, linestyle='--')
    ax0.set_xlim(tlim)
    ax0.set_ylim([ymin, ymax])
    ax0.set_xticklabels([])
    ax0.set_ylabel('Q')

    # bottom panel: skew surge and the 'Hseas_day_mean' series with the selected surge peaks
    ax0 = fig.add_subplot(grid[4:, :-1])
    ax0.plot(t, ts_loc['Hskewsurge'].values, color=cmmap['Hskewsurge'])
    ax0.plot(t, ts_loc['Hseas_day_mean'].values, 'k')
    ax0.plot(t, Hpeaks.values, '.k')
    ax0.set_ylabel('Hsurge')
#     ax0.plot(t, ts_loc['Htot'].values, color=cmmap['Hskewsurge'])
#     ax0.plot(t, Tpeaks.values, '.k')
#     ax0.set_ylabel('Htot')
    ymin, ymax = ax0.get_ylim()
    ax0.vlines(x=tam, ymin=ymin, ymax=ymax, zorder=-1, color='k', linewidth=1, linestyle='--')
    ax0.set_xlim(tlim)
    ax0.set_ylim([ymin, ymax])
    
    # plot rp
    ax = fig.add_subplot(grid[:3, -1])
    # colour: empirical return period of the water level 'h'
    c = weibull(df_loc['h'].values)
    # NOTE(review): nyears=35 is hard-coded — presumably the record length; confirm.
    xx = np.maximum(rps[0],_interp_ev(Qpeaks.dropna('time').values, df_loc[f'{drivers[1]}'].values, 35))
    yy = np.maximum(rps[0],_interp_ev(Hpeaks.dropna('time').values, df_loc[f'{drivers[0]}'].values, 35))
    im = ax.scatter(x=xx, y=yy, c=c, **kwargs)

    # label the years (from 2000 onwards) whose water-level return period exceeds 10
    # NOTE(review): weibull drops non-finite values, so c can be shorter than tam if
    # 'h' contains NaN — verify the boolean mask lengths match.
    for iyr, yr in enumerate(tam.year[c>10]):
        if yr < 2000: continue
        ax.annotate(str(yr), (xx[tam.year==yr], yy[tam.year==yr]))
                    
    # plot rank correlation
    ax1 = fig.add_subplot(grid[3:, -1])
    for v in drivers:
        ax1.plot(rankdata(df_loc[v]), rankdata(df_loc['h']), color=cmmap[v], label=labs[v], marker=mmap[v], linewidth=0)
    # 1:1 reference line
    ax1.plot([0, 41],[0, 41],'--k')
    
    # axis
    ax.set_xlim([0.85, 38])
    ax.set_ylim([0.85, 38])
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_aspect('equal')
    ax.grid(False)
    ax1.set_xlim([0.5, 41])
    ax1.set_ylim([0.5, 41])
    ax1.set_aspect('equal')
    ax1.grid(False)
    
    # labels
    ax.set_yticks(rps)
    ax.set_yticklabels(rps)
    ax.set_ylabel(labs[drivers[0]]+' return period [years]')
    ax1.set_ylabel('riverine water level rank [-]')
    ax.set_xticks(rps)
    ax.set_xticklabels(rps)
    ax.set_xlabel(labs[drivers[1]]+' return period [years]')
    ax1.set_xlabel('driver rank [-]')
    ax.set_title(f'{loc}. {name} River')
    
    # text 
    # correlation coefficient (p-value) of both drivers for the selected ensemble member
    Hr = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[0]}_r'].values)
    Qr = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[1]}_r'].values)
    Hp = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[0]}_p'].values)
    Qp = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[1]}_p'].values)
    ax1.text(1, 35.5, labs[drivers[0]]+f': {Hr:.2f} ({Hp:.2f})\n{labs[drivers[1]]}: {Qr:.2f} ({Qp:.2f})')
       
    # make colorbar
    # created at a dummy position, then moved next to the return-period panel
    pad, shrink, fraction = 0.02, 1.0, 0.04
    cax = fig.add_axes([1, 1, 0.1, 0.1]) # new ax
    cbar = fig.colorbar(im, extend='max', cax=cax)
    cbar.ax.set_ylabel("riverine water level\n return period [years]", rotation='vertical')
    posn = ax.get_position()
    cax.set_position([posn.x1+pad, posn.y0+posn.height*(1-shrink)/2., posn.width*fraction, posn.height*shrink])

    ax1.legend(loc='lower left', bbox_to_anchor=(1.05, 0.5), title='driver')

    # inset overview map showing only the current location, labelled with its index
    posn = ax1.get_position()
    axg = fig.add_axes([posn.x1+0.01, posn.y0-0.1, posn.width*0.8, posn.height], projection=crs_sub) # new ax   
    basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, features=['land'])
    gdf.loc[[loc],:].plot(ax=axg, marker='o', color='red', markersize=25, legend=True)
    x, y = gdf.loc[loc,:].geometry.coords[0]
    axg.text(x+10, y, loc, transform=crs_sub)

#     fn_fig = join(fdir, 'locs', f'loc{loc:04d}_{model2}_wdw{wdw}.png')
    fn_fig = join(fdir, f'loc{loc:04d}_{model2}_wdw{wdw}.png')
    print(basename(fn_fig))
    plt.savefig(fn_fig)
#     break

## maps

In [None]:

# Lookup tables for the difference maps, keyed by parameter ('h', 'scale', 'loc').
qmin=0  # minimum Q_mean for a location to be drawn as significant

# Raw strings keep the LaTeX backslashes literal; '\D' and '\m' are invalid
# escape sequences in normal string literals and trigger a warning.
labels = {
    'h': r'$\Delta h$ at'+f' T{rp} [m]',
    'scale': r'difference in Gumbel scale ($\beta$) parameter',
    'loc': r'difference in Gumbel location ($\mu$) parameter',
}
# colour-scale limits per parameter
vmaxs = {
    'h': 1.0, 'loc': 0.6, 'scale': 0.15
}
vmins = {
    'h': -0.1, 'loc': -0.2, 'scale': -0.15
}
# symbol used in the panel titles; parameters not listed fall back to their key
names = {
    'loc': r'$\mu$', 'scale': r'$\beta$'
}
# subscript appended to the panel title, keyed by panel number
diff_names = {
    0: '',
    1: '$_{daily}$',
    2: '$_{seasonal}$',
}

# Figure per parameter: global map of the surge-tide difference (top) and the
# surge-seas and seas-tide differences (bottom row). Marker styles:
#   x        : no consistent sign across the ensemble (or Q_mean < qmin)
#   circle   : consistent sign, confidence bounds overlap
#   diamond  : consistent sign and separated confidence bounds
mp = 0.45  # position in the RdBu_r colour map around which the colours are sampled
plt.close('all')

# cmap = ListedColormap(sns.color_palette("coolwarm", 200))
# cmap = cmap_turbo_div
for par in ['h', 'scale', 'loc'][:1]:
    var = f'diff_{par}_surge_tide'
    vmin, vmax = vmins[par],  vmaxs[par]
    # number of colour-bar ticks; np.linspace requires an integer count
    n = int(round(vmax/abs(vmin))) + 2
    norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=0.)
    cticks=np.linspace(vmin, vmax, n)
    cmap = ListedColormap([interpolate_cmap(plt.cm.RdBu_r(np.arange(256)), x) for x in 
                           np.hstack([np.linspace(mp+mp*4*vmin,mp,100), np.linspace(1-mp,1,100)])])    

    # Only the return-level map depends on rp; the parameter maps are written once
    # without a return period in the file name. (The former `irp` check referred to
    # an undefined name and raised a NameError for par != 'h'.)
    if par == 'h':
        fn_fig = join(fdir, f'{var}_{model}_T{rp:03d}.png')
        label = labels[par].format(rp=rp)
    else:
        fn_fig = join(fdir, f'{var}_{model}.png')
        label = labels[par]


    fig = plt.figure(figsize=(18, 10.5))
    grid = plt.GridSpec(3, 2, wspace=0.04, hspace=0.01)

    column = var
    gdf0 = gdf[np.logical_or(gdf[f'{var}_sign']==0, gdf['Q_mean']<qmin)].copy()
    gdf1 = gdf[np.logical_and(np.logical_and(gdf[f'{var}_sign']==1, gdf[f'{var}_sign_ci']!=1), gdf['Q_mean']>=qmin)].copy().sort_values(column, ascending=True)
    gdf2 = gdf[np.logical_and(np.logical_and(gdf[f'{var}_sign']==1, gdf[f'{var}_sign_ci']==1), gdf['Q_mean']>=qmin)].copy().sort_values(column, ascending=True)

    # main map
    axg = fig.add_subplot(grid[:-1, :], projection=crs_sub)
    basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, **bmap_kwargs)
    gdf0.plot(ax=axg, marker='x', color=(0, 0, 0, 0.6), markersize=15, linewidth=0.2, alpha=0.6, legend=False)
    # NOTE: plot_kwargs is the shared dict from the configuration cell and is updated in place
    plot_kwargs.update(markersize=25, marker='o', linewidth=0.5)
    plot_choropleth(
        fig, axg, gdf1, column=column, 
        cmap=cmap, vmin=vmin, vmax=vmax, cticks=cticks, norm=norm, discrete=False, 
        plot_kwargs=plot_kwargs,
        cbar_kwargs=dict(label=label, location='right', extend='both'),
        cbar_pos = dict(pad=0.02, fraction=0.01, shrink=0.6)
    )
    if len(gdf2) > 0:
        plot_kwargs.update(markersize=25, marker='d', linewidth=0.5)
        cax = plot_choropleth(
            fig, axg, gdf2, column=column, 
            cmap=cmap, vmin=vmin, vmax=vmax, cticks=cticks, norm=norm, discrete=False, 
            plot_kwargs=plot_kwargs,

        )
    axg.text(-175, 85, letters[0]+r'. $\Delta$'+names.get(par,par)+diff_names[0], transform=crs_sub, fontsize='large')

    # bottom row: the two partial differences (this loop rebinds var)
    for i, var in enumerate([f'diff_{par}_surge_seas', f'diff_{par}_seas_tide']):

        column = var
        gdf0 = gdf[np.logical_or(gdf[f'{var}_sign']==0, gdf['Q_mean']<qmin)].copy()
        gdf1 = gdf[np.logical_and(np.logical_and(gdf[f'{var}_sign']==1, gdf[f'{var}_sign_ci']!=1), gdf['Q_mean']>=qmin)].copy().sort_values(column, ascending=True)
        gdf2 = gdf[np.logical_and(np.logical_and(gdf[f'{var}_sign']==1, gdf[f'{var}_sign_ci']==1), gdf['Q_mean']>=qmin)].copy().sort_values(column, ascending=True)

        # main map
        axg = fig.add_subplot(grid[-1, i], projection=crs_sub)
        basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, **bmap_kwargs)
        gdf0.plot(ax=axg, marker='x', color=(0, 0, 0, 0.6), markersize=10, linewidth=0.2, alpha=0.6, legend=False)

        plot_kwargs.update(markersize=20, marker='o', linewidth=0.5)
        cax = plot_choropleth(
            fig, axg, gdf1, column=column, 
            cmap=cmap, vmin=vmin, vmax=vmax, cticks=cticks, norm=norm, discrete=False, 
            plot_kwargs=plot_kwargs,
        )
        if len(gdf2) > 0:
            plot_kwargs.update(markersize=s, marker='d')
            cax = plot_choropleth(
                fig, axg, gdf2, column=column, 
                cmap=cmap, vmin=vmin, vmax=vmax, cticks=cticks, norm=norm, discrete=False, 
                plot_kwargs=plot_kwargs
            )
        axg.text(-175, 85, letters[i+1]+r'. $\Delta$'+names.get(par,par)+diff_names[i+1], transform=crs_sub, fontsize='large')

    print(basename(fn_fig))
    plt.savefig(fn_fig)


In [None]:
import geopandas as gpd

# Read the basin polygons and derive the two helper columns in one chained step:
# the polygon area and the river-mouth index (the 'DN' attribute shifted by one).
basins = gpd.read_file(
    r'/home/dirk/models/cama-flood_bmi_v3.6.2/map/global_15min/flw_basins'
)
basins = basins.assign(area=basins.area, rivmth_idx=basins['DN'] - 1)

# Optional de-duplication, currently disabled:
# basins = basins.sort_values(by='area',ascending=False).drop_duplicates(subset='rivmth_idx')

# Attach the per-location attributes from `gdf`; the right join keeps every row of `gdf`,
# while the geometry comes from the basin polygons.
basins = basins.merge(gdf.drop(columns='geometry'), on='rivmth_idx', how='right')

In [None]:
# Two stacked basin choropleths of the 'people_affected_dH_dp' columns of `basins`:
# panel A shows the log10-transformed count, panel B the percentage column.
qmin=0

# --- panel A settings -------------------------------------------------------------------
column = 'people_affected_dH_dp'
label = 'people affected [-]'
vmin, vmax, n = 2, 6, 5
cticks = np.linspace(vmin, vmax, n)
# NOTE: the maps below pass plt.cm.Greens / plt.cm.Blues explicitly; this colormap is only
# kept as a module-level name.
cmap = ListedColormap(sns.cubehelix_palette(16, start=.5, rot=-.75))

# Work on a copy so that re-running the cell never log-transforms `basins` twice.
gdf1 = basins.copy()
if 'perc' not in column:
    # log10 of the column with non-positive entries mapped to 0. The log is taken only
    # after replacing values <= 0 by 1 (log10(1) == 0), which avoids the divide-by-zero /
    # invalid-value RuntimeWarnings of evaluating log10 on the raw column. NaN stays NaN.
    gdf1[column] = np.log10(np.where(gdf1[column] <= 0, 1, gdf1[column]))

plt.close('all')
fig = plt.figure(figsize=(18, 10.5))
grid = plt.GridSpec(2, 1, wspace=0.01, hspace=0.01)

# map 1
axg = fig.add_subplot(grid[0, 0], projection=crs_sub)
basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, features=['land'], **bmap_kwargs)
# Polygons are drawn here, so the point-marker settings left in the shared `plot_kwargs`
# by earlier cells are removed and a thin outline is used instead.
plot_kwargs.pop('marker', None)
plot_kwargs.pop('markersize', None)
plot_kwargs.update(linewidth=0.1)
cax = plot_choropleth(
    fig, axg, gdf1, column=column,
    cmap=plt.cm.Greens, cticks=cticks, vmin=vmin, vmax=vmax, discrete=False,
    plot_kwargs=plot_kwargs,
    cbar_kwargs=dict(label=label, location='right'),
    cbar_pos=dict(pad=0.02, fraction=0.01, shrink=0.5)
)
# Colorbar ticks are log10 exponents; braces keep the whole exponent superscripted in
# mathtext even when it has more than one character.
cax.set_yticklabels([f'$10^{{{e:0.0f}}}$' for e in cticks])
axg.text(-175, 85, letters[0], transform=crs_sub, fontsize='large')

# --- panel B settings -------------------------------------------------------------------
column = 'people_affected_dH_dp_perc'
label = 'people affected [%]'
vmin, vmax, n = 0, 40, 5
cticks = np.linspace(vmin, vmax, n)

# map 2
axg2 = fig.add_subplot(grid[1, 0], projection=crs_sub)
basemap(axg2, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, features=['land'], **bmap_kwargs)
plot_kwargs.pop('marker', None)
plot_kwargs.pop('markersize', None)
plot_kwargs.update(linewidth=0.1)
cax2 = plot_choropleth(
    fig, axg2, gdf1, column=column,
    cmap=plt.cm.Blues, cticks=cticks, vmin=vmin, vmax=vmax, discrete=False,
    plot_kwargs=plot_kwargs,
    cbar_kwargs=dict(label=label, location='right'),
    cbar_pos=dict(pad=0.02, fraction=0.01, shrink=0.5)
)
axg2.text(-175, 85, letters[1], transform=crs_sub, fontsize='large')


fn_fig = join(fdir, f'expected_annual_people_exposed_{model}.png')
print(basename(fn_fig))
plt.savefig(fn_fig)

In [None]:
# Lookup tables per flood-driver class: marker shape, colour and legend name.
clist = plt.cm.tab10.colors
mmapp = {
    'insign': 'o',
    'compound': 'd',
    'H': '^',
    'Q': 's',
}
cmapp = {
    'compound_(N=5)': clist[3],
    'compound': clist[1],
    'H': clist[0],
    'Q': clist[2],
    'insign': 'grey',
}
nmapp = {
    # the legend name of the 'H' class depends on which surge driver is analysed
    'H': {'Hsurge': 'Surge', 'Hskewsurge': 'Skew Surge'}[drivers[0]],
    'Q': 'Discharge',
    'insign': 'Insignificant',
    'compound': 'Compound',
    'all': 'All locations',
}

# Shared settings of the two correlation maps in the bottom row.
label = 'spearman correlation [-]'
s = 20
vmin, vmax, n = 0.5, 1, 6
cticks = np.linspace(vmin, vmax, n)
cmap = plt.cm.Blues

plt.close('all')
fig = plt.figure(figsize=(18, 10.5))
grid = plt.GridSpec(3, 2, wspace=0.02, hspace=0.01)

# Top panel (spanning the first two grid rows): one marker layer per driver class.
var = 'diff_h_surge_tide'
gdf1 = gdf

axg = fig.add_subplot(grid[:-1, :], projection=crs_sub)
basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, **bmap_kwargs)
# Classes are drawn in reverse dictionary order; classes without a flag column are skipped.
for drv in reversed(list(mmapp)):
    flag_col = f'driver_{drv}'
    if flag_col in gdf1.columns:
        flagged = gdf1[gdf1[flag_col].eq(True)]
        flagged.plot(
            ax=axg,
            marker=mmapp[drv],
            color=cmapp[drv],
            markersize=s,
            linewidth=0.5,
            legend=False,
            label=nmapp.get(drv, drv),
        )
axg.legend(loc='lower left', bbox_to_anchor=(0.05, 0.1))
axg.text(-175, 85, f'{letters[0]}. Flood driver classification', transform=crs_sub, fontsize='large')

# Bottom row: values of the 'driver_<X>_r' column for the H and Q drivers.
for panel, drv in enumerate(['H', 'Q']):
    axg = fig.add_subplot(grid[-1, panel], projection=crs_sub)
    basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, **bmap_kwargs)

    column = f'driver_{drv}_r'
    sign_flag = gdf[f'driver_{drv}_sign']
    gdf0 = gdf[sign_flag.eq(False)]
    gdf1 = gdf[sign_flag.eq(True)].sort_values(by=column, ascending=True)

    # locations flagged as not significant are shown as grey crosses
    gdf0.plot(ax=axg, marker='x', color=(0, 0, 0, 0.6), markersize=12, linewidth=0.2, alpha=0.6, legend=False)

    plot_kwargs['markersize'] = 12
    # only the right-hand panel requests a colorbar
    cbar_kwargs = None
    if panel == 1:
        cbar_kwargs = dict(label=label, location='right', extend='min')
    cax = plot_choropleth(
        fig, axg, gdf1, column=column,
        cmap=cmap, vmin=vmin, vmax=vmax, cticks=cticks, discrete=False,
        plot_kwargs=plot_kwargs,
        cbar_kwargs=cbar_kwargs,
        cbar_pos=dict(pad=0.02, fraction=0.015, shrink=0.8)
    )
    axg.text(-175, 85, f'{letters[panel+1]}. {nmapp[drv]}', transform=crs_sub, fontsize='large')

fn_fig = join(fdir, f'spearman_driver_{drivers[0]}_wdw{wdw}_{model}.png')
print(basename(fn_fig))
plt.savefig(fn_fig)

In [None]:
# Box plots of `var` against quartile bins of several location attributes, with one
# coloured box series per driver class. One figure is written per attribute group in `vvs`.
vvs = {
    'drivers': {
#         'Hseasrange_cm': dict(label='seasonal sea level range [cm]'),
        'Hskewsurge_amax_cm': dict(label='mean annual max. skew surge [cm]'),
#         'Hskewsurge_amax_cv': dict(label='interannual variability (CV)\nannual max. skew surge [-]', fmt='.2f'),

        'Q_amax': dict(label='mean annual max. discharge [m$^3$ s$^{-1}$]'),
    },
    'basin': {
        'Q_mean': dict(label='mean discharge [m$^3$ s$^{-1}$]'),
        'Q_amax_cv': dict(label='interannual variability (CV)\nannual max. discharge [-]', fmt='.2f'),
        'uparea_100': dict(label='catchment area [x100 km$^2$]'),
#         'mean_drain_length': dict(label='mean drainage length [km]'),
        'mean_drain_slope': dict(label='mean drainage slope [m km$^{-1}$]', fmt='.2f'),
    }
}
rp=10
dh=0.12  # width of a single box; also sets the horizontal offset between classes
var = 'diff_h_surge_tide'
# only locations whose `<var>_sign` flag equals 1 are analysed
attrs_sign = gdf[gdf[f'{var}_sign']==1]

box_kwargs=dict(whis=[5,95], boxprops=dict(linewidth=1.), medianprops=dict(linewidth=1.5),
                showfliers=False, flierprops=dict(markersize=2))

for kind, vs in vvs.items():
    fig = plt.figure(figsize=(len(vs)*4, 3.5))
    grid = plt.GridSpec(1, len(vs.keys()), hspace=0.1, wspace=0.1)
    ylim2 = [-0.1, 1.1]
    ylab2 = f'difference in riverine water level [m]\n 1-in-{rp} years return period'
    ylab1 = 'count [-]'  # not used by this figure
    for i, name in enumerate(vs):

        ax2 = fig.add_subplot(grid[:,i])

        # Bin edges are the 0/25/50/75 % quantiles of the attribute over all analysed
        # locations, so every class shares the same bins.
        qbins = np.arange(0,0.99,0.25)
        bins = attrs_sign[name].quantile(qbins).values
        n = len(bins)
        lab = vs[name]['label']
        fmt = vs[name].get('fmt', None)
        # only '.2f' and '.1f' are honoured; anything else falls back to whole numbers
        tick_fmt = fmt if fmt in ('.2f', '.1f') else '.0f'
        ticklabs = [format(v0, tick_fmt) for v0 in bins]

        for ii, v in enumerate(['Q', 'H', 'compound', 'all']):
            c = cmapp.get(v, 'k')
            if v != 'all':
                df = attrs_sign[attrs_sign[f'driver_{v}']==1].copy()
            else:
                df = attrs_sign.copy()
            # horizontal shift of this class within each bin
            offset = 0.5*dh+(ii-2)*dh*1.5
            df['bins'] = np.digitize(df[name], bins)
            # One box is drawn per bin that actually occurs in this subset, so the positions
            # must list exactly those bins. Passing all `n` positions fails in matplotlib
            # whenever a class has no location in one of the bins.
            present = np.sort(df['bins'].unique())
            if len(present) > 0:
                positions = (present - 1) + offset
                dd = df.boxplot(column=var, by='bins', widths=dh, positions=positions, ax=ax2,
                                return_type='dict', **box_kwargs)
                # `.items()` replaces `Series.iteritems()`, which was removed in pandas 2.0
                for _, row in dd.items():
                    for part in ('boxes', 'medians', 'whiskers', 'caps'):
                        for artist in row[part]:
                            artist.set_color(c)
            # proxy line for the legend, drawn at y=2 which lies above `ylim2`
            if i == len(vs)-1:
                ax2.plot([0,1],[2,2], color=c, label=v.split('_')[0].replace('H','$H_{surge}$'))

        ax2.set_ylim(ylim2)
        ax2.set_xlim([-0.5,n-0.5])
        # ticks sit on the left edge of each bin and are labelled with the bin's lower bound
        ax2.set_xticks(np.arange(n)-0.5)
        ax2.set_xticklabels(ticklabs)
        ax2.set_xlabel(lab)
        ax2.set_title('')
        ax2.text(-0.4,1.02, letters[i])

        if i != 0:
            ax2.set_yticklabels([])
        else:
            ax2.set_ylabel(ylab2)

    ax2.legend(loc='lower left', bbox_to_anchor=(1.05, 0.5), title='main driver')
    # blank out the figure title
    fig.suptitle('')

    fn = join(fdir, f'boxplot_{var}_rp{rp:03d}_vs_{kind}_{drivers[0]}_{model}.png')
    print(basename(fn))
    plt.savefig(fn)

In [None]:
# Box plots of location attributes per driver class (all / Q / H / compound), one panel per
# attribute. The mean marker of a class is filled when that class differs from both other
# classes in a two-sample t-test (p < 0.01), and hollow otherwise.
vs = {
#     'Hseasrange': dict(label='seasonal sea level range [m]', ylim=[0, 0.5]),
    'Hskewsurge_amax': dict(label='$H_{SS}$ [m]', ylim=[0,1.5], title='Mean annual max. $H_{SS}$\n'),
    'Hskewsurge_amax_cv': dict(label='Coef. of variation  [-]', fmt='.2f', ylim=[0,0.6], title='Coef. of variation \nannual max. $H_{SS}$'),
    'Q_amax_log10': dict(label='Q [m$^3$ s$^{-1}$]', fmt='e', ylim=[0.5,4.0], title='Mean annual max. $Q$\n'),
    'Q_amax_cv': dict(label='Coef. of variation [-]', ylim=[0,1.5], title='Coef. of variation \nannual max. $Q$'),

    'Q_mean_log10': dict(label='Q [m$^3$ s$^{-1}$]', fmt='e', ylim=[0,3.2], title='Long term mean $Q$'),
#     'runoff': dict(label='mean runoff [mm]'),
    'uparea_log10': dict(label='Area [km$^2$]', fmt='e', ylim=[3,5], title='Catchment area'),
    'mean_drain_length': dict(label='Length [km]', ylim=[0,400], title='Drainage length'),
    'mean_drain_slope': dict(label='Slope [m km$^{-1}$]', fmt='.2f', ylim=[0,8], title='Drainage slope'),
}

box_kwargs=dict(whis=[5,95], boxprops=dict(linewidth=1.), medianprops=dict(linewidth=1.5),
                showfliers=False, flierprops=dict(markersize=2), showmeans=True, meanprops=dict(marker='s', markersize=5))

# Derived columns; values below 1 are clipped to 1 before taking log10 (i.e. mapped to 0).
gdf['Q_amax_log10'] = np.log10(np.maximum(1, gdf['Q_amax']))
gdf['Q_mean_log10'] = np.log10(np.maximum(1, gdf['Q_mean']))
gdf['uparea_log10'] = np.log10(np.maximum(1, gdf['uparea']))
gdf['runoff'] = gdf['Q_mean'] / (gdf['uparea']*1e6) * 1e3 #[mm]
# single code per location built from the class flags: H -> 1, Q -> 2, compound -> 3
gdf['driver'] = gdf['driver_H']+gdf['driver_Q']*2+gdf['driver_compound']*3


dh=0.12
var = 'diff_h_surge_tide'
df1 = gdf


ncol = int(np.ceil(len(vs)/2))
fig = plt.figure(figsize=(ncol*2.5, 2*3))
grid = plt.GridSpec(2, ncol, hspace=0.2, wspace=0.55)

for i, name in enumerate(vs):
    irow, icol = divmod(i, ncol)
    ax2 = fig.add_subplot(grid[irow, icol])

    # single-column frames per class (DataFrame.boxplot needs a frame, not a Series)
    samples = {v: df1[[name]][df1[f'driver_{v}']==1] for v in ['Q', 'H', 'compound']}
    samples['all'] = df1[[name]]

    for ii, v in enumerate(['all','Q', 'H', 'compound']):
        sign = True
        if v != 'all':
            other = [v1 for v1 in ['Q', 'H', 'compound'] if v1 != v]
            # The t-test gets 1-D Series so that it returns a scalar p-value. Feeding the
            # single-column frames yields 1-element arrays, and float() on those is
            # deprecated since NumPy 1.25.
            pvals = [
                ttest_ind(samples[v][name], samples[v1][name], equal_var=True)[1]
                for v1 in other
            ]
            sign = np.all(np.array(pvals, dtype=float) < 0.01)
        positions=[0.5*dh+(ii-2)*dh*1.5]
        dd = samples[v].boxplot(column=name, widths=dh, positions=positions, ax=ax2, return_type='dict', **box_kwargs)
        c = cmapp.get(v, 'k')
        # proxy line for the legend, drawn at x=2 which lies outside the x-limits set below
        if i == len(vs)-1:
            ax2.plot([2,2],[0,2], color=c, label=nmapp.get(v,v))
        for part in ('boxes', 'medians', 'whiskers', 'caps'):
            for artist in dd[part]:
                artist.set_color(c)
        for p in dd['means']:
            if v == 'all':
                p.set_marker('x')
            # filled marker = significant difference, hollow marker = not significant
            p.set_markerfacecolor(c if sign else 'none')
            p.set_markeredgecolor(c)

    ylim = vs[name].get('ylim', None)
    if ylim is not None:
        ymin, ymax = ylim
    else:
        ymin, ymax = samples['all'][name].quantile([0.00,0.99])

    fmt = vs[name].get('fmt', None)
    if fmt == 'e':
        # Values are log10-transformed, so integer ticks are labelled as powers of ten.
        # The range is inclusive of floor(ymax) so an integer upper limit keeps its tick
        # (e.g. ylim=[3, 5] gets 10^3, 10^4 and 10^5).
        yticks = np.arange(np.floor(ymin), np.floor(ymax) + 1, 1)
        # braces keep multi-character exponents fully superscripted in mathtext
        yticklabels = [f'10$^{{{t:.0f}}}$' for t in yticks]
        ax2.set_yticks(yticks)
        ax2.set_yticklabels(yticklabels)

    lab = vs[name]['label']
    title = vs[name].get('title', name)
    ax2.set_ylabel(lab)
    ax2.set_xlim([-0.45,0.4])
    ax2.set_ylim([ymin, ymax])
    ax2.set_xticks([])
    ax2.set_title(f'{letters[i]}. {title}')

# the legend is attached to the last panel and placed to its right
ax2.legend(loc='lower left', bbox_to_anchor=(1.05, 0.5), title='main driver')

fn = join(fdir, f'boxplot_classification_{model}.png')
print(basename(fn))
plt.savefig(fn)