In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import geopandas as gp
from os.path import join, basename
from datetime import date, datetime
import os
from scipy.stats import rankdata
from scipy.interpolate import interp1d

In [None]:
import sys
import os
sys.path.append(os.path.abspath('../3-postprocess/'))
import xstats as xs 
from peaks import get_peaks

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from plot_tools import *
import cartopy.crs as ccrs
import seaborn as sns
from string import ascii_uppercase as letters

In [None]:
# --- global plotting configuration -------------------------------------------
context = 'paper'
# defaults applied by matplotlib when a figure is saved
rc = {
    'savefig.bbox': 'tight',
    'savefig.format': 'png',
    'savefig.dpi': 300,
}
# slightly larger fonts for the 'paper' context, seaborn default scale otherwise
if context == 'paper':
    sns.set(context=context, style='whitegrid', font_scale=1.2, rc=rc)
else:
    sns.set(context=context, style='whitegrid', font_scale=1., rc=rc)

crs_sub = ccrs.PlateCarree()  # projection used for every map axes below
s = 45      # base marker size of the point maps
qmin = 25   # placeholder; overwritten once `gdf` is available

# keyword arguments shared by the plotting calls in the cells below
bmap_kwargs = {}
plot_kwargs = {
    'edgecolor': (0.5, 0.5, 0.5, 0.8),
    'linewidth': 0.5,
    'legend': False,
    'zorder': 2,
}
box_kwargs = {
    'whis': [5, 95],
    'boxprops': {'linewidth': 1.},
    'medianprops': {'linewidth': 1.5},
    'showfliers': False,
    'flierprops': {'markersize': 2},
}

# locations highlighted in the per-location figure, and display names by index
locs = [1930, 1618, 809]
riv_names = {
    809: 'Mattapone',
    1695: 'Weser',
    1930: 'Dal',
    1618: 'Volta',
    2884: 'Ataran',
}
# driver variables: drivers[0] goes on the y-axis, drivers[1] on the x-axis
drivers = ['Hskewsurge', 'Q']

In [None]:
# Project root. Defaults to the original location; set the environment variable
# COMPOUND_HOTSPOTS_ROOT to run the notebook where the data lives elsewhere.
root = os.environ.get('COMPOUND_HOTSPOTS_ROOT', r'/scratch/compound_hotspots')
ddir = join(root, 'data', '4-postprocessed')  # input: post-processed datasets
fdir = join(root, 'reports', 'figures')       # output: saved figures
wdw = 1         # part of the driver file names (`..._wdw{wdw}...`)
model = 'mean'  # part of the `..._ensemble-{model}.nc` file names
rp = 10         # value selected on the `T` dimension of the merged statistics

### Load & combine data

In [None]:
# Attribute table read from CSV (indexed by its `index` column), with the
# derived / unit-converted columns added in one chain.
attrs_fn = join(ddir, 'rivmth_mean_attrs.csv')
attrs = (
    pd.read_csv(attrs_fn, index_col='index')
    .rename(columns={'rivmth_lat': 'lat', 'rivmth_lon': 'lon'})
    .assign(
        # floor at 0.001 so the logarithm is always defined
        uparea_log10=lambda d: np.log10(np.maximum(d['uparea'].values, 0.001)),
        Hseasrange=lambda d: d['Hseas_amax'] - d['Hseas_amin'],
        mean_drain_length=lambda d: d['mean_drain_length'] / 1e3,  # [km]
        mean_drain_slope=lambda d: d['mean_drain_slope'] * 1e3,    # [m/km]
        uparea_100=lambda d: d['uparea'] / 1e2,
        # the *_cm columns are the corresponding columns multiplied by 100
        Hseasrange_cm=lambda d: d['Hseasrange'] * 1e2,
        Hseas_amax_cm=lambda d: d['Hseas_amax'] * 1e2,
        Hsurge_amax_cm=lambda d: d['Hsurge_amax'] * 1e2,
        Hskewsurge_amax_cm=lambda d: d['Hskewsurge_amax'] * 1e2,
    )
)

In [None]:
# file locations
fn_rivmth_ts = join(ddir, 'rivmth_reanalysis.zarr')      # time series
fn_wse_ev = join(ddir, 'rivmth_wse_ev.nc')               # extreme value analysis
fn_drivers = join(ddir, f'rivmth_drivers_wdw{wdw}.nc')   # annual maxima with drivers

# lazily open the time series data
ds = xr.open_zarr(fn_rivmth_ts)
# open the extreme value analysis and the annual-maxima/driver data,
# both restricted to the locations present in `attrs`
ds_rp, ds_peaks = (
    xr.open_dataset(fn).sel(index=attrs.index)  # .sel(ensemble=[model])
    for fn in (fn_wse_ev, fn_drivers)
)

In [None]:
# file locations; note that `fn_drivers` and `fn_wse_ev` are re-pointed here to
# the per-ensemble (`model`) files
fn_spear = join(ddir, f'rivmth_drivers_wdw{wdw}_spearmanrank.nc')
fn_drivers = join(ddir, f'rivmth_drivers_wdw{wdw}_ensemble-{model}.nc')
fn_impact = join(ddir, f'rivmth_pop_affected_ensemble-{model}.nc')
fn_wse_ev = join(ddir, f'rivmth_wse_ev_ensemble-{model}.nc')

ds_spear, ds_drivers, ds_impact, ds_diff_h_stats = map(
    xr.open_dataset, (fn_spear, fn_drivers, fn_impact, fn_wse_ev)
)

# one dataset with all statistics, evaluated at T == rp
ds_stats = xr.merge([ds_diff_h_stats, ds_impact, ds_drivers]).sel(T=rp)

In [None]:
# statistics as a table (coordinates dropped), placed side by side with the
# attribute table and converted with the project helper `pandas2geopandas`
_stats_df = ds_stats.reset_coords(drop=True).to_dataframe()
gdf = pandas2geopandas(pd.concat([attrs, _stats_df], axis=1))

In [None]:
# discharge threshold: the value at position 2000 of Q_mean sorted descending,
# so `Q_mean > qmin` keeps the locations ranked above it
_q_mean_desc = gdf['Q_mean'].sort_values(ascending=False)
qmin = _q_mean_desc.iloc[2000]
# number of locations that pass the threshold (displayed as the cell output)
(gdf['Q_mean'] > qmin).sum()

## drivers - methods

In [None]:
from scipy.stats import rankdata
from scipy.stats import ttest_ind
from scipy.interpolate import interp1d

def weibull(peaks, nyears=None):
    """Empirical return periods of ``peaks`` from Weibull plotting positions.

    Non-finite values are dropped. The remaining values are ranked in ascending
    order (ties get distinct ranks in order of occurrence) and assigned the
    non-exceedance probability ``P = rank / (n + 1)``.

    Parameters
    ----------
    peaks : array-like
        Peak values; lists, tuples, pandas Series and numpy arrays are accepted.
    nyears : float, optional
        If given, the result is divided by the mean number of peaks per
        ``nyears`` (``n / nyears``). If None, a frequency of 1 is used.

    Returns
    -------
    numpy.ndarray
        ``1 / (1 - P) / freq`` for every finite peak, in the order in which the
        finite peaks appear in the input.
    """
    # accept any array-like, not only numpy arrays
    peaks = np.asarray(peaks, dtype=float)
    peaks = peaks[np.isfinite(peaks)]
    peaks_rank = rankdata(peaks, 'ordinal')
    P = peaks_rank / (peaks.size + 1)
    freq = 1. if nyears is None else peaks.size / nyears
    rp = 1 / (1 - P) / freq
    return rp

def _interp_ev(peaks, vals, nyears=None):
    peaks = peaks[np.isfinite(peaks)]
    peaks.sort()
    peaks_rank = np.arange(peaks.size)+1
    P = peaks_rank/(peaks.size+1)
    freq = 1. if nyears is None else peaks.size / nyears
    rp = 1/(1-P)/freq
    kwargs = dict(
        kind='linear', bounds_error=False, assume_sorted=True,
        fill_value=(rp.min(), rp.max())
    )
    rp_out = interp1d(peaks, rp, **kwargs)(vals)
    return rp_out

In [None]:
# Figure with one column per location in `locs`:
#   top row    - return period of both drivers for the events in `ds_peaks`,
#                coloured by the empirical return period of `h`
#   bottom row - rank of each driver plotted against the rank of `h`
# A shared colorbar, a legend and a small locator map are added after the loop.

# short names for the daily variables of the time series dataset
rm = {'Hskewsurge_day':'Hskewsurge', 'Htot_day_max': 'Htot', 'Htide_day_max': 'Htide', 'Hsurge_day_max': 'Hsurge'}
# drop the existing 'Hsurge'/'Htide' variables first so the renamed ones do not clash
ds1 = ds.sel(scen='surge').drop(['Hsurge', 'Htide']).rename(rm)#.sel(time=slice('01-01-2000', '31-12-2014'))
model2 = 'nerc'  # value selected on the `ensemble` dimension for this figure

# axis tick values [years]; the colour classes only use the values >= 1
rps = np.array([0.2, 1, 2, 4, 8, 16, 32])
cmap = plt.cm.viridis_r
norm = BoundaryNorm(rps[rps>=1], cmap.N)

# marker, colour and label per driver for the rank plots
mmap = {
    'Hskewsurge': '^',
    'Q': 's',
}
cmmap = {
    'Hskewsurge': plt.cm.tab10.colors[3],
    'Q': plt.cm.tab10.colors[2],
}
labs = {
    'Hskewsurge': '$H_{SS}$',
    'Q': 'Q',
}


plt.close('all')
xlim = [0.1,40]  # NOTE(review): not referenced again in this cell
shape=(2,len(locs))
fig = plt.figure(figsize=(len(locs)*4, 8.5))
grid = plt.GridSpec(*shape, hspace=0.15, wspace=0.15)

# scatter settings for the top row
kwargs = dict(cmap=cmap, norm=norm, linewidth=0.5, edgecolor='k', s=35)

for i, loc in enumerate(locs):
    name = riv_names.get(loc,'unknown')
    gdf_loc = gdf.loc[loc,:]    # NOTE(review): not referenced again in this cell
    # with shape[1] == len(locs), irow is always 0 and icol equals i
    irow = i // shape[1]
    icol = i % shape[1]
    
    # get data
    ts_loc = ds1.sel(index=loc, ensemble=model2).reset_coords(drop=True)
    df_loc = ds_peaks.sel(index=loc, ensemble=model2).reset_coords(drop=True).to_dataframe()
#     df_loc[~np.isfinite(df_loc)] = 0.9

    # plot rp
    ax = fig.add_subplot(grid[irow, icol])
    # colour: empirical return period of the water level `h`
    c = weibull(df_loc['h'].values)
    # only peaks above the 75th percentile of the full series are kept
    ythresh = np.percentile(ts_loc[drivers[0]], 75)
    xthresh = np.percentile(ts_loc[drivers[1]], 75)
    # min_dist is presumably the minimum separation between peaks in time
    # steps - confirm in peaks.get_peaks
    ypeaks = get_peaks(ts_loc[drivers[0]], min_dist=30)
    ypeaks = ypeaks.where(ypeaks>ythresh).dropna('time').values #.groupby('time.year').max('time').values
    xpeaks = get_peaks(ts_loc[drivers[1]], min_dist=45)
    xpeaks = xpeaks.where(xpeaks>xthresh).dropna('time').values #.groupby('time.year').max('time').values
    # return period of the driver values in df_loc, interpolated from the peaks;
    # 35 is passed as `nyears` (presumably the record length in years - TODO confirm).
    # Values are floored just above rps[0], the lower axis limit set below.
    xx = np.maximum(rps[0]*1.1,_interp_ev(xpeaks, df_loc[f'{drivers[1]}'].values, 35))
    yy = np.maximum(rps[0]*1.1,_interp_ev(ypeaks, df_loc[f'{drivers[0]}'].values, 35))
    im = ax.scatter(x=xx, y=yy, c=c, **kwargs)
    
    # plot rank correlation
    ax1 = fig.add_subplot(grid[irow+1, icol])
    for v in drivers:
        ax1.plot(rankdata(df_loc[v]), rankdata(df_loc['h']), color=cmmap[v], label=labs[v], marker=mmap[v], 
                 linewidth=0, markersize=5 if mmap[v]=='s' else 6)
    # 1:1 line; 41 is presumably the number of events + 1 - TODO confirm
    ax1.plot([0, 41],[0, 41],'--k',zorder=-1)
    
    # labels
    ax.set_xscale('log')
    ax.set_yscale('log')
    # y tick labels and y axis labels only on the first column
    if i == 0:
        ax.set_yticks(rps)
        ax.set_yticklabels(rps)
        ax.set_ylabel(labs[drivers[0]]+' return period [years]')
        ax1.set_ylabel('riverine water level rank [-]')
    else:
        ax.set_yticklabels([])
        ax.set_ylabel("")
        ax1.set_yticklabels([])
        ax1.set_ylabel("")
    ax.set_xticks(rps)
    ax.set_xticklabels(rps)
    ax.set_xlabel(labs[drivers[1]]+' return period [years]')
    ax1.set_xlabel('driver rank [-]')
    ax.set_title(f'{letters[i]}. {name} River')
    
    # axis
    ax.set_xlim([rps[0], 38])
    ax.set_ylim([rps[0], 38])
    ax.set_aspect('equal')
    ax.grid(False)
    ax1.set_xlim([0.5, 41])
    ax1.set_ylim([0.5, 41])
    ax1.set_aspect('equal')
    ax1.grid(False)
    
    # text: `<driver>_r` (`<driver>_p`) read from the spearman-rank dataset;
    # presumably the correlation coefficient and its p-value - confirm
    Hr = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[0]}_r'].values)
    Qr = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[1]}_r'].values)
    Hp = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[0]}_p'].values)
    Qp = float(ds_spear.sel(index=loc, ensemble=model2)[f'{drivers[1]}_p'].values)
    ax1.text(1, 35.5, labs[drivers[0]]+f': {Hr:.2f} ({Hp:.2f})\n{labs[drivers[1]]}: {Qr:.2f} ({Qp:.2f})')

# make colorbar
# `im`, `ax` and `ax1` below are the objects left over from the last loop iteration.
# The colorbar axes is created at a dummy position and then moved next to `ax`.
pad, shrink, fraction = 0.02, 1.0, 0.04
cax = fig.add_axes([1, 1, 0.1, 0.1]) # new ax
cbar = fig.colorbar(im, extend='max', cax=cax)
cbar.ax.set_ylabel("riverine water level\n return period [years]", rotation='vertical')
posn = ax.get_position()
cax.set_position([posn.x1+pad, posn.y0+posn.height*(1-shrink)/2., posn.width*fraction, posn.height*shrink])

# legend of the rank plots, anchored outside the last bottom-row axes
ax1.legend(loc='lower left', bbox_to_anchor=(1.05, 0.5), title='driver')

# locator map to the right of the last bottom-row axes, labelled with the panel letters
posn = ax1.get_position()
axg = fig.add_axes([posn.x1+0.01, posn.y0-0.1, posn.width*0.8, posn.height], projection=crs_sub) # new ax   
basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, features=['land'])
gdf.loc[locs,:].plot(ax=axg, marker='o', color='red', markersize=50, legend=True)
for i, loc in enumerate(locs):
    x, y = gdf.loc[loc,:].geometry.coords[0]
    axg.text(x+10, y, letters[i], transform=crs_sub)

fn_fig = join(fdir, f'locs_rp_spearmanr_{drivers[0]}_{model2}_wdw{wdw}.png')
print(basename(fn_fig))
plt.savefig(fn_fig)

## flood levels - results

In [None]:
# Maps of the difference in a parameter `par` between scenarios: one large map
# for `diff_{par}_surge_tide` and two small maps for `diff_{par}_surge_seas`
# and `diff_{par}_seas_tide`.

# Raw strings keep the LaTeX backslashes without relying on invalid escape
# sequences such as '\D' or '\m'.
labels = {
    'h': r'$\Delta h$ at' + f' T{rp} [m]',
    'scale': r'difference in Gumbel scale ($\beta$) parameter',
    'loc': r'difference in Gumbel location ($\mu$) parameter',
}
# colour range per parameter
vmaxs = {
    'h': 1.0, 'loc': 0.6, 'scale': 0.15
}
vmins = {
    'h': -0.1, 'loc': -0.2, 'scale': -0.15
}
# symbols used in the panel titles
names = {
    'loc': r'$\mu$', 'scale': r'$\beta$'
}
diff_names = {
    0: '',
    1: '$_{daily}$',
    2: '$_{seasonal}$',
}


def _split_by_significance(gdf, var, qmin):
    """Split the rows with ``Q_mean > qmin`` into three frames by the flags of ``var``.

    Parameters
    ----------
    gdf : DataFrame-like
        Must hold the columns ``Q_mean``, ``var``, ``{var}_sign`` and
        ``{var}_sign_ci``.
    var : str
        Name of the value column.
    qmin : float
        Lower (exclusive) bound on ``Q_mean``.

    Returns
    -------
    tuple
        ``(gdf0, gdf1, gdf2)``, each a copy:
        ``gdf0`` has ``{var}_sign == 0`` (original row order);
        ``gdf1`` has ``{var}_sign == 1`` and ``{var}_sign_ci != 1``;
        ``gdf2`` has ``{var}_sign == 1`` and ``{var}_sign_ci == 1``.
        ``gdf1`` and ``gdf2`` are sorted ascending by ``var``.
    """
    large = gdf['Q_mean'] > qmin
    sign = gdf[f'{var}_sign']
    sign_ci = gdf[f'{var}_sign_ci']
    gdf0 = gdf[(sign == 0) & large].copy()
    gdf1 = gdf[(sign == 1) & (sign_ci != 1) & large].copy().sort_values(var, ascending=True)
    gdf2 = gdf[(sign == 1) & (sign_ci == 1) & large].copy().sort_values(var, ascending=True)
    return gdf0, gdf1, gdf2


mp = 0.45
plt.close('all')

# cmap = ListedColormap(sns.color_palette("coolwarm", 200))
# cmap = cmap_turbo_div
# only the first parameter ('h') is plotted; widen the slice to plot the others
for par in ['h', 'scale', 'loc'][:1]:
    var = f'diff_{par}_surge_tide'
    vmin, vmax = vmins[par],  vmaxs[par]
    # round before truncating: 0.6/0.2 evaluates to 2.9999999999999996, which a
    # bare int() would turn into one tick too few
    n = int(round(vmax/abs(vmin)+2))
    norm = MidpointNormalize(vmin=vmin, vmax=vmax, midpoint=0.)
    cticks=np.linspace(vmin, vmax, n)
    # diverging colormap sampled from RdBu_r: 100 colours below and 100 above `mp`
    cmap = ListedColormap([interpolate_cmap(plt.cm.RdBu_r(np.arange(256)), x) for x in 
                           np.hstack([np.linspace(mp+mp*4*vmin,mp,100), np.linspace(1-mp,1,100)])])    

    # only the 'h' figure carries the return period in its file name
    # (the former `elif irp > 0` branch referenced an undefined name)
    if par == 'h':
        fn_fig = join(fdir, f'{var}_{model}_T{rp:03d}.png')
    else:
        fn_fig = join(fdir, f'{var}_{model}.png')
    label = labels[par]

    fig = plt.figure(figsize=(18, 10.5))
    grid = plt.GridSpec(3, 2, wspace=0.04, hspace=0.01)

    column = var
    gdf0, gdf1, gdf2 = _split_by_significance(gdf, var, qmin)

    # main map
    axg = fig.add_subplot(grid[:-1, :], projection=crs_sub)
    basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, **bmap_kwargs)
    # grey crosses for gdf0, coloured circles for gdf1, coloured diamonds for gdf2
    gdf0.plot(ax=axg, marker='x', color='grey', markersize=s, linewidth=1, alpha=1, legend=False)
    plot_kwargs.update(markersize=s, marker='o', linewidth=0.5)
    plot_choropleth(
        fig, axg, gdf1, column=column, 
        cmap=cmap, vmin=vmin, vmax=vmax, cticks=cticks, norm=norm, discrete=False, 
        plot_kwargs=plot_kwargs,
        cbar_kwargs=dict(label=label, location='right', extend='both'),
        cbar_pos = dict(pad=0.02, fraction=0.01, shrink=0.6)
    )
    if len(gdf2) > 0:
        plot_kwargs.update(markersize=s, marker='d', linewidth=0.5)
        cax = plot_choropleth(
            fig, axg, gdf2, column=column, 
            cmap=cmap, vmin=vmin, vmax=vmax, cticks=cticks, norm=norm, discrete=False, 
            plot_kwargs=plot_kwargs,
        )
    axg.text(-175, 85, letters[0]+r'. $\Delta$'+names.get(par,par)+diff_names[0], transform=crs_sub, fontsize='large')

    # two small maps in the bottom row, with smaller markers (0.6*s)
    for i, var in enumerate([f'diff_{par}_surge_seas', f'diff_{par}_seas_tide']):

        column = var
        gdf0, gdf1, gdf2 = _split_by_significance(gdf, var, qmin)

        axg = fig.add_subplot(grid[-1, i], projection=crs_sub)
        basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, **bmap_kwargs)
        gdf0.plot(ax=axg, marker='x', color='grey', markersize=.6*s, linewidth=1, alpha=1, legend=False)

        plot_kwargs.update(markersize=.6*s, marker='o', linewidth=0.5)
        cax = plot_choropleth(
            fig, axg, gdf1, column=column, 
            cmap=cmap, vmin=vmin, vmax=vmax, cticks=cticks, norm=norm, discrete=False, 
            plot_kwargs=plot_kwargs,
        )
        if len(gdf2) > 0:
            plot_kwargs.update(markersize=.6*s, marker='d')
            cax = plot_choropleth(
                fig, axg, gdf2, column=column, 
                cmap=cmap, vmin=vmin, vmax=vmax, cticks=cticks, norm=norm, discrete=False, 
                plot_kwargs=plot_kwargs
            )
        axg.text(-175, 85, letters[i+1]+r'. $\Delta$'+names.get(par,par)+diff_names[i+1], transform=crs_sub, fontsize='large')

    print(basename(fn_fig))
    plt.savefig(fn_fig)


## population exposed

In [None]:
# `people_affected_dH*` as a percentage of a reference population column.
# Every denominator is floored at 1 so the division never hits zero.
for _pct_col, _num_col, _den_col in [
    ('people_affected_dH_perc', 'people_affected_dH', 'people_affected_all'),
    ('people_affected_dH_dp_perc', 'people_affected_dH_dp', 'people_affected_all_dp'),
    ('people_affected_dH_dp_percLECZ', 'people_affected_dH_dp', 'people_lecz'),
    ('people_affected_dH_dp_percALL', 'people_affected_dH_dp', 'people_all'),
]:
    gdf[_pct_col] = gdf[_num_col] / np.maximum(gdf[_den_col], 1) * 100
# the share relative to `people_lecz` is additionally capped at 100 %
gdf['people_affected_dH_dp_percLECZ'] = np.minimum(gdf['people_affected_dH_dp_percLECZ'], 100)

In [None]:
import geopandas as gpd
# Basin polygons joined to the per-location table `gdf` on `rivmth_idx`.
# how='right' keeps every row of `gdf`, also those without a basin polygon.
basins = (
    gpd.read_file(r'/home/dirk/models/cama-flood_bmi_v3.6.2/map/global_15min/flw_basins')
    .assign(
        area=lambda b: b.area,
        # `DN` minus one is used as the join key
        rivmth_idx=lambda b: b['DN'] - 1,
    )
    # .sort_values(by='area',ascending=False).drop_duplicates(subset='rivmth_idx')
    .merge(gdf.drop(columns='geometry'), on='rivmth_idx', how='right')
)
# fig, ax = plt.subplots(1,1, figsize=(10,5))
# basins.plot(linewidth=0.1, ax=ax)

In [None]:
# Basin map of `people_affected_dH_dp_perc`; basins where the value is not
# positive are hatched.

# Define the split column before it is used. It used to be read from whatever
# an earlier cell had left in `column`, so the result depended on run order.
# NOTE(review): assumed to be the column that is mapped below - confirm.
column = 'people_affected_dH_dp_perc'

gdf_bas = basins.copy()
# explicit copy so the column assignment below does not write into a slice
gdf1 = gdf_bas[gdf_bas[column]>0].copy()
gdf2 = gdf_bas[~(gdf_bas[column]>0)]
# log10 of the positive counts, 0 otherwise (used by the disabled map 1 below);
# np.where evaluates log10 for every row, so silence the warnings for values <= 0
with np.errstate(divide='ignore', invalid='ignore'):
    gdf1['people_affected_dH_dp'] = np.where(gdf1['people_affected_dH_dp']<=0, 0, np.log10(gdf1['people_affected_dH_dp']))

plt.close('all')
fig = plt.figure(figsize=(18, 5))
grid = plt.GridSpec(1, 1, wspace=0.01, hspace=0.01)

# # map 1
# column = f'people_affected_dH_dp'
# label=f'population exposed [-]'
# vmin, vmax, n= 2, 6, 5
# cticks=np.linspace(vmin, vmax, n)
# cmap = ListedColormap(sns.cubehelix_palette(16, start=.5, rot=-.75))

# axg = fig.add_subplot(grid[0, 0], projection=crs_sub)
# basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, features=['land'], **bmap_kwargs)
# plot_kwargs.pop('marker',None)
# plot_kwargs.pop('markersize',None)
# plot_kwargs.update(linewidth=0.1)
# gdf2.plot(ax=axg, facecolor="white", hatch="///", edgecolor='grey')
# cax = plot_choropleth(
#     fig, axg, gdf1, column=column, 
#     cmap=plt.cm.Greens , cticks=cticks, vmin=vmin, vmax=vmax, discrete=False, 
#     plot_kwargs = plot_kwargs,
#     cbar_kwargs = dict(label=label, location='right'),
#     cbar_pos = dict(pad=0.02, fraction=0.01, shrink=0.5)
# )
# cax.set_yticklabels([f'$10^{e:0.0f}$' for e in cticks])
# axg.text(-175, 85, f'{letters[0]}', transform=crs_sub, fontsize='large')

# map 2
column = 'people_affected_dH_dp_perc'
label='percentage of expected \n annual population exposed [%]'
vmin, vmax, n= 0, 40, 5
cticks=np.linspace(vmin, vmax, n)
# axg2 = fig.add_subplot(grid[1, 0], projection=crs_sub)
axg2 = fig.add_subplot(grid[0, 0], projection=crs_sub)
basemap(axg2, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, features=['land'], **bmap_kwargs)
# polygons are drawn here: remove the point-marker settings left by earlier cells
plot_kwargs.pop('marker',None)
plot_kwargs.pop('markersize',None)
plot_kwargs.update(linewidth=0.1)
plot_choropleth(
    fig, axg2, gdf1, column=column, 
    cmap=plt.cm.Blues, cticks=cticks, vmin=vmin, vmax=vmax, discrete=False, 
    plot_kwargs = plot_kwargs,
    cbar_kwargs = dict(label=label, location='right'),
    cbar_pos = dict(pad=0.02, fraction=0.01, shrink=0.5),
) 
gdf2.plot(ax=axg2, facecolor="white", hatch="///", edgecolor='grey', label=r'insignificant $\Delta$H')
# axg2.text(-175, 85, f'{letters[1]}', transform=crs_sub, fontsize='large')

fn_fig = join(fdir, f'expected_annual_people_exposed_{model}.png')
print(basename(fn_fig))
plt.savefig(fn_fig)

## drivers exposed

In [None]:
# Figure: large map of the `driver_*` classification (top) and two small maps
# of the `driver_H_r` / `driver_Q_r` columns (bottom).

clist = plt.cm.tab10.colors
# marker, colour and legend name per driver class
mmapp = {
    'insign': 'o',
    'compound': 'd',
    'H': '^', 
    'Q': 's', 
}
cmapp = {f'compound': clist[1],
         'H': clist[0], 
         'Q': clist[2], 
         'insign': 'grey'}
nmapp = {
    'H': 'Surge-dominant', 
    'Q': 'Discharge-dominant',
    'insign': 'Insignificant',
    'compound': 'Compound-dominant',
    'all': 'All locations'
}

# panel titles of the two correlation maps
nmapp_corr = {
    'H': 'Skew surge', 
    'Q': 'Discharge'}


label=f'spearman correlation [-]'

# colour range and ticks of the correlation maps
vmin, vmax, n= 0.5, 1, 6
cticks=np.linspace(vmin, vmax, n)
cmap  = plt.cm.Blues

plt.close('all')
fig = plt.figure(figsize=(18, 10.5))
grid = plt.GridSpec(3, 2, wspace=0.02, hspace=0.01)
        
# main map

# only locations with Q_mean above the threshold are shown
gdf1 = gdf[gdf['Q_mean']>qmin].copy()
       
axg = fig.add_subplot(grid[:-1, :], projection=crs_sub)
basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, **bmap_kwargs)
# gdf0.plot(ax=axg, marker='x', color=(0, 0, 0, 0.6), markersize=12, linewidth=0.2, alpha=0.6, label='insignificant $\Delta h$')
# classes are drawn in reverse key order of `mmapp` ('Q', 'H', 'compound', 'insign');
# a class without a `driver_<class>` column is skipped
for d in list(mmapp.keys())[::-1]:
    if f'driver_{d}' not in gdf1.columns: continue
    gdf1[gdf1[f'driver_{d}']==True].plot(
        ax=axg, marker=mmapp[d], color=cmapp[d], 
        markersize=s, linewidth=0.5, legend=False, 
        label = nmapp.get(d, d)
    )
axg.legend(loc='lower left', bbox_to_anchor=(0.05, 0.1))
axg.text(-175, 85, f'{letters[0]}. Dominant flood driver classification', transform=crs_sub, fontsize='large')

for i, v in enumerate(['H', 'Q']):
    # main map
    axg = fig.add_subplot(grid[-1, i], projection=crs_sub)
    basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False, **bmap_kwargs)
    column = f'driver_{v}_r'
    # rows with `driver_<v>_sign` False become grey crosses, rows with True are
    # coloured by `column` (sorted ascending before plotting)
    gdf0 = gdf[np.logical_and(gdf[f'driver_{v}_sign']==False, gdf['Q_mean']>qmin)]
    gdf1 = gdf[np.logical_and(gdf[f'driver_{v}_sign']==True, gdf['Q_mean']>qmin)].sort_values(by=column, ascending=True)
    gdf0.plot(ax=axg, marker='x', color='grey', markersize=0.6*s, linewidth=0.6, alpha=1, legend=False)
    # NOTE(review): `plot_kwargs` is a shared dict mutated by earlier cells; only
    # the marker size is set here, so marker and linewidth depend on those cells
    plot_kwargs.update(markersize=0.6*s)
    # colorbar settings are only passed for the second (right-hand) map
    cax = plot_choropleth(
        fig, axg, gdf1, column=column, 
        cmap=cmap, vmin=vmin, vmax=vmax, cticks=cticks, discrete=False,
        plot_kwargs=plot_kwargs,
        cbar_kwargs=dict(label=label, location='right', extend='min') if i == 1 else None,
        cbar_pos = dict(pad=0.02, fraction=0.015, shrink=0.8) 
    )
    axg.text(-175, 85, f'{letters[i+1]}. {nmapp_corr[v]}', transform=crs_sub, fontsize='large')
fn_fig = join(fdir, f'spearman_driver_{drivers[0]}_wdw{wdw}_{model}.png')
print(basename(fn_fig))
plt.savefig(fn_fig)

In [None]:
from scipy.stats import ttest_ind

# Box plots of location characteristics, grouped by driver class
# ('all', 'Q', 'H', 'compound'). A filled mean marker means that the class mean
# differs from both other classes (two-sample t-test, p < 0.01 for each pair).

# column -> axis label, optional tick format ('e': ticks shown as powers of ten),
# y limits and panel title
vs = {
#     'Hseasrange': dict(label='seasonal sea level range [m]', ylim=[0, 0.5]),
    'Hskewsurge_amax': dict(label='$H_{SS}$ [m]', ylim=[0,1.5], title='Mean annual max. $H_{SS}$\n'),
    'Hskewsurge_amax_cv': dict(label='Coef. of variation  [-]', fmt='.2f', ylim=[0,0.6], title='Coef. of variation \nannual max. $H_{SS}$'),
    'Q_amax_log10': dict(label='Q [m$^3$ s$^{-1}$]', fmt='e', ylim=[0.5,4.0], title='Mean annual max. $Q$\n'),
    'Q_amax_cv': dict(label='Coef. of variation [-]', ylim=[0,1.5], title='Coef. of variation \nannual max. $Q$'),

    'Q_mean_log10': dict(label='Q [m$^3$ s$^{-1}$]', fmt='e', ylim=[0,3.2], title='Long term mean $Q$'),
#     'runoff': dict(label='mean runoff [mm]'),
    'uparea_log10': dict(label='Area [km$^2$]', fmt='e', ylim=[3,5], title='Catchment area'),
#     'uparea_1e3': dict(label='Area [1000 km$^2$]', fmt='.0f', ylim=[1,70], title='Catchment area'),
    'mean_drain_length': dict(label='Length [km]', ylim=[0,400], title='Drainage length'),
    'mean_drain_slope': dict(label='Slope [m km$^{-1}$]', fmt='.2f', ylim=[0,8], title='Drainage slope'),
}

box_kwargs=dict(whis=[5,95], boxprops=dict(linewidth=1.), medianprops=dict(linewidth=1.5), 
                showfliers=False, flierprops=dict(markersize=2), showmeans=True, meanprops=dict(marker='s', markersize=5))

# derived columns; the values are floored at 1 before taking the logarithm
gdf['Q_amax_log10'] = np.log10(np.maximum(1, gdf['Q_amax']))
gdf['Q_mean_log10'] = np.log10(np.maximum(1, gdf['Q_mean']))
gdf['uparea_log10'] = np.log10(np.maximum(1, gdf['uparea']))
gdf['uparea_1e3'] = gdf['uparea']/1e3
gdf['runoff'] = gdf['Q_mean'] / (gdf['uparea']*1e6) * 1e3 #[mm]
# single class code: driver_H + 2*driver_Q + 3*driver_compound
gdf['driver'] = gdf['driver_H']+gdf['driver_Q']*2+gdf[f'driver_compound']*3


dh=0.12  # box width; also sets the spacing between the boxes
var = f'diff_h_surge_tide'
df1 = gdf  #[gdf[f'{var}_sign']==1].drop(columns=['geometry'])


# two rows of panels
ncol = int(np.ceil(len(vs)/2))
fig = plt.figure(figsize=(ncol*3, 2*3))
grid = plt.GridSpec(2, ncol, hspace=0.2, wspace=0.55)

for i, name in enumerate(vs):
    irow = int(i // ncol)
    icol = int(i % ncol)
    ax2 = fig.add_subplot(grid[irow, icol])
    
    # one single-column frame per class, plus all rows
    samples = {v: df1[[name]][df1[f'driver_{v}']==1] for v in ['Q', 'H', 'compound']}
    samples['all'] = df1[[name]]
    
#     if '_log10' in name:
#         name2 = name.replace('_log10', '')
#         samples2 = {v: df1[[name2]][df1[f'driver_{v}']==1] for v in ['Q', 'H', 'compound']}
#         samples2['all'] = df1[[name2]]
#     else:
    samples2 = samples

    
    for ii, v in enumerate(['all','Q', 'H', 'compound']):
        c = cmapp.get(v, 'k')
        sign = True
        if v != 'all':
            other = [v1 for v1 in ['Q', 'H', 'compound'] if v1 != v]
            # test on the 1-D column so the p-value is a scalar; calling float()
            # on a 1-element array is deprecated in recent NumPy releases
            sign = all(
                float(ttest_ind(samples2[v][name], samples2[v1][name], equal_var=True)[1]) < 0.01
                for v1 in other
            )
        positions=[0.5*dh+(ii-2)*dh*1.5]       
        dd = samples[v].boxplot(column=name, widths=dh, positions=positions, ax=ax2, return_type='dict', **box_kwargs)
        # plot lines for legend (placed at x=2, outside the x-limits set below)
        if i == len(vs)-1:
            ax2.plot([2,2],[0,2], color=c, label=nmapp.get(v,v))
        # colour every part of the box in the class colour
        for part in ('boxes', 'medians', 'whiskers', 'caps'):
            for artist in dd[part]:
                artist.set_color(c)
        for p in dd['means']:
            if v == 'all':
                p.set_marker('x')
            # filled marker: significant; hollow marker: not significant
            if sign:
                p.set_markerfacecolor(c)
            else:
                p.set_markerfacecolor('none')
            p.set_markeredgecolor(c)

#     ax2.plot([2],[0], marker='s', markerfacecolor='k', label='mean', linewidth=0, markeredgecolor='k' )
    # y-limits from the configuration, else the 0-99 % quantile range of all rows
    ylim = vs[name].get('ylim', None)
    if ylim is not None:
        ymin, ymax = ylim
    else:
        ymin, ymax = samples['all'][name].quantile([0.00,0.99])
        
    fmt = vs[name].get('fmt', None)
    if fmt == 'e':
        # log10 columns: integer ticks labelled as powers of ten
        yticks = np.arange(np.floor(ymin), np.ceil(ymax), 1)
        yticklabels = [f'10$^{t:.0f}$' for t in yticks]
        ax2.set_yticks(yticks)
        ax2.set_yticklabels(yticklabels)
    
    lab = vs[name]['label']
    title = vs[name].get('title', name)
    ax2.set_ylabel(lab)
    ax2.set_xlim([-0.45,0.4])
    ax2.set_ylim([ymin, ymax])
    ax2.set_xticks([])
    ax2.set_title(f'{letters[i]}. {title}')
#     ax2.text(0.05,0.93, letters[i], transform=ax2.transAxes)

# legend on the last panel, built from the lines added for i == len(vs)-1
ax2.legend(loc='lower left', bbox_to_anchor=(1.05, 0.5), title='main driver')
# fig.suptitle('')

fn = join(fdir, f'boxplot_classification_{model}.png')
print(basename(fn))
plt.savefig(fn)