In [None]:
import xarray as xr
import pandas as pd
import numpy as np
from os.path import join, basename

In [None]:
%matplotlib inline
import matplotlib as mpl
# mpl.rcParams.keys()
import matplotlib.pyplot as plt
import seaborn as sns
# Figure export defaults picked up by every plt.savefig call in this notebook.
rc = {'savefig.bbox': 'tight',  'savefig.format': 'png', 'savefig.dpi':300}
context = 'paper'# 'talk'
# Font scale per seaborn context; any context not listed here uses 1.0.
font_scales = {'talk': 0.75, 'paper': 1.3}
# Configure seaborn once: a second sns.set call would replace the settings of
# the first, so the 'talk' scale must be resolved in the same call.
sns.set(context=context, style='whitegrid', font_scale=font_scales.get(context, 1.), rc=rc)
from string import ascii_uppercase as letters

In [None]:
# import gis plotting packages
from gistools import pandas2geopandas
import gistools.plot_tools as gplt
import cartopy.crs as ccrs
from plot_tools import *

In [None]:
# Input / output locations used throughout the notebook.
grdc_dir = r'/scratch/grdc'
ddir = r'/scratch/compound_hotspots/data/4-postprocessed'
fig_dir = r'/scratch/compound_hotspots/reports/figures'
# CSV that is read as the GRDC station meta data table.
fn_grdc = join(grdc_dir, r'grdc_discharge_1980-2014_v20180912.csv')

# Display names per ensemble member; the insertion order is also the order in
# which models appear in tables and figures (see model_seq).
# Entries currently left out on purpose:
#     "univk": "W.Gap3 (UNIVK)"
#     "univu": "PCR-WB (UNIVU)"
models_rename = dict(
    anu="W3RA (ANU)",
    nerc="Jules (NERC)",
    cnrs="Orchid. (CNRS)",
    ecmwf="HTESS. (ECMWF)",
    jrc="LISFL. (JRC)",
    mean="ensemble mean",
)
# Display names in dictionary order, used to select and order columns.
model_seq = list(models_rename.values())

## validation

### Select GRDC data

In [None]:
obs_name = 'grdc'


def _ensemble_mean_member(ds):
    """Return the mean of ``ds`` over 'ensemble' as a one-member dataset labelled 'mean'."""
    member = ds.mean('ensemble').expand_dims('ensemble')
    member['ensemble'] = xr.Variable('ensemble', ['mean'])
    return member


# Validation file with the "pm_am" suffix, extended with an ensemble-mean member.
# NOTE: the *_med names are historical; the statistic computed is the mean.
fn_pm = join(ddir, r'cmf_v362_e2o_validation_grdc_pm_am.nc')
pm_am = xr.open_dataset(fn_pm)
pm_am_med = _ensemble_mean_member(pm_am)
pm_am = xr.concat([pm_am, pm_am_med], 'ensemble')

# Validation file with the "pm" suffix, extended in the same way.
fn_pm = join(ddir, r'cmf_v362_e2o_validation_grdc_pm.nc')
pm = xr.open_dataset(fn_pm)
pm_med = _ensemble_mean_member(pm)
pm = xr.concat([pm, pm_med], 'ensemble')

In [None]:
# Tag appended to figure file names for this station selection.
postfix = 'nat'

# Station meta data, aligned row-by-row with the stations present in pm.
df_meta = pd.read_csv(fn_grdc, index_col=0).reindex(pm['grdc_id'])

# Keep stations flagged nathum_human == 0 that also have ds_stat_no >= 0
# (described in the original notebook as "natural most downstream stations";
# the exact meaning of both columns is defined by the meta data file).
is_natural = df_meta['nathum_human'] == 0
has_ds_stat = df_meta['ds_stat_no'] >= 0
df_meta = df_meta[(is_natural & has_ds_stat).values]

# Restrict both metric datasets to the selected stations.
selected_ids = df_meta.index
pm = pm.sel(grdc_id=selected_ids)
pm_am = pm_am.sel(grdc_id=selected_ids)
# Additionally mask pm_am where doy_uniform_p >= 0.05 and drop labels that end
# up fully masked (presumably a significance filter on flood timing - confirm).
pm_am = pm_am.where(pm_am['doy_uniform_p'] < 0.05, drop=True)

print(pm['grdc_id'].size, pm_am['grdc_id'].size)

In [None]:
# Ensemble member and observation source used for single-model selections.
obs_name = 'grdc'
model = 'mean'
max_count1, max_count2 = 260, 120  # alternative setting: 70, 35

# Slice both metric datasets down to the chosen ensemble member.
pm_sel, pm_am_sel = (ds.sel(ensemble=model) for ds in (pm, pm_am))

# Number of stations left in each selection (displayed as the cell output).
n1 = pm_sel['grdc_id'].size
n2 = pm_am_sel['grdc_id'].size
(n1, n2)

## analysis

In [None]:
# Station snapping table, aligned to the selected stations in df_meta.
fn_snap = join(grdc_dir, r'20170124_GRDC_Stations_snap_2dist1e+04_1upa5.0e-01.csv')
snap_df = pd.read_csv(fn_snap, index_col=0).reindex(df_meta.index)

In [None]:
# Quartiles of 'kge' per model; columns follow model_seq.
kge_by_model = pm['kge'].to_series().unstack(0).rename(columns=models_rename)[model_seq]
kge_by_model.describe().loc[['25%', '50%', '75%'], :]

In [None]:
# Quartiles of 'kge_bias' per model; columns follow model_seq.
bias_by_model = pm['kge_bias'].to_series().unstack(0).rename(columns=models_rename)[model_seq]
bias_by_model.describe().loc[['25%', '50%', '75%'], :]

In [None]:
# Quartiles of |1 - kge_bias| per model; columns follow model_seq.
abs_bias_error = (1 - pm['kge_bias']).to_series().abs()
abs_bias_error_by_model = abs_bias_error.unstack(0).rename(columns=models_rename)[model_seq]
abs_bias_error_by_model.describe().loc[['25%', '50%', '75%'], :]

In [None]:
# Quartiles of 'kge_pearson_coef' per model; columns follow model_seq.
pearson_by_model = pm['kge_pearson_coef'].to_series().unstack(0).rename(columns=models_rename)[model_seq]
pearson_by_model.describe().loc[['25%', '50%', '75%'], :]

In [None]:
# Quartiles of 'lag' per model; columns follow model_seq.
lag_by_model = pm['lag'].to_series().unstack(0).rename(columns=models_rename)[model_seq]
lag_by_model.describe().loc[['25%', '50%', '75%'], :]

In [None]:
# Quartiles of 'am_bias' per model; columns follow model_seq.
am_bias_by_model = pm_am['am_bias'].to_series().unstack(0).rename(columns=models_rename)[model_seq]
am_bias_by_model.describe().loc[['25%', '50%', '75%'], :]

In [None]:
# Quartiles of 'am_rank_corr' per model; columns follow model_seq.
am_rank_corr_by_model = pm_am['am_rank_corr'].to_series().unstack(0).rename(columns=models_rename)[model_seq]
am_rank_corr_by_model.describe().loc[['25%', '50%', '75%'], :]

In [None]:
# Quartiles of |am_doy_diff| per model; columns follow model_seq.
abs_doy_diff = pm_am['am_doy_diff'].to_series().unstack(0).abs()
abs_doy_diff_by_model = abs_doy_diff.rename(columns=models_rename)[model_seq]
abs_doy_diff_by_model.describe().loc[['25%', '50%', '75%'], :]

### Figure validation 1 - multi model ensemble boxplots

In [None]:
# Box styling shared by all panels: whiskers at the 5th/95th percentile and
# outliers hidden (flierprops only matters if showfliers is switched on).
box_kwargs = dict(
    whis=[5, 95], boxprops=dict(linewidth=1.), medianprops=dict(linewidth=1.5),
    showfliers=False, flierprops=dict(markersize=2),
)
fig, ((ax1, ax3, ax4), (ax11, ax12, ax13)) = plt.subplots(
    2, 3, figsize=(15, 10), sharey=True, gridspec_kw=dict(wspace=0.15, hspace=0.3)
)

# One entry per panel, in lettering order:
# (axes, dataset, variable, x-limits, x-label, title, row label or '').
panels = [
    (ax1, pm, 'kge_bias', (-0.1, 3.1), 'bias [-]', 'Bias', 'models - daily'),
    (ax3, pm, 'kge_pearson_coef', (-0.05, 1.0), 'pearson rho [-]', 'Correlation', ''),
    (ax4, pm, 'lag', (-10, 10), 'lag [days]', 'Time lag (cross correlation)', ''),
    (ax11, pm_am, 'am_bias', (-0.1, 3.1), 'bias [-]', 'AM bias', 'models - annual maxima'),
    (ax12, pm_am, 'am_rank_corr', (-0.1, 1.0), 'spearman rho [-]', 'AM rank correlation', ''),
    (ax13, pm_am, 'am_doy_diff', (-60, 60), 'lag [days]', 'AM Time lag (mean flood day)', ''),
]

# Loop variables carry a panel_ prefix so they do not leave a generic global
# such as `ax` behind for later cells to pick up by accident.
for panel_letter, panel in zip(letters, panels):
    (panel_ax, panel_ds, panel_var, panel_xlim,
     panel_xlabel, panel_title, panel_ylabel) = panel
    # Stations as rows, models as columns ordered by model_seq.
    data = panel_ds[panel_var].to_series().unstack(0).rename(columns=models_rename)[model_seq]
    sns.boxplot(data=data, ax=panel_ax, orient="h", **box_kwargs)
    panel_ax.set_xlim(*panel_xlim)
    panel_ax.set_xlabel(panel_xlabel)
    panel_ax.set_title(f'{panel_letter}. {panel_title}')
    # Only the first panel of each row carries a (larger) row label.
    if panel_ylabel:
        panel_ax.set_ylabel(panel_ylabel, fontsize=14)
    else:
        panel_ax.set_ylabel('')

# Format only the file name, so braces in fig_dir cannot break the path.
fn = join(fig_dir, f'{context}_{obs_name}_validation_{postfix}')
plt.savefig(fn)

## Figure validation 2 - map

In [None]:
import cartopy.crs as ccrs
import cartopy.feature as cfeature
# Map styling: plate carree projection and a reversed viridis colour map with
# colour bar ticks every 0.1 between 0 and 1.
cl = cfeature.COLORS['land_alt1']
crs = ccrs.PlateCarree()
cmap = plt.cm.viridis_r
vmin, vmax, n = 0, 1, 11
cticks = np.linspace(vmin, vmax, n)

# Metric to map, taken per station and sorted ascending.
column = 'kge'
if obs_name == 'grdc':
    model = 'mean'
    var = pm[column].sel(ensemble=model).to_series().sort_values()
else:
    var = pm_sel[column].to_series().sort_values()

# Station table in the sorted order of `var`, with the metric attached
# (presumably so that higher values are drawn last - confirm in plot_choropleth).
gdf = pandas2geopandas(df_meta)
gdf = gdf.reindex(var.index)
gdf[column] = var

fig = plt.figure(figsize=(15, 10))
axg = fig.add_subplot(projection=crs)
basemap(axg, bbox=(-180, -60, 180, 90), gridlines=False, outline=False,)
plot_choropleth(
    fig, axg, gdf, column=column,
    cmap=cmap, cticks=cticks, vmin=vmin, vmax=vmax, discrete=False,
    cbar_kwargs=dict(label=f'{models_rename[model]} {column.upper()} [-]', location='right'),
    cbar_pos=dict(pad=0.02, fraction=0.01, shrink=0.6),
    plot_kwargs=dict(markersize=30, edgecolor=(0.5, 0.5, 0.5, 0.5), linewidth=0.5, zorder=2),
)

# Make the extent symmetric around the origin. The limits must be read from
# and written to axg, the map axes created above: no variable named `ax`
# is defined in this notebook, so using it fails on a fresh kernel.
xlim, ylim = axg.get_xlim(), axg.get_ylim()
print(xlim, ylim)
axg.set_xlim([-max(xlim), max(xlim)])
axg.set_ylim([-max(ylim), max(ylim)])

fn = join(fig_dir, f'{context}_{obs_name}_validation_{column}_{model}_{postfix}')
print(basename(fn))
plt.savefig(fn)