In [None]:
import pandas as pd
from datetime import datetime, timedelta
import xarray as xr
import numpy as np
from os.path import join, isfile
from copy import deepcopy

In [None]:
import sys
import os
sys.path.append(os.path.abspath('../3-postprocess/'))
from xstats import xrankdata

In [None]:
# Root of the project tree. Defaults to the original scratch location; set the
# COMPOUND_HOTSPOTS_ROOT environment variable to run this notebook on another
# machine without editing the cell.
root = os.environ.get('COMPOUND_HOTSPOTS_ROOT', r'/scratch/compound_hotspots')
# Directory with the post-processed files that the cells below read and write.
ddir = join(root, 'data', '4-postprocessed')

## compound EV

In [None]:
# Input file with the 'WSE_ev' variable per scenario, and the output file for
# the statistics derived from it in this cell.
fn_ev = join(ddir, 'rivmth_AMpeaks_d14_ev_gumb.nc')
fn_ev_out = join(ddir, 'rivmth_AMpeaks_d14_ev_gumb_stats.nc')

ds_ev = xr.open_dataset(fn_ev)
da_wse_ev = ds_ev['WSE_ev']  # optionally restrict with .sel(ensemble='cnrs')

# Select every scenario once and reuse the slices below.
wse_ev_cmpnd = da_wse_ev.sel(scen='cmpnd')
wse_ev_surge = da_wse_ev.sel(scen='surge')
wse_ev_runoff = da_wse_ev.sel(scen='runoff')

# Difference between the compound scenario and each single-driver scenario.
diff_cmpnd_surge = (wse_ev_cmpnd - wse_ev_surge).rename('diff_surge')
diff_cmpnd_runoff = (wse_ev_cmpnd - wse_ev_runoff).rename('diff_runoff')

# Main driver: whichever single-driver scenario yields the larger value
# (ties are attributed to runoff).
runoff_is_main_driver = (wse_ev_runoff >= wse_ev_surge).rename('runoff')
surge_is_main_driver = (wse_ev_runoff < wse_ev_surge).rename('surge')

# True where the compound scenario exceeds both single-driver scenarios.
compound_positive = np.logical_and(
    diff_cmpnd_surge > 0,
    diff_cmpnd_runoff > 0,
).rename('compound')

ds_cmpnd = xr.merge([
    diff_cmpnd_runoff,
    diff_cmpnd_surge,
    runoff_is_main_driver,
    surge_is_main_driver,
    compound_positive,
])

ds_cmpnd.to_netcdf(fn_ev_out)

In [None]:
# Peak-selection settings; they are only used below to build the
# 'rivmth_peaks_q{q}d{min_dist}_top{Npeaks}...' file names.
# NOTE(review): presumably q is the threshold quantile, min_dist the minimum
# spacing between peaks and Npeaks the number of retained peaks - confirm
# against the script that produced those files.
q, min_dist, Npeaks = 95, 30, 50

## top 50 peaks across scenarios

In [None]:
# All paths used by this cell: one input with the ranked peaks, the daily
# time series store, and two outputs (peak frequency and diff/ratio stats).
fn_peaks_all = join(ddir, f'rivmth_peaks_q{q}d{min_dist}_top{Npeaks}_all.nc')
fn_peaks_all_freq = join(ddir, f'rivmth_peaks_q{q}d{min_dist}_top{Npeaks}_all_freq.nc')
fn_peaks_all_ratio = join(ddir, f'rivmth_peaks_q{q}d{min_dist}_top{Npeaks}_all_stat.nc')
fn_rivmth_ts = join(ddir, 'rivmth_reanalysis.zarr')

ds_peaks_all = xr.open_dataset(fn_peaks_all)

# frequency: share of the summed 'peaks_max' that falls in each scenario
npeaks = ds_peaks_all['peaks_max'].sum('rank')
npeaks_perc = (npeaks / npeaks.sum('scen')).rename('peak_perc')
npeaks_perc.to_netcdf(fn_peaks_all_freq)

# diff: rank-mean WSE difference between pairs of scenarios
da_wse = ds_peaks_all['WSE']
wse_surge = da_wse.sel(scen='surge')
wse_seas = da_wse.sel(scen='seas')
wse_tide = da_wse.sel(scen='tide')
diff_surge_seas = (wse_surge - wse_seas).mean('rank').rename('diff_surge_seas')
diff_seas_tide = (wse_seas - wse_tide).mean('rank').rename('diff_seas_tide')
diff_surge_tide = (wse_surge - wse_tide).mean('rank').rename('diff_surge_tide')
diff = xr.merge([
    diff_surge_seas,
    diff_seas_tide,
    diff_surge_tide,
])

# ratio: differences scaled by the temporal std of the 'surge' scenario WSE
da_wse_peak = ds_peaks_all['WSE']
da_wse_ts_std = xr.open_zarr(fn_rivmth_ts)['WSE'].sel(scen='surge').std('time')
ratio = diff / da_wse_ts_std
ratio = ratio.rename({
    var: var.replace('diff', 'ratio') for var in ratio.data_vars
})

# out: differences and ratios together in one file
ds_out = xr.merge([diff, ratio])
ds_out.to_netcdf(fn_peaks_all_ratio)

## RPs fit

In [None]:
fn_rivmth_ts = join(ddir, 'rivmth_reanalysis.zarr')
ds_rivmth = xr.open_zarr(fn_rivmth_ts)
da_wse = ds_rivmth['WSE'].sel(scen='surge')
# Standard deviation over time of the 'surge' scenario WSE; non-index
# coordinates are dropped. The RP cells below divide by this value.
da_wse_std = (
    da_wse
    .std('time')
    .reset_coords(drop=True)
)

In [None]:
fn_peaks_rp = join(ddir, f'rivmth_peaks_q{q}d{min_dist}_rp.nc')
fn_peaks_ratio = join(ddir, f'rivmth_peaks_q{q}d{min_dist}_rp_ratio_gevfit.nc')
da_wse_rps = xr.open_dataset(fn_peaks_rp)['WSE']
# Return periods found in the file; the interpolation cell further down
# reuses this array.
rps = da_wse_rps['rp'].values

# Select every scenario once and reuse the slices below.
wse_rp_surge = da_wse_rps.sel(scen='surge')
wse_rp_seas = da_wse_rps.sel(scen='seas')
wse_rp_tide = da_wse_rps.sel(scen='tide')

# ratio: each scenario difference divided by da_wse_std (defined in the
# preceding cell)
diff_surge_seas = (wse_rp_surge - wse_rp_seas).rename('diff_surge_seas')
ratio_surge_seas = (diff_surge_seas / da_wse_std).rename('ratio_surge_seas')

diff_seas_tide = (wse_rp_seas - wse_rp_tide).rename('diff_seas_tide')
ratio_seas_tide = (diff_seas_tide / da_wse_std).rename('ratio_seas_tide')

diff_surge_tide = (wse_rp_surge - wse_rp_tide).rename('diff_surge_tide')
ratio_surge_tide = (diff_surge_tide / da_wse_std).rename('ratio_surge_tide')

ds_out = xr.merge([
    diff_surge_seas,
    diff_seas_tide,
    diff_surge_tide,
    ratio_surge_seas,
    ratio_seas_tide,
    ratio_surge_tide,
])
ds_out.to_netcdf(fn_peaks_ratio)
ds_out

## RPs interpolated

In [None]:
import scipy as sp
import scipy.interpolate

def _interp_rps(y, rps_in, rps_out, kind='linear'):
    """Interpolate `y` from return periods `rps_in` onto `rps_out`.

    The interpolation is carried out against log10 of the return periods.

    Parameters
    ----------
    y : array-like
        Values given at `rps_in`.
    rps_in : array-like
        Return periods at which `y` is known.
    rps_out : array-like
        Return periods at which values are requested.
    kind : str, optional
        Interpolation kind passed on to `scipy.interpolate.interp1d`.

    Returns
    -------
    numpy.ndarray
        Interpolated values, one per entry of `rps_out`.
    """
    log_rps_in = np.log10(rps_in)
    log_rps_out = np.log10(rps_out)
    interpolator = sp.interpolate.interp1d(log_rps_in, y, kind=kind)
    return interpolator(log_rps_out)

def interp_rps(da, rps_it, rps_out, dim='rank'):
    """Interpolate `da` along `dim` onto the return periods `rps_out`.

    Parameters
    ----------
    da : xarray.DataArray
        Data to interpolate; must contain the dimension `dim`.
    rps_it : array-like
        Return periods belonging to the positions along `dim`; these are the
        x-values of the interpolation.
    rps_out : array-like
        Return periods at which values are requested.
    dim : str, optional
        Dimension of `da` that is consumed by the interpolation.

    Returns
    -------
    xarray.DataArray
        `da` with `dim` replaced by a new 'rp' dimension whose coordinate
        holds `rps_out`.
    """
    # Accept lists/tuples as well: `.size` below needs an ndarray.
    rps_out = np.asarray(rps_out)
    da_out = xr.apply_ufunc(
        _interp_rps,
        da,
        # Pass the `rps_it` argument through. Previously this read a
        # notebook-global `rps_in`, so the argument was silently ignored.
        kwargs=dict(rps_in=rps_it, rps_out=rps_out),
        input_core_dims=[[dim]],
        output_core_dims=[['rp']],
        vectorize=True,
        dask='allowed',
        output_dtypes=[float],
        output_sizes={'rp': rps_out.size}
    )
    # Label the new dimension with the requested return periods.
    da_out['rp'] = xr.Variable('rp', rps_out)
    return da_out


In [None]:
fn_peaks_rank = join(ddir, f'rivmth_peaks_q{q}d{min_dist}_top{Npeaks}_scen.nc')
fn_peaks_ratio = join(ddir, f'rivmth_peaks_q{q}d{min_dist}_rp_ratio_interp.nc')
ds_peaks_rank = xr.open_dataset(fn_peaks_rank, chunks={'index':500, 'rank':-1})
da_wse = ds_peaks_rank['WSE']
rps_in = ds_peaks_rank['rp'].values
# Target return periods: all but the last entry of `rps` from the fitted-RP
# cell above (alternative: rps_out = np.array([1,2,5,10])).
rps_out = rps[:-1]
da_wse_rps = interp_rps(da_wse, rps_in, rps_out)

# Select every scenario once and reuse the slices below.
wse_rp_surge = da_wse_rps.sel(scen='surge')
wse_rp_seas = da_wse_rps.sel(scen='seas')
wse_rp_tide = da_wse_rps.sel(scen='tide')

# ratio: each scenario difference divided by da_wse_std (defined in the
# 'RPs fit' cell)
diff_surge_seas = (wse_rp_surge - wse_rp_seas).rename('diff_surge_seas')
ratio_surge_seas = (diff_surge_seas / da_wse_std).rename('ratio_surge_seas')

diff_seas_tide = (wse_rp_seas - wse_rp_tide).rename('diff_seas_tide')
ratio_seas_tide = (diff_seas_tide / da_wse_std).rename('ratio_seas_tide')

diff_surge_tide = (wse_rp_surge - wse_rp_tide).rename('diff_surge_tide')
ratio_surge_tide = (diff_surge_tide / da_wse_std).rename('ratio_surge_tide')

ds_out = xr.merge([
    diff_surge_seas,
    diff_seas_tide,
    diff_surge_tide,
    ratio_surge_seas,
    ratio_seas_tide,
    ratio_surge_tide,
])

ds_out.to_netcdf(fn_peaks_ratio)
ds_out

In [None]:
# def _bootstrap(x, alphas=[0.1, 0.9], n_samples=10000):
#     x_boot = x[np.random.randint(x.size, size=(x.size, n_samples))]
#     x_boot_sorted = np.apply_along_axis(np.sort, arr=x_boot, axis=-1)
#     alphas = np.atleast_1d(alphas)
#     nvals = np.nan_to_num(np.round((n_samples-1)*alphas)).astype('int')
#     return stat_sorted[..., nvals]
# peaks_ranked.isel(ensemble=1, index=1).reset_coords()['water_level'].values

## timing

In [None]:
from temp_stats import mean_flood_day_diff, mean_flood_day_stats

# Scenarios whose flood timing is compared; the difference below is taken
# between scens[0] and scens[1].
scens = ['surge', 'tide']
fn_rivmth_ts = join(ddir, 'rivmth_reanalysis.zarr')
fn_peaks_timing = join(ddir, 'rivmth_peaks_timing.nc')

da_wse = xr.open_zarr(fn_rivmth_ts)['WSE'].sel(scen=scens)
# This assignment was commented out, so the cell only ran when `doy_stats`
# was left over in the kernel; on a fresh kernel it raised NameError.
# NOTE(review): the result is expected to contain a 'doy' variable with a
# 'scen' dimension (that is how it is indexed below) - confirm in temp_stats.
doy_stats = mean_flood_day_stats(da_wse)
doy_diff = mean_flood_day_diff(
    doy_stats['doy'].sel(scen=scens[0]),
    doy_stats['doy'].sel(scen=scens[1]),
)

# Store the per-scenario statistics together with their difference.
ds_timing = xr.merge([doy_diff, doy_stats]).reset_coords(drop=True)
ds_timing.to_netcdf(fn_peaks_timing)
# ds_timing

## attrs

In [None]:
# Paths: output file, coupling table (csv) and the daily time series store.
fn_attrs_out = join(ddir, 'rivmth_mean_attrs.nc')
fn_csv_coupling = join(root, 'src', '1-prepare', r'cmf_gtsm_75km_update191017.csv')
fn_rivmth_ts = join(ddir, 'rivmth_reanalysis.zarr')

coupling = pd.read_csv(fn_csv_coupling, index_col='index')

# Driver variables: everything except WSE, averaged over the ensemble, for
# the 'surge' scenario only.
ds_drivers = (
    xr.open_zarr(fn_rivmth_ts)
    .drop('WSE')
    .mean('ensemble')
    .sel(scen='surge')
    .drop('scen')
)
ds_drivers['Htiderange'] = ds_drivers['Htide_day_max'] - ds_drivers['Htide_day_min']

# Mean of the annual maxima; variables are renamed to '<prefix>_amax', where
# the prefix is the part of the original name before the first underscore.
amax_vars = ['Q', 'Hseas_day_mean', 'Hsurge_day_max', 'Hskewsurge_day', 'Htiderange']
ds_drivers_amax = ds_drivers[amax_vars].resample(time='A').max().mean('time')
ds_drivers_amax = ds_drivers_amax.rename({
    var: '{}_amax'.format(var.split('_')[0]) for var in ds_drivers_amax.data_vars
})

# Mean of the annual minima, renamed to '<prefix>_amin'.
amin_vars = ['Q', 'Hseas_day_mean', 'Htiderange', 'Hskewsurge_day']
ds_drivers_amin = ds_drivers[amin_vars].resample(time='A').min().mean('time')
ds_drivers_amin = ds_drivers_amin.rename({
    var: '{}_amin'.format(var.split('_')[0]) for var in ds_drivers_amin.data_vars
})

# Mean over the full time axis, renamed to '<prefix>_mean'.
mean_vars = ['Q', 'Htiderange']
ds_drivers_mean = ds_drivers[mean_vars].mean('time')
ds_drivers_mean = ds_drivers_mean.rename({
    var: '{}_mean'.format(var.split('_')[0]) for var in ds_drivers_mean.data_vars
})

# Ensemble-mean temporal std of the 'surge' scenario WSE. This rebinds the
# `da_wse_std` used by the RP cells above.
da_wse = xr.open_zarr(fn_rivmth_ts)['WSE'].sel(scen='surge')
da_wse_std = da_wse.std('time').reset_coords(drop=True).mean('ensemble')
da_wse_std.name = 'wse_std'

coupling_cols = ['dist', 'dist2coast', 'rivwth', 'uparea', 'rivhgt', 'elevtn', 'gtsm_egm_offset']
attrs = xr.merge([
    coupling[coupling_cols].to_xarray(),
    ds_drivers_amax,
    ds_drivers_amin,
    ds_drivers_mean,
    da_wse_std,
]).astype(np.float32)
attrs.to_netcdf(fn_attrs_out)

# Re-read the written file and export the same table as csv next to it.
fn_attrs_csv = fn_attrs_out.replace('.nc','.csv')
xr.open_dataset(fn_attrs_out).to_dataframe().to_csv(fn_attrs_csv)