In [None]:
import xarray as xr
from os.path import join
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

## read and align data

In [None]:
# input (forcing) and output (results) folders, relative to this notebook
ddir = '../../1_data/2_forcing'
rdir = '../../4_results'

# Axis label per driver, written as "<long name>\n[<unit>]".
# The insertion order is the panel order of the figures further down.
# Variables that are currently not labelled / plotted:
#   't'    : 'Tide\n[m+MSL]'
#   'h_ts' : 'Total waterlevel\n[m+MSL]'
#   'ss'   : 'Skew surge\n[m]'
#   'ssw'  : 'Skew surge (incl. wave setup)\n[m]'
#   'sw'   : 'Non-tidal residual\n[m]'
labels = dict(
    qb='Discharge Buzi\n[m3/s]',
    qp='Discharge Pungwe\n[m3/s]',
    p='Rainfall\n[mm/hr]',
    s='Surge\n[m]',
    w='Sign. wave height\n[m]',
    h_tsw='Total waterlevel (incl. wave setup)\n[m+MSL]',
)

In [None]:
import hydromt
from eva import eva_block_maxima, get_peaks, get_peak_hydrographs

# discharge
# Read the full 'discharge' variable into memory from the daily discharge file.
fnq = join(ddir, 'cama_discharge_beira_daily.nc')
daq = xr.open_dataset(fnq)['discharge'].load()
# Select two locations by their `index` coordinate and name them like the keys
# used in `labels` (qb = Buzi, qp = Pungwe). reset_coords(drop=True) removes the
# remaining non-index coordinates before merging.
dsq = xr.merge([
    daq.sel(index=1).rename('qb').reset_coords(drop=True),
    daq.sel(index=4).rename('qp').reset_coords(drop=True)
])
# Alternative discharge source (wflow model output), kept for reference only:
# fnq = r'../../3_models/wflow/run_vito_ksath1000/output_src.nc'
# daq = xr.open_dataset(fnq)['q_river'].load()
# dsq = xr.merge([
#     daq.sel(index=0).rename('qb').reset_coords(drop=True),
#     daq.sel(index=3).rename('qp').reset_coords(drop=True)
# ])

# save qbankfull at river inflow locations of SFINCS
# NOTE: the block-maxima analysis is run on `daq` (every location in the file),
# not only on the two series in `dsq`.
# NOTE(review): min_dist=14 is presumably the minimum spacing between peaks in
# time steps (days here) -- confirm against the eva module.
dsq_eva = eva_block_maxima(
    daq, period = 'AS-AUG', min_dist = 14,
)
# The return value at rps=2 is stored as 'qbankfull' and written as GeoJSON next
# to the input file (same name with '_qbf.geojson' instead of '.nc').
# NOTE(review): the `.vector` accessor is presumably registered by `import hydromt`
# at the top of this cell -- confirm before removing that import.
gdf_qbf = dsq_eva['return_values'].sel(rps=2).to_dataset().vector.to_gdf().rename(columns={'return_values': 'qbankfull'})
gdf_qbf.to_file(fnq.replace('.nc', '_qbf.geojson'), driver='GeoJSON')
# cell output: bankfull discharge per location
gdf_qbf['qbankfull']

In [None]:
# GTSM waterlevels + ERA5 waves
# The file contains: "waterlevel" (tide+surge), "tide", "surge", "shww".
# They are renamed to the short driver names used throughout the notebook.
fnh = join(ddir, 'reanalysis_gtsm_v1_beira_extended.nc')
dsh0 = xr.open_dataset(fnh).load().rename(
    dict(waterlevel='h_ts', surge='s', tide='t', shww='w')
)
# wave contribution: 0.2 x significant wave height, added to both derived series
wave_setup = 0.2*dsh0['w']
# total water level incl. wave setup
dsh0['h_tsw'] = dsh0['h_ts'] + wave_setup
# surge (missing values treated as zero) plus the wave contribution
dsh0['sw'] = dsh0['s'].fillna(0) + wave_setup
# skew surge (not used)
# high_tide = get_peaks(dsh0['t'].load(), period='12H').dropna('time').reindex_like(dsh0, 'nearest')
# dsh0['ss'] = dsh0['h_ts'] - high_tide
# dsh0['ssw'] = dsh0['h_tsw'] - high_tide

In [None]:
# ERA5 precipitation
# Open the precipitation file and pull the 'precip' variable fully into memory.
fnp = join(ddir, 'era5_precip_beira_hourly_spatialmean.nc')
ds_precip = xr.open_dataset(fnp, chunks='auto')
dap0 = ds_precip['precip'].load()

In [None]:
# Align all drivers on one regular daily time axis.
dates = pd.date_range('19800102', '20210201', freq='D')
# Block-resampling alternatives, kept for reference:
# dap = dap0.resample(time='1D', label='right').sum('time')
# dsh = dsh0.resample(time='1D', label='right').max('time')
# precipitation: centred rolling mean over 24 steps, sampled at the daily dates
dap = dap0.rolling(time=24, center=True, min_periods=1).mean('time').reindex(time=dates)
# water levels / waves: centred rolling maximum over 6*24 steps, sampled at the daily dates
# NOTE(review): 6*24 steps equals one day only if dsh0 has a 10-minute time step -- confirm
dsh = dsh0.rolling(time=6*24, center=True, min_periods=1).max('time').reindex(time=dates)

# merge all variables to a single dataset
ds = xr.merge([
    dsq.reindex(time=dates),
    dsh,
    dap.rename('p')
], compat='override').reset_coords(drop=True)#.reindex(time=dates)
ds.attrs = {}
# keep only the driver variables (drops e.g. 'h_ts' and 'sw')
ds = ds[['qb', 'qp', 'p', 'h_tsw', 't', 's', 'w']]
# Attach long_name / unit attributes parsed from `labels` ("<long name>\n[<unit>]").
# Variables without an entry in `labels` (currently 't') are skipped; looking them
# up unconditionally raised a KeyError.
for var in ds.data_vars:
    if var not in labels:
        continue
    long_name, unit = labels[var].split('\n')
    # unit[1:-1] strips the surrounding square brackets
    ds[var].attrs.update({'long_name': long_name, 'unit': unit[1:-1]})
encoding = {var: {'zlib': True} for var in ds.data_vars}
# Writing is disabled; the next cell reads 'beira_drivers_daily.nc' from disk, so
# enable this line once to (re)create that file.
# ds.to_netcdf(join(ddir, 'beira_drivers_daily.nc'), encoding=encoding)

In [None]:
# Reload the merged daily drivers from disk (replaces the in-memory `ds`).
ds = xr.open_dataset(join(ddir, 'beira_drivers_daily.nc'))
# optional subset: ds = ds[['qb', 'qp', 'p', 'h_tsw', 't', 's', 'w']]
# Drivers considered in the analysis, limited to those present in the file.
candidate_drivers = ('qb', 'qp', 'p', 's', 'w')
drivers = list(filter(lambda v: v in ds, candidate_drivers))
drivers

## get annual maxima peaks

In [None]:
from eva import get_peak_hydrographs, get_peaks

# settings
# pandas offset alias: blocks start each year in August
period='AS-AUG'

# Peaks per driver on the original time axis; the tide 't' is skipped.
# NOTE(review): get_peaks presumably returns NaN at all non-peak time steps and
# min_dist=14 is presumably the minimum spacing between peaks in time steps --
# confirm against the eva module.
ds_peaks = xr.Dataset(coords=ds.coords)
for dvar in ds.data_vars.keys():
    if dvar == 't': continue
    ds_peaks[dvar] = get_peaks(ds[dvar], period=period, min_dist=14, min_sample_size=0)

# peaks with dates
# One row per time step at which at least one driver has a peak (how='all').
df_peaks0 = ds_peaks.reset_coords(drop=True).dropna('time', how='all').to_dataframe()  

# get maximum values within time window for non_extremes
# For drivers without a peak on a given peak date, use the 7-step rolling maximum
# at that date (`center` is not set, so the window is not centred).
# NOTE: df_peaks0_filled is not used anywhere else in the visible notebook.
df_peaks0_filled = pd.DataFrame()
ds_tmax = ds.rolling(time=7).max('time').sel(time=df_peaks0.index)
# ds_tmax = ds.sel(time=df_peaks0.index)
for dvar in df_peaks0.columns:
    df_peaks0_filled[dvar] = df_peaks0[dvar].where(df_peaks0[dvar].notna(), ds_tmax[dvar])

# peaks with regular spaced interval
# Maximum peak per `period` block, per driver.
# NOTE: df_bm is not used anywhere else in the visible notebook.
df_bm = df_peaks0.resample(period).max()#.dropna()

# save peaks to csv
df_peaks0.to_csv(join(rdir, 'drivers_am_peaks.csv'))

In [None]:
from string import ascii_uppercase as letters

# Time series of every labelled driver (black) with its selected peaks (red dots).
# One panel per entry in `labels`, so the panel count always matches the loop below.
n = len(labels)
fig, axes = plt.subplots(n, 1, figsize=(12, 3*n), sharex=True)
for i, dvar in enumerate(labels.keys()):
    ds[dvar].to_series().plot(ax=axes[i], color='k')
    df_peaks0[dvar].plot(ax=axes[i], color='r', marker='.', lw=0)
    axes[i].set_ylabel(labels[dvar])
    # panel title: "<letter>) <long name>" (label text without the unit line)
    title = labels[dvar].split('\n')[0]
    axes[i].set_title(f'{letters[i]}) {title}')
# `bbox_inches='tight'` crops the saved figure to its content; the previous
# keyword `bbox_axes` is not a savefig option.
plt.savefig(join(rdir, 'drivers_timeseries.png'), dpi=300, bbox_inches='tight')

## fit uni-variate eva

In [None]:
from eva import lmoment_fitopt, get_frozen_dist, _get_return_periods, _RPS, rps_dist, emperical_dist

# prepare surge extremes
# Tropical-cyclone (TC) surge set read from file; the commented slice suggests
# only the last 3000 entries were once selected.
fileS = 'Beira_STORM_surges.nc' #We look at the 3000 yr of data
da_surge = xr.open_dataarray(join(ddir, fileS))#[-3000:]

# Non-TC (ETC) sample: surge peaks sorted ascending with the two largest values
# removed, then a GEV / Gumbel fit via lmoment_fitopt with criterium='AIC'.
x_etc = ds_peaks['s'].dropna('time').to_series().sort_values().values[:-2]  # filter 2 TCs
params, dist = lmoment_fitopt(x_etc, distributions=['gev', 'gumb'], criterium='AIC')
dist_etc = get_frozen_dist(params, dist)

# TC sample: all values of the surge set, described by an empirical distribution.
# NOTE(review): the second argument (3000) is presumably the record length in
# years -- confirm against eva.emperical_dist.
x_tc = da_surge.values.flatten()
dist_tc = emperical_dist(x_tc, 3000)

# combine: rp(x) = 1 / (1/rp(x_TC) + 1/rp(x_ETC))
# Evaluated on a regular 0.1 grid of surge levels from 0.1 up to (excluding) max(x_tc).
xs = np.arange(0.1, np.max(x_tc), 0.1)
rp_tc = 1/dist_tc.sf(xs)
rp_etc = 1/(1-dist_etc.cdf(xs))
rp_tot = 1/(1/rp_etc + 1/rp_tc)
dist_surge = rps_dist(rp_tot, xs)

# plot
# Gumbel reduced variate -ln(-ln(1 - 1/T)) used to place return periods T on the x-axis
fgumbplot = lambda x: -np.log(-np.log(1.0 - 1.0 / x))

fig, ax = plt.subplots(1,1)
# empirical points of both samples; the TC sample uses a rate of x_tc.size/3000
ax.plot(fgumbplot(_get_return_periods(x_etc)), x_etc, '.b', label='ETC')
ax.plot(fgumbplot(_get_return_periods(x_tc, extremes_rate=x_tc.size/3000)), x_tc, '.r', label='TC')
ax.plot(fgumbplot(rp_tot), xs, '--k', lw=2, label='combined')
ax.set_ylabel("Return value")
# ticks sit at the transformed positions but are labelled with the return periods
ax.set_xticks(fgumbplot(_RPS))
ax.set_xticklabels(_RPS)
ax.set_xlabel("Return period")
ax.set_xlim([fgumbplot(1.1), fgumbplot(1000)])
ax.grid()
ax.legend()


In [None]:
# Table of the combined surge marginal: return period against surge level, saved as csv.
surge_table = np.column_stack([rp_tot, xs])
df_surge_emp_dist = pd.DataFrame(surge_table, columns=['rp[year]', 'surge[m]'])
df_surge_emp_dist.to_csv(join(rdir, 'marginal_surge.csv'))

In [None]:
# Annual maxima from the stochastic event set, only needed for the figure below
# (this can be skipped at first iteration, when 'sim_AM.csv' does not exist yet).
df_sim_am0 = pd.read_csv(join(rdir, 'sim_AM.csv'), index_col=0)
# empirical distribution of total water level, using the sample size as second argument
h_tsw_am = df_sim_am0['h_tsw']
dist_htsw = emperical_dist(h_tsw_am.values, h_tsw_am.size)


In [None]:
from eva import lmoment_fitopt, get_frozen_dist, plot_return_values, _RPS, _get_return_values, eva_idf
# Gumbel reduced variate -ln(-ln(1 - 1/T)) used to place return periods T on the x-axis
fgumbplot = lambda x: -np.log(-np.log(1.0 - 1.0 / x))

# one panel per entry in `labels`, so the panel count always matches the loop below
n = len(labels)
dparams = ['shape', 'loc', 'scale']
distributions = ['gev', 'gumb']#[1:]

fig, axes = plt.subplots(n, 1, figsize=(12, 3*n), sharex=True)

# fitted distribution name and parameters per driver (parametric fits only)
df_eva = pd.DataFrame(columns=['dist'] + dparams)
# return values per driver for return periods 1.1 and _RPS
df_rps = pd.DataFrame(index=np.hstack([[1.1], _RPS]))
df_rps.index.name = 'rps'
# marginal distribution object per driver
dists = {}

for i, dvar in enumerate(labels.keys()):

    if dvar == 's':
        # surge: combined TC / non-TC marginal prepared in the surge cell above
        dists[dvar] = dist_surge
        df_rps[dvar] = dist_surge.ppf(1-1/df_rps.index.values)
        axes[i].plot(fgumbplot(_get_return_periods(x_etc)), x_etc, 'xk', label='non tropical cyclones')
        axes[i].plot(fgumbplot(_get_return_periods(x_tc, extremes_rate=x_tc.size/3000)), x_tc, '.k', label='tropical cyclones', alpha=0.5)
        axes[i].plot(fgumbplot(rp_tot), xs, '--k', lw=2, label='combined')
        axes[i].set_xticks(fgumbplot(_RPS))
        axes[i].grid()
        axes[i].legend()
    elif dvar == 'h_tsw':
        # total water level: empirical distribution of the stochastic event set
        dists[dvar] = dist_htsw
        df_rps[dvar] = dists['h_tsw'].ppf(1-1/df_rps.index.values)
        axes[i].plot(fgumbplot(1/dists['h_tsw'].freq), dists['h_tsw'].data, '.k', label='stochastic event set')
        axes[i].set_xticks(fgumbplot(_RPS))
        axes[i].set_xticklabels(_RPS)
        axes[i].set_xlabel("Return period [year]")
        axes[i].grid()
        axes[i].legend()
        axes[i].set_ylim([4.5,7.5])
    else:
        if dvar == 'p':
            # rainfall: Gumbel fit from the IDF analysis, taking the 24-step duration
            durations=np.array([1, 2, 3, 6, 12, 24], dtype=int)
            da_p_bm = eva_idf(dap0, durations=durations, distribution='gumb', rps=df_rps.index.values)
            da_p_bm0 = da_p_bm.sel(duration=24)
            x = da_p_bm0['peaks'].dropna('time').values
            # drop the first parameter entry, keeping the last two values
            params = da_p_bm0['parameters'].values[1:]
            dist = da_p_bm0['distribution'].item()
        else:
            # other drivers: GEV / Gumbel fit on the peaks via lmoment_fitopt (criterium='AIC')
            x = ds_peaks[dvar].dropna('time').values
            params, dist = lmoment_fitopt(x, distributions=distributions, criterium='AIC')
        dists[dvar] = get_frozen_dist(params, dist)
        # right-align the parameters, so a 2-parameter fit fills 'loc' and 'scale'
        df_eva.loc[dvar, dparams[-len(params):]] = params
        df_eva.loc[dvar, 'dist'] = dist
        df_rps[dvar] = _get_return_values(params, dist, rps=df_rps.index.values)
        _ = plot_return_values(x, params, dist, ax=axes[i])
        axes[i].set_ylim([x.min()*0.9, axes[i].get_ylim()[1]])

    # shared panel layout: x-range, "<letter>) <long name>" title and y-label
    axes[i].set_xlim([0.01, fgumbplot(500)])
    title = labels[dvar].split('\n')[0]
    axes[i].set_title(f'{letters[i]}) {title}')

    axes[i].set_ylabel(labels[dvar])
    # only the bottom panel keeps its x-axis label
    if i < n-1:
        axes[i].set_xlabel('')

# save parameters
# df_eva.to_csv(join(rdir, 'marginal_params.csv'))

# `bbox_inches='tight'` crops the saved figure to its content; the previous
# keyword `bbox_axes` is not a savefig option.
plt.savefig(join(rdir, 'drivers_eva.png'), dpi=300, bbox_inches='tight')

In [None]:
# Summary table of marginal return values for a fixed set of return periods.
RPS = [2, 5, 10, 50, 100, 500]
df_rps = (
    pd.read_csv(join(rdir, 'marginal_rps.csv'), index_col=0)
    .rename(columns={'h_tsw': 'h'})
    .drop(columns='h_tsw0')
    # significant wave height scaled by 0.2, the factor used for the wave term in h_tsw
    .assign(w=lambda table: table['w']*0.2)
    .loc[RPS, :]
    .round(2)
)
# copy to the system clipboard for pasting elsewhere, then show as cell output
df_rps.to_clipboard()
df_rps