In [None]:
import pandas as pd
import xarray as xr
from stats_eva import get_frozen_dist
import numpy as np

In [None]:
# Load the modelled event set and shorten the driver column names:
# '1' -> qb, '4' -> qp, 'precip' -> p, 'WSL' -> h.
fn0 = r'../02_data/modelled_dataset_5000_years_magObs_timeObs.csv'
short_names = {'1':'qb', '4':'qp', 'precip':'p', 'WSL':'h'}
df = pd.read_csv(fn0, index_col=0)
df = df.rename(columns=short_names)
# number of rows (events) that were read
print(df.index.size)

In [None]:
# Translate every driver magnitude in `df` into a return period (rp) column.


def _magnitude_to_rp(values, dist, rp_min=1.01):
    """Return the return period of `values` under the frozen distribution `dist`.

    The return period is 1 / sf(value). Values below the magnitude that
    corresponds to a return period of `rp_min` are treated as non-extremes
    and get a return period of exactly 1.

    Parameters
    ----------
    values : array-like
        Driver magnitudes.
    dist : object with ``sf`` and ``isf`` methods
        Frozen distribution as returned by ``get_frozen_dist``.
    rp_min : float, optional
        Return period below which a value is considered non-extreme.

    Returns
    -------
    numpy.ndarray
        Return period per value.
    """
    rp = 1 / dist.sf(values)
    # Threshold and sf come from the SAME distribution by construction.
    return np.where(values < dist.isf(1 / rp_min), 1, rp)


# discharge
fn = r'../02_data/events/cama_discharge_beira_daily_eva.nc'
ds = xr.open_dataset(fn)

ds_q1 = ds.sel(index=1)
dist_qb = get_frozen_dist(ds_q1['parameters'].values, ds_q1['distribution'].item())
# The non-extreme threshold must be taken from dist_qb; the previous code used
# dist_qp here, which is only defined further down (NameError on a fresh kernel,
# wrong threshold on a stale one).
df['qb_rp'] = _magnitude_to_rp(df['qb'], dist_qb)

ds_q4 = ds.sel(index=4)
dist_qp = get_frozen_dist(ds_q4['parameters'].values, ds_q4['distribution'].item())
df['qp_rp'] = _magnitude_to_rp(df['qp'], dist_qp)

# precip
fn = r'../02_data/events/era5_precip_beira_hourly_spatialmean_48hr_eva.nc'
ds_p = xr.open_dataset(fn).sel(duration=24)
# NOTE(review): only the last two stored parameters are passed to the
# distribution -- confirm this matches what get_frozen_dist expects.
dist_p = get_frozen_dist(ds_p['parameters'].values[-2:], ds_p['distribution'].item())
df['p_rp'] = _magnitude_to_rp(df['p'], dist_p)

# wsl: return period is read off a tabulated curve (index = rp, column = level)
df_coastrp = pd.read_csv(r'../01_forcing/annual_T_convolution_coastrp_local.csv', index_col=0)
df['h_rp'] = np.interp(df['h'], df_coastrp['local_coastrp'], df_coastrp.index)
# levels below the first tabulated level are non-extremes -> rp=1
df['h_rp'] = np.where(df['h']<df_coastrp['local_coastrp'].values[0], 1, df['h_rp'])

# limit to rp 500 to avoid extrapolation later
rp_cols = ['h_rp', 'p_rp', 'qb_rp', 'qp_rp']
df[rp_cols] = np.minimum(500, df[rp_cols])
# df.to_csv(fn0.replace('.csv', '_rp.csv'))
df.head()

In [None]:
# Collect the design scenarios needed to linearly interpolate damages.
# Every event contributes two scenarios: one with each driver at the design
# return period just below (or equal to) its own, and one with each driver at
# the design return period just above (or equal to) its own.
rps = np.array([1,2,5,10,50,100,500], dtype=int)
cols = ['qb_rp', 'qp_rp', 'p_rp', 'h_rp']
scens = []

values = df.loc[:,cols].values
for event_rps in values:
    bounds = []
    for rp in event_rps:
        if rp in rps:
            # exactly on a design return period: lower and upper bound coincide
            lower = upper = int(rp)
        else:
            lower, upper = rps[rps<rp][-1], rps[rps>rp][0]
        bounds.append([lower, upper])
    # zip pairs all lower bounds together and all upper bounds together
    scens.extend(list(corner) for corner in zip(*bounds))
# # include univariate
# rps0 = np.zeros(4, dtype=int)
# for i in range(4):
#     for rp in rps[1:]:
#         _rps = rps0.tolist()
#         _rps[i] = rp
#         scens.append(_rps)
# # include full dependence
# for rp in rps:
#     scens.append(np.full(4, rp, dtype=int).tolist())

# Count how often each unique scenario occurs and give it a name.
scen_table = pd.DataFrame(data=np.vstack(scens), columns=cols)
df_scen = scen_table.value_counts().rename('count').reset_index()
df_scen['scen'] = [
    f"qb{qb_rp:03d}_qp{qp_rp:03d}_h{h_rp:03d}_p{p_rp:03d}"
    for qb_rp, qp_rp, p_rp, h_rp in df_scen[cols].itertuples(index=False, name=None)
]
# number of unique scenarios
print(df_scen.index.size)
df_scen.to_csv(fn0.replace('.csv', '_scenCount.csv'))
df_scen.sort_values('count', ascending=False).head()

In [None]:
# Sanity check: count, per driver, the events whose return period exceeds 500.
df[['h_rp', 'p_rp', 'qb_rp', 'qp_rp']].gt(500).sum()

In [None]:
# Read the simulated flood impacts and make sure no return period is below 1.
fn = r'../02_data/flood_impact.csv'
flood_rp_cols = ['h_rp', 'p_rp', 'qb_rp', 'qp_rp']
df_flood = pd.read_csv(fn, index_col=0)
df_flood[flood_rp_cols] = np.maximum(1, df_flood[flood_rp_cols])
df_flood = df_flood.reset_index()

# transform rp to normal values
# (rp is floored at 1.01 before inverting the survival function)
for var, rp_col, frozen in (
    ('qb', 'qb_rp', dist_qb),
    ('qp', 'qp_rp', dist_qp),
    ('p', 'p_rp', dist_p),
):
    df_flood[var] = frozen.isf(1/np.maximum(1.01, df_flood[rp_col]))
# water level is read back from the tabulated rp curve (index = rp, column = level)
df_flood['h'] = np.interp(df_flood['h_rp'], df_coastrp.index, df_coastrp['local_coastrp'])

df_flood.head()