In [1]:
import pandas as pd
from datetime import datetime, timedelta
import xarray as xr
import numpy as np
from os.path import join, isfile
from copy import deepcopy

In [4]:
import sys
import os
# Make modules in the sibling '3-postprocess' directory importable.
# The relative path is resolved against the current working directory,
# so this only works when the notebook is run from its own folder.
sys.path.append(os.path.abspath('../3-postprocess/'))

In [5]:
# Project root and the post-processed data directory; `ddir` is where the
# input zarr store is read from and where the output tables are written.
root = r'/scratch/compound_hotspots'
ddir = join(root, 'data', '4-postprocessed')

## attrs

In [7]:
# Open the zarr store holding the time series used in the cells below
# (variables such as 'WSE', 'Q', 'Htide_day_max'; dimensions include
# 'time', 'ensemble' and 'scen').
# NOTE(review): "rivmth" presumably stands for river mouth - confirm.
fn_rivmth_ts = join(ddir, 'rivmth_reanalysis.zarr')
ds = xr.open_zarr(fn_rivmth_ts)

In [8]:
# Columns of the coupling table that are carried over into the attribute table.
ccols = [
    'dist', 'dist2coast', 'rivwth', 'uparea',
    'rivhgt', 'elevtn', 'gtsm_egm_offset',
    'elvp10', 'elvp90', 'elvavg',
    'mean_drain_length', 'mean_drain_slope',
]

# Read the coupling CSV, indexed by its 'index' column, and keep only `ccols`
# (in the order listed above).
fn_csv_coupling = join(root, 'src', '1-prepare', r'cmf_gtsm_75km_update191203.csv')
coupling = pd.read_csv(fn_csv_coupling, index_col='index').loc[:, ccols]

In [10]:
# Combine the static coupling attributes with long-term driver statistics
# into one float32 table (`attrs`).
def _tagged(dset, template):
    """Rename each data variable to `template` filled with the part of its
    name that precedes the first underscore (e.g. 'Hsurge_day_max' with
    '{}_amax' becomes 'Hsurge_amax')."""
    return dset.rename(
        {name: template.format(name.split('_')[0]) for name in dset.data_vars}
    )


# Driver variables: everything except WSE, averaged over the ensemble and
# restricted to the 'surge' scenario.
ds_drivers = ds.drop('WSE').mean('ensemble').sel(scen='surge').drop('scen')
ds_drivers['Htiderange'] = ds_drivers['Htide_day_max'] - ds_drivers['Htide_day_min']

# Mean over all years of the annual ('A') maximum of each driver.
ds_drivers_amax = _tagged(
    ds_drivers[['Q', 'Hseas_day_mean', 'Hsurge_day_max', 'Hskewsurge_day', 'Htiderange']]
    .resample(time='A')
    .max()
    .mean('time'),
    '{}_amax',
)

# Mean over all years of the annual ('A') minimum of each driver.
ds_drivers_amin = _tagged(
    ds_drivers[['Q', 'Hseas_day_mean', 'Htiderange', 'Hskewsurge_day']]
    .resample(time='A')
    .min()
    .mean('time'),
    '{}_amin',
)

# Plain long-term mean.
ds_drivers_mean = _tagged(ds_drivers[['Q', 'Htiderange']].mean('time'), '{}_mean')

# Temporal standard deviation of WSE for the 'surge' scenario, computed per
# ensemble member and then averaged over the ensemble.
da_wse = ds['WSE'].sel(scen='surge').drop('scen')
da_wse_std = (
    da_wse.std('time')
    .reset_coords(drop=True)
    .mean('ensemble')
    .rename('wse_std')
)

# Merge everything on the shared coordinates and flatten to a DataFrame.
attr_parts = [
    coupling[ccols].to_xarray(),
    ds_drivers_amax,
    ds_drivers_amin,
    ds_drivers_mean,
    da_wse_std,
]
attrs = xr.merge(attr_parts).astype(np.float32).to_dataframe()

In [None]:
# Add annual-maximum statistics of discharge (Q) and skew surge to `attrs`.
#
# NOTE: 'Q_amax', 'Q_mean' and 'Hskewsurge_amax' already exist in `attrs`
# (created when the driver statistics were merged); the assignments below
# overwrite those columns. For Q the values here come from the 'msl'
# scenario and are computed per ensemble member before averaging.
#
# Every result is converted with `.to_series()` before assignment so that
# pandas aligns the values on the index labels of `attrs`. Assigning the
# DataArray directly would fill the column by position and ignore the labels.

# --- Q annual max properties (scen='msl') ---
da_Q = ds.sel(scen='msl')['Q']
da_Q_am = da_Q.groupby('time.year').max('time')
da_Q_am_avg = da_Q_am.mean('year')
da_Q_am_std = da_Q_am.std('year')
# coefficient of variation of the annual maxima
da_Q_am_cv = da_Q_am_std / da_Q_am_avg
attrs['Q_amax'] = da_Q_am_avg.mean('ensemble').to_series()
attrs['Q_amax_cv'] = da_Q_am_cv.mean('ensemble').to_series()

# --- long term average Q ---
attrs['Q_mean'] = da_Q.mean('time').mean('ensemble').to_series()

# --- skew surge annual max properties ---
# NOTE(review): no scenario/ensemble selection is applied here, so this
# assumes 'Hskewsurge_day' only varies along time and location - confirm.
da_Hskewsurge = ds['Hskewsurge_day']
da_Hskewsurge_am = da_Hskewsurge.groupby('time.year').max('time')
da_Hskewsurge_am_avg = da_Hskewsurge_am.mean('year')
# Compute the CV entirely in xarray (as for Q) instead of dividing a
# DataArray by a pandas column.
da_Hskewsurge_am_cv = da_Hskewsurge_am.std('year') / da_Hskewsurge_am_avg
attrs['Hskewsurge_amax'] = da_Hskewsurge_am_avg.to_series()
attrs['Hskewsurge_amax_cv'] = da_Hskewsurge_am_cv.to_series()

In [None]:
# Write the attribute table to NetCDF and to a CSV next to it.
outlier_index = 3354  # row label marked as an outlier; excluded from the output
fn_attrs_out = join(ddir, 'rivmth_mean_attrs.nc')
fn_attrs_csv = fn_attrs_out.replace('.nc','.csv')

# `drop` returns a new frame, so `attrs` itself keeps the outlier row.
attrs_out = attrs.drop(outlier_index)
attrs_out.to_xarray().to_netcdf(fn_attrs_out)
attrs_out.to_csv(fn_attrs_csv)