# Differential weighting based on both the regrouping of observations and the behavior of the prior ensemble


In [None]:
import pandas as pd
import pyemu
import sys
import shutil
import os
import numpy as np
from pathlib import Path
from datetime import datetime as dt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import matplotlib as mpl
#sys.path.append('../scripts/')
sys.path.append(os.path.abspath('../scripts/'))
from obs_helper import get_obs_df, parsename
from pytsproc import filters, series_metrics
plt.rcParams['font.size']=12
%matplotlib inline
import warnings
warnings.simplefilter("ignore", DeprecationWarning)

In [None]:
# All lesson outputs live under one common root; derive the three folders from it.
output_root = Path('/home/docker/wrf-hydro-training/output')
wkdir = output_root / 'lesson4' / 'host'      # Lesson 4 host directory (control file + residuals)
priordir = output_root / 'lesson5' / 'host'   # Lesson 5 host directory (prior ensemble files)
plotdir = output_root / 'lesson6' / 'plots'   # where this notebook writes its figures

In [None]:
# Create the plot folder together with any missing parent folders (e.g. a
# not-yet-existing lesson6 directory); do nothing if it is already there.
plotdir.mkdir(parents=True, exist_ok=True)

**Read in the PESTPP control file from Lesson4**

In [None]:
# Load the control file from the Lesson 4 host directory together with its
# residual file (passed as `resfile`), so the phi summaries below can be computed.
pst = pyemu.Pst(str(wkdir / 'wrfpst.pst'), resfile=str(wkdir / 'wrfpst.base.rei'))

In [None]:
# Display the parameter table of the loaded control file.
pst.parameter_data

In [None]:
# Peek at the first rows of the observation table (name, value, weight, group).
pst.observation_data.head()

In [None]:
# Pie chart of phi by observation group, before any regrouping in this notebook.
pst.plot(kind='phi_pie')

In [None]:
# Work on a copy; the control file's own table is updated later from this frame.
obs = pst.observation_data.copy()

# -9999 flags a missing observation; switch it to NaN while analysing the series.
is_missing = obs.obsval == -9999
obs.loc[is_missing, 'obsval'] = np.nan

# The first row gets no timestamp (presumably the 'kge' summary observation -
# verify); every other observation name is parsed into a datetime.
timestamps = [np.nan]
timestamps.extend(parsename(name) for name in obs.index[1:])
obs['dtime'] = timestamps

# Period masks depend only on the timestamps, so build both before relabelling.
in_burn_in = obs.dtime < '2018-08-05'
in_validation = (obs.dtime >= '2018-08-05') & (obs.dtime < '2018-08-15')

obs.loc[in_burn_in, 'obgnme'] = 'burn_in'          # burn-in period
obs.loc[in_validation, 'obgnme'] = 'validation'    # validation period

In [None]:
# Push the burn-in / validation labels into the control file's observation table
# (positional copy: `obs` has the same rows in the same order).
pst.observation_data['obgnme'] = obs['obgnme'].values

# Keep a separate record of the period labels so they can be re-applied after
# the control file is reloaded later on.
obgnames = obs['obgnme'].copy()
obgnames.loc['kge'] = 'kge'

obgnames.unique()

In [None]:
# Separate working copy of the observed values; this column is gap-filled later
# while 'obsval' is restored to the -9999 flag.
obs['discharge'] = obs.obsval

In [None]:
# Display the full working observation table.
obs.iloc[:]

## for next analysis, trim off kge and burn in period, and set index to datetime

In [None]:
# Drop the first row (no timestamp) and anything before 2018-08-01, then index
# the remaining observations by their timestamp (the column is kept as well).
on_or_after_start = obs.dtime >= '2018-08-01'
obs = obs.iloc[1:].loc[on_or_after_start]
obs.set_index('dtime', drop=False, inplace=True)

# Show the rows whose discharge is missing
obs[obs.discharge.isnull()]

In [None]:
# Gap-fill the working discharge series by linear interpolation so that
# quantiles and event detection see a continuous record.
obs['discharge'] = obs['discharge'].interpolate()

# 'obsval' itself goes back to the -9999 missing-value flag.
obs['obsval'] = obs['obsval'].fillna(-9999)

# quantiles

In [None]:
# set number of quantiles
quantiles=4

# Upper edge of each quantile bin of the gap-filled discharge; the last edge is
# the series maximum, so every non-NaN value falls into some bin.
quantile_vals = [obs.discharge.quantile((i + 1) / quantiles) for i in range(quantiles)]

quantile_vals

# Identify which quantile bin each observation belongs to.
# The column is created with object dtype up front: filling a float (NaN)
# column with strings relies on an implicit upcast that pandas >= 2.1 deprecates.
obs['quantile_grp'] = np.full(len(obs), np.nan, dtype=object)
lower_edge = None
for i, upper_edge in enumerate(quantile_vals, start=1):
    in_bin = obs.discharge <= upper_edge
    if lower_edge is not None:
        # bins are half-open: (previous edge, current edge]
        in_bin &= obs.discharge > lower_edge
    obs.loc[in_bin, 'quantile_grp'] = f'q{i}'
    lower_edge = upper_edge

# Every observation must have landed in a bin; a NaN discharge (e.g. a gap the
# interpolation could not fill) would be left unlabelled.
n_unassigned = int(obs.quantile_grp.isnull().sum())
assert n_unassigned == 0, f'{n_unassigned} observations were not assigned to a quantile group'

obs.quantile_grp.unique()

# report the size of each quantile group
for cn,cg in obs.groupby('quantile_grp'):
    print(cn, len(cg))

# event based weighting

In [None]:
# Detect hydrograph events in the observation table. The cells below use
# Qhe[0] as the timestamps labelled 'event_peak' and Qhe[1]['event_ends'] as the
# matching event end timestamps. NOTE(review): the meaning of wlen / prominence /
# height is defined by pytsproc - confirm there.
Qhe = series_metrics.hydro_events(obs,  wlen=50, prominence=25, height=2)

In [None]:
 # Hydrograph with one vertical line per detected event:
 # orange = Qhe[0] timestamps, green = event ends.
 ax = obs.discharge.plot(figsize=(14,4))
 for event_time in Qhe[0]:
     ax.axvline(event_time, c='orange', alpha=.4)
 for end_time in Qhe[1]['event_ends']:
     ax.axvline(end_time, c='green', alpha=.4)


In [None]:
# Display the second element returned by hydro_events (it holds 'event_ends').
Qhe[1]

In [None]:
 # Start from the quantile labels, then overwrite the samples that belong to a
 # detected event. Within each event the whole window is first marked
 # 'recession' and the window's first timestamp is then marked 'event_peak'.
 obs['obgnme'] = obs['quantile_grp']
 event_ends = Qhe[1]['event_ends']
 for peak_time, end_time in zip(Qhe[0], event_ends):
     inside_event = (obs.index >= peak_time) & (obs.index <= end_time)
     obs.loc[inside_event, 'obgnme'] = 'recession'
     obs.loc[obs.index == peak_time, 'obgnme'] = 'event_peak'

In [None]:
# Observations labelled as recession.
obs.loc[obs.obgnme=='recession']

In [None]:
# Observations labelled as event peaks.
obs.loc[obs.obgnme=='event_peak']

## Let's check out how the groups look in the hydrographs

In [None]:
# Plot the regrouped observations month by month into a multi-page PDF.
# plt.get_cmap is used because mpl.cm.get_cmap was deprecated in Matplotlib 3.7
# and removed in 3.9.
cm = plt.get_cmap('jet_r')
grps = obs.obgnme.unique()
# one colour per observation group, evenly spaced along the colormap
colors = [cm(k / len(grps)) for k in range(len(grps))]
event_marks = Qhe[0]
event_ends = Qhe[1]['event_ends']
with PdfPages(plotdir / 'obs_monthly_new_groups.pdf') as outpdf:
    # `obs` is indexed by timestamp, so the Grouper yields one frame per month
    for cn, cg in obs.groupby(pd.Grouper(freq="M")):
        if cg.empty:
            # a month without observations has nothing to plot
            continue
        print(f'plotting {cn.month:02d}/{cn.year}\r', end='')
        # thin grey line: the full hydrograph for the month
        ax = cg.discharge.plot(figsize=(10,6), lw=.3, color='k', alpha = .4)
        # vertical markers for the events that fall inside this month
        for t in event_marks:
            if t in cg.index:
                ax.axvline(t, c='orange', alpha=.4)
        for t in event_ends:
            if t in cg.index:
                ax.axvline(t, c='green', alpha=.4)
        groups_this_month = set(cg.obgnme.unique())
        for cgrp, c in zip(grps, colors):
            if cgrp in groups_this_month:
                pg = cg.loc[cg.obgnme==cgrp]
                # regularise to hourly steps so gaps between samples of the
                # same group show up as breaks in the coloured line
                pg = pg.set_index('dtime').asfreq('H')
                pg.discharge.plot(ax=ax, lw=2, color=c, label=cgrp)
        plt.legend()
        plt.title(f'{cn.month}/{cn.year}')
        plt.tight_layout()
        outpdf.savefig()
        plt.close('all')

## now bring the new observation group names back to `pst.observation_data`

In [None]:
# Copy the new group names back by observation name. The lookup is label-based,
# so it does not depend on the row order of either table; a second, positional
# copy of the same values is therefore unnecessary.
pst.observation_data.loc[obs.obsnme, 'obgnme'] = obs.obgnme.values

# Missing observations (obsval == -9999) are folded into the burn-in group ...
is_missing = pst.observation_data['obsval'] == -9999
pst.observation_data.loc[is_missing, 'obgnme'] = 'burn_in'

# ... and neither missing values nor the burn-in group may contribute to phi.
is_burn_in = pst.observation_data['obgnme'] == 'burn_in'
pst.observation_data.loc[is_missing | is_burn_in, 'weight'] = 0
pst.observation_data

In [None]:
# Pie chart of phi by observation group after the group names were updated.
pst.plot(kind='phi_pie')

In [None]:
# Per-group phi components in normalized form (reused below to build `new_portions`).
pst.phi_components_normalized

In [None]:
# Whatever is still in the original 'streamflow' group is moved to 'kge'.
still_streamflow = pst.observation_data['obgnme'] == 'streamflow'
pst.observation_data.loc[still_streamflow, 'obgnme'] = 'kge'

In [None]:
# Per-group phi components (un-normalized).
pst.phi_components

In [None]:
# Scale each group's normalized phi share by the number of non-zero-weighted
# observations.
n_nonzero = pst.nnz_obs
new_portions = {
    group: share * n_nonzero
    for group, share in pst.phi_components_normalized.items()
}
new_portions

# Prepare the PEST++ control file for an experiment with focus on low_flow 
- Name of the control file will be wrfpst.pst

In [None]:
# Start again from the control file on disk, with its residual file.
control_file = wkdir / 'wrfpst.pst'
residual_file = wkdir / 'wrfpst.base.rei'
pst = pyemu.Pst(str(control_file), resfile=str(residual_file))

# Re-apply the groups worked out above, in order of increasing precedence:
#   1. burn-in / validation period labels saved in `obgnames`
#   2. quantile / recession / event_peak labels from `obs`
#   3. the 'kge' observation keeps its own group
obsdata = pst.observation_data
obsdata.loc[obgnames.index, 'obgnme'] = obgnames
obsdata.loc[obs.obsnme, 'obgnme'] = obs.obgnme.values
obsdata.loc['kge', 'obgnme'] = 'kge'

In [None]:
# Missing observations (obsval == -9999) join the burn-in group.
obsdata = pst.observation_data
flagged_missing = obsdata['obsval'] == -9999
obsdata.loc[flagged_missing, 'obgnme'] = 'burn_in'

# Missing values, the burn-in group and the validation group get zero weight.
excluded = flagged_missing | obsdata['obgnme'].isin(['burn_in', 'validation'])
obsdata.loc[excluded, 'weight'] = 0

In [None]:
# Display the full observation table with the restored groups and zeroed weights.
pst.observation_data[:]

In [None]:
# Show which working directory the control file was read from.
wkdir

In [None]:
# Rebuild the working table from the reloaded control file.
obs = pst.observation_data.copy()
obs.loc[obs['obsval'] == -9999, 'obsval'] = np.nan   # missing flag -> NaN

# No timestamp for the first row; parse one from each remaining observation name.
parsed_times = [parsename(name) for name in obs.index[1:]]
obs['dtime'] = [np.nan, *parsed_times]

# Relabel by period: before 2018-08-05 is burn-in, 2018-08-05 up to (not
# including) 2018-08-15 is validation.
before_calibration = obs['dtime'] < '2018-08-05'
validation_window = (obs['dtime'] >= '2018-08-05') & (obs['dtime'] < '2018-08-15')
obs.loc[before_calibration, 'obgnme'] = 'burn_in'
obs.loc[validation_window, 'obgnme'] = 'validation'

In [None]:
# Observations that fall in the validation period.
obs[obs.obgnme == "validation"] 

### first set weights to 10% CV

In [None]:
# Pie chart of phi by group before the weights are reset below.
pst.plot(kind='phi_pie')

In [None]:
obsdata = pst.observation_data

# Weight = 10 / obsval for everything except the 'kge' observation, i.e. the
# inverse of a standard deviation equal to 10% of the observed value.
not_kge = obsdata['obgnme'] != 'kge'
obsdata.loc[not_kge, 'weight'] = 10 / obsdata.loc[not_kge, 'obsval']

# Missing observations (obsval == -9999) belong to the burn-in group.
flagged_missing = obsdata['obsval'] == -9999
obsdata.loc[flagged_missing, 'obgnme'] = 'burn_in'

# Missing values, burn-in and validation must not contribute to phi.
excluded = flagged_missing | obsdata['obgnme'].isin(['burn_in', 'validation'])
obsdata.loc[excluded, 'weight'] = 0

# The residual table keeps its own weight column; keep it in step with the new weights.
pst.res['weight'] = obsdata['weight'].values

In [None]:
# Check the first rows after the weights were reset.
pst.observation_data.head()

In [None]:
# Pie chart of phi by group with the 10 / obsval weights applied.
pst.plot(kind='phi_pie')

In [None]:
# Make sure there were no 0 flow values, which would result in infinite weights
# (weight = 10 / obsval). `.any()` reduces the check to a single boolean, so the
# assertion also works when only some of the weights are infinite.
n_infinite = int(np.isinf(pst.observation_data.weight.values).sum())
assert n_infinite == 0, f'{n_infinite} observations have an infinite weight (zero observed flow?)'

In [None]:
# Target share of phi for each observation group. The non-zero shares add up
# to 1; the lowest-flow quantile (q1) gets the largest share.
new_portions = dict(
    burn_in=0.0,
    event_peak=0.1,
    validation=0,
    kge=0.,
    q1=0.4,
    q2=0.1,
    q3=0.1,
    q4=0.1,
    recession=0.2,
)

In [None]:
# Turn the fractional shares into per-group phi targets by multiplying with the
# number of non-zero-weighted observations.
# NOTE: this rebinds `new_portions`, so running the cell twice scales twice.
n_nonzero = pst.nnz_obs
new_portions = {group: share * n_nonzero for group, share in new_portions.items()}

In [None]:
# The residual table carries its own group and weight columns; overwrite both
# with the current observation-table values so that the weight adjustment sees
# the new groups.
for res_column, obs_column in (('group', 'obgnme'), ('weight', 'weight')):
    pst.res[res_column] = pst.observation_data[obs_column].values

# Rescale the weights group by group towards the targets in `new_portions`.
pst.adjust_weights(obsgrp_dict=new_portions)

In [None]:
# Pie chart of phi by group after the weights were adjusted to the target shares.
pst.plot(kind='phi_pie')

# Rejection Sampling Based on Prior MC runs
### Next we need to recalculate phi with the new weights to perform rejection sampling


In [None]:
# Load the prior observation ensemble from the Lesson 5 directory and attach it
# to the reweighted `pst`. Realization names are read as strings.
obens = pyemu.ObservationEnsemble.from_csv(pst, str(priordir / 'wrfpst.0.obs.csv'),
                                           index_col=0, dtype={'real_name':str})

In [None]:
# First rows of the prior observation ensemble.
obens.head()

In [None]:
# Phi of each prior realization (one value per realization); presumably
# evaluated with the weights currently held in `pst` - verify in pyemu.
phi = obens.phi_vector
print(len(phi))

In [None]:
# No noise
# Histogram of the prior phi values, used to choose the cutoff below.
phi.hist(bins=10)

In [None]:
# Rejection threshold: only realizations with phi strictly below this value are kept.
phicutoff = 7500

In [None]:
# Report the ensemble size before and after rejecting realizations at or above the cutoff.
n_before = len(phi)
print(n_before)
phi = phi[phi < phicutoff]
n_after = len(phi)
print(n_after)


In [None]:
# Names of the realizations that survived the phi cutoff.
reals_to_keep = phi.index.values

In [None]:
# Parameter realizations that belong to the kept observation realizations.
prior_pars = pd.read_csv(priordir / 'wrfpst.0.par.csv', index_col=0, dtype={'real_name':str})
pp = prior_pars.loc[reals_to_keep]

# Renumber the kept realizations 0..n-2 and call the last one 'base'.
# NOTE(review): this assumes the last kept realization really is the prior
# 'base' realization, i.e. that 'base' passed the phi cutoff - confirm.
n_kept = len(pp)
pp.index = list(map(str, range(n_kept - 1))) + ['base']
pp.to_csv(priordir/'wrfpst.starting_pars.csv')

# Matching simulated observations, under the same realization names.
oe = obens._df.loc[reals_to_keep].copy()
oe.index = pp.index
oe.to_csv(priordir/'wrfpst.starting_obs.csv')

In [None]:
#pst.observation_data.to_csv(priordir/'wrfpst.obs_data.csv')

# Create iES run directory

### Delete the iES folder if it already exists.

In [None]:
# Start from a clean slate: remove a previous iES run folder, if there is one.
stale_ies_dir = Path('/home/docker/wrf-hydro-training/output/lesson6/iES_Run')
if stale_ies_dir.exists():
    shutil.rmtree('/home/docker/wrf-hydro-training/output/lesson6/iES_Run')

In [None]:
%%bash
# Seed the iES run folder with a copy of the Lesson 4 single-model-run setup,
# then list what was copied.
cp -r ~/wrf-hydro-training/output/lesson4/Single_Model_Run/ ~/wrf-hydro-training/output/lesson6/iES_Run
ls ~/wrf-hydro-training/output/lesson6/iES_Run

In [None]:
# Folders used for the iES setup, all below the common output root.
output_root = Path('/home/docker/wrf-hydro-training/output')
wkdir = output_root / 'lesson4' / 'host'        # Lesson 4 host directory
priordir = output_root / 'lesson5' / 'host'     # Lesson 5 host directory (prior ensemble)
iesdir = output_root / 'lesson6' / 'iES_Run'    # run folder for this lesson's iES experiment

In [None]:
#!cp ~/wrf-hydro-training/example_case/OBS/Observation_File_20180801_20180901.csv ~/wrf-hydro-training/output/lesson4/host/
# Reload the control file from the Lesson 4 host directory, this time with the
# 'wrfpst.0.base.rei' residual file.
# NOTE(review): this rebinds `pst`, so the observation groups and weights set up
# earlier in this notebook are not part of the object that is later written to
# the iES run folder - confirm that this is intended.
pst = pyemu.Pst(str(wkdir / 'wrfpst.pst'), resfile=str(wkdir / 'wrfpst.0.base.rei'))

In [None]:
# Re-read the prior observation ensemble, now attached to the reloaded `pst`.
obens = pyemu.ObservationEnsemble.from_csv(pst, str(priordir / 'wrfpst.0.obs.csv'),
                                           index_col=0, dtype={'real_name':str})

In [None]:
# Write the kept parameter realizations into the iES run folder.
prior_pars = pd.read_csv(priordir / 'wrfpst.0.par.csv', index_col=0, dtype={'real_name':str})
pp = prior_pars.loc[reals_to_keep]

# Realizations are renamed 0..n-2 with the last one called 'base'.
# NOTE(review): assumes the last kept realization is the prior 'base' - confirm.
renumbered = [str(k) for k in range(len(pp) - 1)]
renumbered.append('base')
pp.index = renumbered
pp.to_csv('/home/docker/wrf-hydro-training/output/lesson6/iES_Run/wrfpst.starting_pars.csv')

# Same realizations, same names, for the simulated observations.
oo = obens._df.loc[reals_to_keep].copy()
oo.index = pp.index
oo.to_csv('/home/docker/wrf-hydro-training/output/lesson6/iES_Run/wrfpst.starting_obs.csv')

In [None]:
#!cp ~/wrf-hydro-training/example_case/OBS/obs_noise_01473000_300ens_201808.csv ~/wrf-hydro-training/output/lesson5/host/wrfpst.obs+noise.csv 

In [None]:
# Observation-plus-noise ensemble shipped with the example case.
noise_csv = '/home/docker/wrf-hydro-training/example_case/OBS/obs_noise_01473000_300ens_201808.csv'
obs_noise_ens = pd.read_csv(noise_csv, index_col = 'real_name', dtype={'real_name':str})

# Keep the realizations that passed the phi cutoff and give them the same names
# as the starting parameter ensemble.
one = obs_noise_ens.loc[reals_to_keep].copy()
one.index = pp.index
one.to_csv('/home/docker/wrf-hydro-training/output/lesson6/iES_Run/wrfpst.starting_obs+noise.csv')


In [None]:
# First rows of the full (unfiltered) observation-plus-noise ensemble.
obs_noise_ens.head()

**Load the existing pest control file and set some problem specific PESTPP-IES settings**

In [None]:
# Number of realizations kept; used as ies_num_reals below.
len(reals_to_keep)

In [None]:
# Three iterations for this experiment.
pst.control_data.noptmax = 3

# PESTPP-IES settings: ensemble size equals the number of kept realizations, and
# the starting ensembles are the three CSV files written into the run folder.
pst.pestpp_options.update({
    "ies_num_reals": len(reals_to_keep),
    "overdue_giveup_minutes": 200,
    "ies_no_noise": 'false',
    "ies_observation_ensemble": 'wrfpst.starting_obs+noise.csv',
    "ies_restart_observation_ensemble": 'wrfpst.starting_obs.csv',
    "ies_parameter_ensemble": 'wrfpst.starting_pars.csv',
})

# Write the control file (version 2 format) into the iES run folder.
pst.write(iesdir/'wrfpst.pst', version=2)

**Step 6. Running PEST++ with WRF-Hydro**



In [None]:
# Launch PESTPP-IES on the control file written to the iES run folder.
# `worker_dir` is the prepared run folder, `worker_root` is where the worker
# folders are created (worker_0 is inspected in the next step), and
# `master_dir` is the folder used for the master instance.
# cleanup=False leaves the worker folders in place after the run.
# rel_path (str, optional) – the relative path to where pest(++) should be run from within the worker_dir, defaults to the uppermost level of the worker dir. 

pyemu.utils.os_utils.start_workers(worker_dir = "/home/docker/wrf-hydro-training/output/lesson6/iES_Run", 
                                   exe_rel_path = "pestpp-ies", 
                                   pst_rel_path = "wrfpst.pst", 
                                   num_workers=2, 
                                   worker_root='/home/docker/wrf-hydro-training/output/lesson6/',
                                   master_dir = "/home/docker/wrf-hydro-training/output/lesson6/host",
                                   port=4004, 
                                   verbose = True, 
                                   cleanup = False)


**Step 7. Let's check the run directory**

Check the PEST++ workers directory


In [None]:
%%bash 
# List the model folder inside the first worker's directory.
ls  ~/wrf-hydro-training/output/lesson6/worker_0/WRFHydro_Model/