# Lesson 7: Investigating the results of the IES runs

In [None]:
import pandas as pd
import pyemu
import sys
import shutil
import os
import numpy as np
from pathlib import Path
from datetime import datetime as dt
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
import matplotlib as mpl
#sys.path.append('../scripts/')
sys.path.append(os.path.abspath('../scripts/'))
from obs_helper import get_obs_df, parsename
from pytsproc import filters, series_metrics
plt.rcParams['font.size']=12
%matplotlib inline
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
# Short alias for pyemu's KGE metric; the calibration/validation cells call it
# as kge_calc(<ensemble series>, <observed series>).
# NOTE(review): _KGE is underscore-prefixed (non-public) in pyemu, so it may
# move or change between pyemu releases.
kge_calc = pyemu.metrics._KGE

In [None]:
# Run configuration: base name of the run files and the directories this
# notebook reads from (wkdir) and writes plots to (outdir).
test_root = 'wrfpst' # base name for the run
wkdir = Path('/home/docker/wrf-hydro-training/output/lesson6/host') # original host directory
outdir = Path('/home/docker/wrf-hydro-training/output/lesson7/plots')
# Create the plot directory together with any missing parent directories
# (a bare mkdir() raises FileNotFoundError if 'lesson7' does not exist yet);
# exist_ok=True makes re-running the cell harmless.
outdir.mkdir(parents=True, exist_ok=True)

# read in the observed values

In [None]:
# Load the observation table with the project helper get_obs_df, pointed at the
# worker_0 run directory. Its `obsval` column is joined onto the ensemble
# further down in the notebook.
obs= get_obs_df(Path('/home/docker/wrf-hydro-training/output/lesson6/worker_0/'))

In [None]:
obs

# read in the phi of the ensemble

In [None]:
# Phi (objective function) history read from <test_root>.phi.actual.csv in the
# host directory. No index column is set, so rows get a default 0..N-1 index.
# NOTE(review): later cells treat each row as one iES iteration and the columns
# from position 6 onward as individual realizations — confirm against the
# file header.
phi = pd.read_csv(wkdir / f'{test_root}.phi.actual.csv')

In [None]:
phi

In [None]:
# Plot phi against iES iteration: every realization as a faint black line,
# with the 'base' realization highlighted in red.
plt.figure(figsize=(6,4))
# Draw 'base' and one single realization first so that they are the first two
# lines on the axes; the two legend labels at the bottom of this cell are
# matched to lines in draw order.
ax = phi['base'].plot(legend=False, lw=1.5, color='r', label='base')
phi.iloc[:,6:7].plot(legend=False,lw=0.5,color='k',alpha=0.15,label='realizations', ax = ax)
# Now all realizations (columns from position 6 onward), then 'base' once more
# so the red line sits on top of the ensemble.
phi.iloc[:,6:].plot(legend=False,lw=0.5,alpha=0.15,color='k', ax = ax)
phi['base'].plot(legend=False, lw=1.5, color='r', ax=ax)
plt.ylabel('Phi')
plt.xlabel('iES iteration')
# One tick per row of the phi table (the x axis is the table's default
# 0..N-1 index) instead of a hard-coded count of four iterations.
plt.xticks(ticks=np.arange(len(phi)))
ax.axes.tick_params(length=7, direction='in', right=True, top=True)
plt.legend(['base','realizations'], title='EXPLANATION', frameon=False, bbox_to_anchor =(0.97, 0.95))


In [None]:
# Iteration whose ensemble is analysed in the rest of the notebook.
best_iter=3
# Phi of every realization at that iteration, with missing values dropped.
# Realization columns start at position 6 — the same offset the phi plot uses
# (iloc[:, 6:]). Starting at 5 would also pull in the column just before the
# realizations (presumably the 'max' summary statistic — confirm against the
# file header), which would distort the histogram and the realization counts
# and could leak a non-realization label into the list of realizations to keep.
phivec = phi.loc[best_iter].iloc[6:].copy().dropna()


In [None]:
phivec.hist(bins=5)

In [None]:
# Phi cutoff for rejection sampling: realizations whose phi is not strictly
# below this value are discarded in the following cell.
# NOTE(review): looks hand-picked for this particular run — revisit it after
# inspecting the phi histogram if the run changes.
phi_too_high = 903100000


In [None]:
# Rejection sampling on phi: show the distribution before (left) and after
# (right) discarding realizations at or above phi_too_high.
# Note: this cell overwrites phivec with the trimmed series.
fig, ax = plt.subplots(1, 2, figsize=(6, 4))

# left panel: untrimmed distribution with the cutoff marked
phivec.hist(bins=3, ax=ax[0])
ax[0].axvline(phi_too_high, color='k', label='cutoff PHI')
ax[0].legend()
ax[0].set_ylabel('Frequency')

# apply the cutoff and report the ensemble size on either side of it
print(f'Number of reals before rejection sampling: {len(phivec)}')
phivec = phivec.loc[phivec < phi_too_high]
print(f'Number of reals after rejection sampling: {len(phivec)}')

# right panel: trimmed distribution, drawn on the same x range as the left one
phivec.hist(bins=50, ax=ax[1])
ax[1].set_xlim(ax[0].get_xlim())

for panel, panel_title in zip(ax, ('PHI distribution', 'PHI distribution trimmed')):
    panel.set_xlabel('Realization PHI')
    panel.set_title(panel_title)

# rebuild the left-hand legend with automatic placement
handles, labels = ax[0].get_legend_handles_labels()
ax[0].legend(handles, labels, loc='best')

In [None]:
# Index labels of phivec after trimming, i.e. the names of the realizations
# that passed the phi cutoff; used below to subset the observation ensemble.
reals_to_keep = phivec.index.values
reals_to_keep

# read in the ensemble

In [None]:

# Load the simulated-observation ensemble for the chosen iteration. If the CSV
# is missing, unpack it first from the .tar archive of the same name that sits
# next to it in the host directory.
import tarfile  # used by the fallback below; not among the imports in the notebook's first cell

obs_csv = wkdir / f'{test_root}.{best_iter}.obs.csv'
if not obs_csv.exists():
    with tarfile.open(wkdir / f'{test_root}.{best_iter}.obs.csv.tar') as ctar:
        ctar.extractall(wkdir)
# First column becomes the index; real_name is requested as str.
obens = pd.read_csv(obs_csv, index_col=0, dtype={'real_name':str})

In [None]:
# Keep only the realizations that survived the phi cutoff; the two prints show
# the number of ensemble rows before and after the selection.
# NOTE(review): .loc raises KeyError if any label in reals_to_keep is missing
# from the obens index (e.g. a label-type mismatch) — confirm the two agree.
print(len(obens))
obens=obens.loc[reals_to_keep]
print(len(obens))

In [None]:
obens

In [None]:
# Keep the ensemble's 'kge' column (one value per realization) as its own
# series; it is used later to rank realizations when choosing which to plot.
kge = obens.kge

In [None]:
kge.hist(bins=5)

In [None]:
# Drop the leading column, then flip the table so that realizations become
# columns and the observation names become the rows.
obens = obens.loc[:, obens.columns[1:]].T
# Replace each row label with whatever the project helper parsename derives
# from it (later cells compare this index against date strings).
datetime = list(map(parsename, obens.index))
obens.index = datetime

# Grab a random subset of columns to plot

In [None]:
obens.columns

### Run one of the following two blocks to choose which individual ensemble members to plot

In [None]:
# choose random plotcols
# Draw up to 75 distinct realizations at random (the last column is excluded
# from the draw). Sampling is done without replacement: np.random.choice
# samples WITH replacement by default, which would repeat labels and in turn
# produce duplicate columns wherever obens is indexed with plotcols. The
# sample size is capped so the draw also works with fewer than 75 candidates.
candidates = obens.columns[:-1]
plotcols = np.random.choice(candidates, size=min(75, len(candidates)), replace=False)

In [None]:
# or choose best plotcols based on kge
# Number of top-ranked realizations to plot.
best_res = 25
# Sort kge in ascending order and take the labels of the last best_res entries.
kge_ascending = kge.sort_values()
plotcols = kge_ascending.index[-best_res:]

In [None]:
# get bounds of the ensemble
# Row-wise envelope of the ensemble, stored as two extra columns.
# The maximum is taken after min_ens has been added; that column can never
# exceed the row maximum, so it does not affect the result.
row_min = obens.min(axis=1)
obens['min_ens'] = row_min
row_max = obens.max(axis=1)
obens['max_ens'] = row_max
# Quick look at the band between the two bounds.
plt.fill_between(obens.index, obens['min_ens'], obens['max_ens'], alpha=.8)

In [None]:
obens

In [None]:
# bring in the observed values
# Adds the `obsval` column of the observation table to the ensemble table,
# aligned on the row index (default left join, so all obens rows are kept).
# NOTE(review): re-running this cell once `obsval` is already a column of
# obens will fail on the overlapping column name — re-run from the cell that
# loads obens instead.
obens = obens.join(obs.obsval)
#obens = obens.join(obs, rsuffix='_obs')


In [None]:
obens

In [None]:
# Write one PDF page per month showing the observations, the ensemble band
# (min_ens..max_ens) and the realizations selected in plotcols.
with PdfPages(outdir / 'ens_monthly_VALIDATION.pdf') as outpdf:
    # Rows from 2018-08-01 up to (but not including) 2018-08-31 are grouped by
    # calendar month; cn is the group's timestamp label, cg the rows in it.
    for cn, cg in obens.loc[(obens.index>='2018-08-01') & (obens.index<'2018-08-31')].groupby(pd.Grouper(freq="M")):
        # '\r' with end='' keeps the progress message on a single line
        print(f'plotting {cn.month:02d}/{cn.year}\r', end='')
        ax = cg.obsval.plot(color='orange', lw=1.5, figsize=(10,5), label='Observation')
        ax.fill_between(cg.index, cg.min_ens,cg.max_ens, alpha=.2, label='Ensemble Band')
        # A single realization is drawn first, purely to obtain one labelled
        # legend entry for the whole set of selected realizations.
        cg[plotcols[0]].plot(color='k', lw=.6, alpha=.2, ax=ax, legend=False, label='Select Realizations')
        #cg.base.plot(ax=ax, lw = 1.5, color='blue', label='Base Realization')
        # The legend is built now, before the remaining realizations are
        # drawn, so it only lists the artists that exist at this point.
        plt.legend()
        cg[plotcols].plot(color='k', lw=.2, alpha=.5, ax=ax, legend=False, label=None)
        plt.title(f'{cn.month}/{cn.year}')
        plt.tight_layout()
        # Fixed upper y limit; the commented line below is a data-driven alternative.
        plotmax = 150
        #plotmax = np.max((500, cg.max_ens.max()))
        ax.set_ylim([0,plotmax])
        # Save the current figure as a page, show it inline, then free it.
        outpdf.savefig()
        plt.show()
        plt.close('all')

In [None]:
# Write one PDF page per month with observations, ensemble band and the
# realizations selected in plotcols, all plotted as log10 of the values.
with PdfPages(outdir / 'ens_monthly_VALIDATION_log10.pdf') as outpdf:
    # Rows from 2018-08-01 up to (but not including) 2018-08-31, grouped by
    # calendar month; cn is the group's timestamp label, cg the rows in it.
    for cn, cg in obens.loc[(obens.index>='2018-08-01') & (obens.index<'2018-08-31')].groupby(pd.Grouper(freq="M")):
        # '\r' with end='' keeps the progress message on a single line
        print(f'plotting {cn.month:02d}/{cn.year}\r', end='')
        # Transform only the series that are actually drawn, rather than
        # log-transforming the entire frame twice just to read two columns.
        log_obs = np.log10(cg.obsval)
        log_min = np.log10(cg.min_ens)
        log_max = np.log10(cg.max_ens)
        ax = log_obs.plot(color='orange', lw=1.5, figsize=(10,5), label='Observation')
        ax.fill_between(cg.index, log_min, log_max, alpha=.2, label='Ensemble Band')

        # A single realization is drawn first, purely to obtain one labelled
        # legend entry for the whole set of selected realizations.
        np.log10(cg[plotcols[0]]).plot(color='k', lw=.6, alpha=.2, ax=ax, legend=False, label='Select Realizations')
        #cg.base.apply(np.log10).plot(ax=ax, lw = 1.5, color='blue', label='Base Realization')
        # Build the legend before the remaining realizations are drawn so it
        # only lists the artists that exist at this point.
        plt.legend()
        np.log10(cg[plotcols]).plot(color='k', lw=.2, alpha=.5, ax=ax, legend=False, label=None)
        plt.title(f'{cn.month}/{cn.year}')
        plt.tight_layout()
        # Upper limit: the larger of 500 and the month's ensemble maximum
        # (in original units), converted to log10 for the axis.
        plotmax = np.max((500, cg.max_ens.max()))
        ax.set_ylim([0,np.log10(plotmax)])
        # Save the current figure as a page, show it inline, then free it.
        outpdf.savefig()
        plt.show()
        plt.close('all')

# Check KGE of calibration and validation periods independently

In [None]:
# Observed series used as the reference for the KGE calculations.
obsval = obens['obsval']
#nwm_q = obens[['nwm_optimal','nwm_initial']]
# Ensemble columns: every column whose name contains none of these tags
# (which leaves out the observation and min/max envelope columns).
excluded_tags = ('nwm', 'obs', 'ens')
ppp_cols = [col for col in obens.columns
            if not any(tag in col for tag in excluded_tags)]
ppp_q = obens[ppp_cols]
# Only the realizations selected for plotting.
ppp_q_top = obens[plotcols]

In [None]:
# KGE of every realization against the observations, restricted to rows from
# 2018-08-05 up to (but not including) 2018-08-15. Results are stored per
# ensemble subset as a DataFrame indexed by realization with one 'kge' column.
val_kge = {}
val_window = (obens.index>='2018-08-05') & (obens.index<'2018-08-15')
obs_in_window = obsval.loc[val_window]
for cn, ce in (('ppp', ppp_q), ('ppp_top', ppp_q_top)):
    reals = ce.columns
    ens_in_window = ce.loc[val_window]
    kge_out = [kge_calc(ens_in_window[i], obs_in_window) for i in reals]
    kge_table = pd.DataFrame({'real':reals, 'kge':kge_out})
    val_kge[cn] = kge_table.set_index('real', drop=True)

In [None]:
# KGE of every realization against the observations, restricted to rows on or
# after 2018-08-15. Results are stored per ensemble subset as a DataFrame
# indexed by realization with one 'kge' column.
cal_kge = {}
cal_window = obens.index>='2018-08-15'
obs_in_window = obsval.loc[cal_window]
for cn, ce in (('ppp', ppp_q), ('ppp_top', ppp_q_top)):
    reals = ce.columns
    ens_in_window = ce.loc[cal_window]
    kge_out = [kge_calc(ens_in_window[i], obs_in_window) for i in reals]
    kge_table = pd.DataFrame({'real':reals, 'kge':kge_out})
    cal_kge[cn] = kge_table.set_index('real', drop=True)

In [None]:
# KGE histograms for the full ensemble: calibration on top, validation below,
# both on a 0..1 x axis.
fig, ax = plt.subplots(2, 1, figsize=(8, 8))

panels = (('Calibration', cal_kge['ppp']), ('Validation', val_kge['ppp']))
for panel_ax, (panel_title, kge_table) in zip(ax, panels):
    kge_table.hist(bins=50, ax=panel_ax)
    panel_ax.set_xlim((0, 1))
    panel_ax.set_title(panel_title)
#ax[0].axvline(cal_kge['nwm'].loc['nwm_optimal', 'kge'], lw=1.5, color='orange')
#ax[1].axvline(val_kge['nwm'].loc['nwm_optimal', 'kge'], lw=1.5, color='orange')

In [None]:
# KGE histograms for the selected (plotcols) realizations only: calibration on
# top, validation below, both on a 0..1 x axis with default binning.
fig, ax = plt.subplots(2, 1, figsize=(8, 8))

panels = (('Calibration', cal_kge['ppp_top']), ('Validation', val_kge['ppp_top']))
for panel_ax, (panel_title, kge_table) in zip(ax, panels):
    kge_table.hist(ax=panel_ax)
    panel_ax.set_xlim((0, 1))
    panel_ax.set_title(panel_title)
#ax[0].axvline(cal_kge['nwm'].loc['nwm_optimal', 'kge'], lw=1.5, color='orange')
#ax[1].axvline(val_kge['nwm'].loc['nwm_optimal', 'kge'], lw=1.5, color='orange')

# Explore the parameters

In [None]:
# Parameter ensemble for the chosen iteration, read from
# <test_root>.<best_iter>.par.csv in the host directory; the first column
# becomes the index and real_name is requested as str.
# NOTE(review): unlike the observation ensemble, there is no fallback here for
# a tar-archived file — confirm the CSV is always present.
parens = pd.read_csv(wkdir / f'{test_root}.{best_iter}.par.csv', index_col=0, dtype={'real_name':str})

In [None]:
parens