# Lesson 7: Investigating the results of the iES runs

Now that the model iES run is finished, we will look at how the objective function (phi) evolves across iterations, as well as the model performance (KGE only) at the best iteration.

In [None]:
import os
import shutil
import sys
import tarfile
import warnings
from datetime import datetime as dt
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyemu
from matplotlib.backends.backend_pdf import PdfPages
from pytsproc import filters, series_metrics

plt.rcParams['font.size']=12
%matplotlib inline
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [None]:
# Short alias for pyemu's KGE routine; called as kge_calc(simulated, observed) in the KGE cells below.
# NOTE(review): `_KGE` has a leading underscore, i.e. it is a private pyemu helper —
# confirm it is available in the installed pyemu version.
kge_calc = pyemu.metrics._KGE

Let us define where the output directory of the iES experiment is, and also the path where the figures will be saved for this lesson. 

In [None]:
# Root name shared by the iES output files read below ('<test_root>.<suffix>').
test_root = 'wrfpst'
wkdir = Path('/home/docker/wrf-hydro-training/output/lesson6/host') # original host directory
outdir = Path('/home/docker/wrf-hydro-training/output/lesson7/plots')
# Create the figure directory together with any missing parents; this is a
# no-op when the directory already exists, so the cell can be re-run safely.
outdir.mkdir(parents=True, exist_ok=True)

## Read in the phi of ensemble 
We will read the phi values for all the iterations.

In [None]:
# Read '<test_root>.phi.actual.csv' from the iES working directory into a DataFrame.
phi = pd.read_csv(wkdir / f'{test_root}.phi.actual.csv')

In [None]:
# Show the phi table read above (presumably one row per iES iteration — verify).
phi

In [None]:
# Phi of every realization (thin grey lines) and of the base realization (red)
# plotted against the iES iteration (the row index of `phi`).
fig, ax = plt.subplots(figsize=(6, 4))

# Columns 6 onward are treated as the individual realizations; the leading
# columns are presumably bookkeeping/summary columns — verify against the CSV header.
phi.iloc[:, 6:].plot(ax=ax, legend=False, lw=0.5, color='k', alpha=0.15)
# Draw the base realization last so that it stays visible on top of the ensemble.
phi['base'].plot(ax=ax, legend=False, lw=1.5, color='r')

ax.set_ylabel('Phi')
ax.set_xlabel('iES iteration')
# One tick per row of the phi table instead of a hard-coded count.
ax.set_xticks(np.arange(len(phi)))
ax.tick_params(length=7, direction='in', right=True, top=True)

# Build the legend from explicit handles: the base line was drawn last, and the
# first line on the axes is one of the identically styled realizations.
ax.legend([ax.lines[-1], ax.lines[0]], ['base', 'realizations'],
          title='EXPLANATION', frameon=False, bbox_to_anchor=(0.97, 0.95));


### Define what iteration is the best

From here on, we plot only the information related to this iteration.

In [None]:
# Iteration whose results are analysed in the rest of the notebook.
best_iter=2
# Phi of each realization at that iteration (NaN entries removed).
# Realizations start at column 6, the same offset the phi plot uses; starting at
# column 5 would also pull in the column just before the realizations
# (presumably an ensemble summary statistic — verify against the CSV header)
# and treat it as if it were a realization.
phivec = phi.loc[best_iter].iloc[6:].copy().dropna()

In [None]:
# Histogram (5 bins) of the realization phi values at the chosen iteration.
phivec.hist(bins=5)

### Rejection sampling 

We usually do not use all the ensemble members if the ensemble size is large; 
however, this training starts with only 30 members, a few of which were already dropped at the rejection sampling step prior to the iES. 
Going forward, we will drop any members whose phi value is at or above the cutoff `phi_too_high` (set to 100 in the next cell).

In [None]:
# Rejection cutoff: only realizations with phi strictly below this value are kept.
phi_too_high = 100

In [None]:
# Side-by-side histograms of realization phi: all members (left) and the
# members that survive the cutoff (right).
fig, ax = plt.subplots(1,2, figsize=(6,4))
ax_all, ax_trim = ax

# Left panel: every realization, with the rejection cutoff marked.
phivec.hist(bins=3, ax=ax_all)
ax_all.axvline(phi_too_high, color='k', label='cutoff PHI')
ax_all.set_ylabel('Frequency')

# Keep only the realizations whose phi is strictly below the cutoff.
print('Number of reals before rejection sampling: {}'.format(len(phivec)))
phivec = phivec[phivec<phi_too_high]
print('Number of reals after rejection sampling: {}'.format(len(phivec)))

# Right panel: surviving realizations on the same x-range as the left panel.
phivec.hist(bins=50, ax=ax_trim)
ax_trim.set_xlim(ax_all.get_xlim())

for panel, title in ((ax_all, 'PHI distribution'), (ax_trim, 'PHI distribution trimmed')):
    panel.set_xlabel('Realization PHI')
    panel.set_title(title)

# Legend for the cutoff line on the left panel.
handles, labels = ax_all.get_legend_handles_labels()
ax_all.legend(handles, labels, loc='best')

In [None]:
# Labels of the realizations that passed the phi cutoff; used below to subset the ensemble.
reals_to_keep = phivec.index.values
reals_to_keep

## Read in the ensemble of model simulations 

In [None]:
# Load the simulated-output ensemble of the chosen iteration. When the CSV is
# not present, unpack it from the tar archive in the working directory first.
obs_csv = wkdir / f'{test_root}.{best_iter}.obs.csv'
if not obs_csv.exists():
    # `tarfile` must be imported in the imports cell at the top of the notebook.
    with tarfile.open(wkdir / f'{test_root}.{best_iter}.obs.csv.tar') as ctar:
        ctar.extractall(wkdir)
# The first CSV column becomes the row index; `real_name` is read as str
# (presumably so the labels match the phi column names — verify).
obens = pd.read_csv(obs_csv, index_col=0, dtype={'real_name':str})

In [None]:
# Report the ensemble size, keep only the realizations that passed the phi
# cutoff, and report the size again.
print(obens.shape[0])
obens = obens.loc[reals_to_keep, :]
print(obens.shape[0])

In [None]:
# Show the filtered ensemble (one row per kept realization).
obens

The first entry of the observation is the kge values calculated through the post processing script. We display the KGE values which are for duration of the calibration period. 

In [None]:
# KGE value of each kept realization, taken from the `kge` column of the ensemble.
kge = obens['kge']

In [None]:
# Histogram (5 bins) of the per-realization KGE values.
kge.hist(bins=5)

In [None]:
# Helper that converts an observation name into its timestamp
def parsename(cn):
    '''
    Return the datetime encoded in a WRF_hydro observation name.

    Every occurrence of 'obs_' is removed from `cn`; what remains must have the
    form YYYYMMDD_HH0000 (the trailing '0000' is matched literally).
    strptime raises ValueError for any other layout.
    '''
    stamp = ''.join(cn.split('obs_'))
    return dt.strptime(stamp, '%Y%m%d_%H0000')

In [None]:
# Drop the leading column, then transpose so that rows are observation names
# and columns are realizations.
obens = obens.iloc[:, 1:].T

# Replace the observation-name index with the timestamps parsed from those names.
datetime = list(map(parsename, obens.index))
obens.index = datetime

# Plot the streamflow ensemble 

First, let's read the observations. We define a simple function that reads them so they can be used for plotting. 

In [None]:


# Function for reading streamflow values 
def get_obs_df(wkdir, root='wrfpst'):
    '''
    Read the observation file and return it as a DataFrame indexed by datetime.

    Parameters
    ----------
    wkdir : pathlib.Path
        Directory that contains '<root>.obs_data.csv'.
    root : str, optional
        Root name used to build the file name (default 'wrfpst').

    Returns
    -------
    pandas.DataFrame
        The observation table without its first row (the KGE entry), indexed
        by the datetime parsed from `obsnme`. The parsed datetimes are also
        kept as a 'datetime' column, and negative `obsval` entries are
        replaced by NaN.
    '''
    # Skip the first row (KGE). Take an explicit copy so the assignments below
    # modify an independent frame rather than a slice of the frame read from disk.
    obs = pd.read_csv(wkdir / f'{root}.obs_data.csv').iloc[1:].copy()

    obs['datetime'] = [parsename(i) for i in obs.obsnme]
    # Passing the Series (not the column name) keeps 'datetime' as a column too.
    obs = obs.set_index(obs['datetime'])

    # set missing values as nan
    obs.loc[obs.obsval<0, 'obsval'] = np.nan

    return obs

# Load the observations from the iES working directory and display them.
obs= get_obs_df(wkdir)
obs

## Let's join the observation to the model simulations 

In [None]:
# bring in the observed values
# (index-aligned join: adds an `obsval` column matched on the datetime index).
# NOTE: not re-runnable as is — a second run finds `obsval` already present in obens.
obens = obens.join(obs.obsval)

In [None]:
# Show the simulations with the observed values joined in as the `obsval` column.
obens

In [None]:
# One figure per calendar month: observation (orange), base realization (blue)
# and every column of the month's frame in thin grey. Each figure is shown
# inline and appended as a page to a single PDF.
with PdfPages(outdir / 'ens_monthly.pdf') as outpdf:
    monthly_groups = obens.groupby(pd.Grouper(freq="M"))
    for month_end, month_df in monthly_groups:
        # '\r' rewrites the same output line as a progress indicator
        print(f'plotting {month_end.month:02d}/{month_end.year}\r', end='')

        ax = month_df.obsval.plot(color='orange', lw=1.5, figsize=(10,5), label='Observation')
        month_df.base.plot(ax=ax, lw = 1.5, color='blue', label='Base Realization')
        month_df.plot(color='k', lw=.6, alpha=.2, ax=ax, legend=False, label='Realizations')

        # The three labels are assigned to the first three lines drawn on the axes.
        ax.legend(labels=['Observation','Base Realization','Realizations'])
        ax.set_title(f'{month_end.month}/{month_end.year}')
        ax.figure.tight_layout()
        ax.set_ylim([0,500])

        outpdf.savefig(ax.figure)
        plt.show()
        plt.close('all')

In [None]:
# Same monthly figures as the linear-scale version, but with log10 applied to
# the observation, the base realization and every column before plotting.
with PdfPages(outdir / 'ens_monthly_log10.pdf') as outpdf:
    monthly_groups = obens.groupby(pd.Grouper(freq="M"))
    for month_end, month_df in monthly_groups:
        # '\r' rewrites the same output line as a progress indicator
        print(f'plotting {month_end.month:02d}/{month_end.year}\r', end='')

        log_obs = month_df.obsval.apply(np.log10)
        ax = log_obs.plot(color='orange', lw=1.5, figsize=(10,5), label='Observation')
        log_base = month_df.base.apply(np.log10)
        log_base.plot(ax=ax, lw = 1.5, color='blue', label='Base Realization')
        log_all = month_df.apply(np.log10)
        log_all.plot(color='k', lw=.6, alpha=.2, ax=ax, legend=False, label='Realizations')

        # The three labels are assigned to the first three lines drawn on the axes.
        ax.legend(labels=['Observation','Base Realization','Realizations'])
        ax.set_title(f'{month_end.month}/{month_end.year}')
        ax.figure.tight_layout()
        # y-range in log10 units: from 0 up to log10(500)
        ax.set_ylim([0,np.log10(500)])

        outpdf.savefig(ax.figure)
        plt.show()
        plt.close('all')


# Check KGE of calibration and validation periods independently

In [None]:
# Observed series, plus the simulated columns only: every column whose name
# contains neither 'obs' nor 'ens'.
obsval = obens['obsval']
sim_cols = [col for col in obens.columns if not ('obs' in col or 'ens' in col)]
ens_all = obens[sim_cols]

In [None]:
# KGE of every realization over the validation window:
# 2018-08-02 (inclusive) up to 2018-08-10 (exclusive).
val_window = (obens.index>='2018-08-02') & (obens.index<'2018-08-10')
sim_val = ens_all.loc[val_window]
obs_val = obsval.loc[val_window]

reals = ens_all.columns
kge_out = [kge_calc(sim_val[real], obs_val) for real in reals]

# One row per realization, indexed by realization name.
val_kge = pd.DataFrame({'real':reals,
                        'kge':kge_out}).set_index('real', drop=True)

In [None]:
# KGE of every realization over the calibration window: 2018-08-10 onward.
cal_window = obens.index>='2018-08-10'
sim_cal = ens_all.loc[cal_window]
obs_cal = obsval.loc[cal_window]

reals = ens_all.columns
kge_out = [kge_calc(sim_cal[real], obs_cal) for real in reals]

# One row per realization, indexed by realization name.
cal_kge = pd.DataFrame({'real':reals,
                        'kge':kge_out}).set_index('real', drop=True)

In [None]:
# Stacked histograms of per-realization KGE: calibration on top, validation
# below, both shown on the x-range 0 to 1.
fig, ax = plt.subplots(2,1, figsize=(8,8))

panels = zip(ax, (cal_kge, val_kge), ('Calibration', 'Validation'))
for panel, kge_frame, period in panels:
    kge_frame.hist(ax=panel)
    panel.set_xlim((0,1))
    panel.set_title(period)

# Only the bottom panel carries the x-axis label.
ax[1].set_xlabel('KGE', fontweight ='bold')