# Setup the PEST(++) interface around the enhanced Freyberg model

In this notebook, we will construct a complex model independent (non-intrusive) interface around an existing `MODFLOW-NWT` model using the `python/flopy/pyemu` stack.

In [None]:
%matplotlib inline
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import flopy
import pyemu
import prep_deps
import matplotlib as mpl
plt.rcParams['font.size']=12
%matplotlib inline

First we define a base directory `b_d` from which we will read an existing model via its name file, `freyberg.nam`. This model forms the basis for the remainder of the exercise (and those to follow).

In [None]:
# show the current working directory; the relative paths used below are resolved against it
os.getcwd()

In [None]:
# Folder with the existing Freyberg model files, two directory levels up from the working directory.
b_d = os.path.join(os.pardir, os.pardir, "data", "freyberg_nwt")
# Name file of the existing model, given without any path component.
nam_file = "freyberg.nam"

### load the existing Freyberg model. This version should run but is not yet connected with `PEST++`

In [None]:
# To load a model that lives in another folder, pass the bare name file
# and point model_ws at the folder that contains it.
m = flopy.modflow.Modflow.load(
    nam_file,
    model_ws=b_d,
    check=False,
    forgive=False,
)


### some visuals

In [None]:
# Map view of the model: grid, ibound, SFR cells, observation sites and pumping wells.
fig = plt.figure(figsize=(12,7))
ax = plt.subplot(111,aspect="equal")
mm = flopy.plot.ModelMap(model=m)
mm.plot_grid()
mm.plot_ibound()
mm.plot_bc('SFR')
ax = mm.ax

# cell-centre coordinate arrays, looked up once and reused below
xcg = m.sr.xcentergrid
ycg = m.sr.ycentergrid

# observation sites; row/col from the csv are shifted by -1 before indexing
# (presumably they are stored one-based - confirm against obs_loc.csv)
obs = pd.read_csv(os.path.join(b_d,"obs_loc.csv"))
obs_rc = obs.loc[:,["row","col"]].values
obs_x = [xcg[r-1,c-1] for r,c in obs_rc]
obs_y = [ycg[r-1,c-1] for r,c in obs_rc]
ax.scatter(obs_x,obs_y,marker='.',label="water-level obs",s=80)

# pumping wells taken from entry 0 of the WEL stress period data, one red square each
wel_data = m.wel.stress_period_data[0]
wel_i, wel_j = wel_data["i"], wel_data["j"]
wel_x = xcg[wel_i,wel_j]
wel_y = ycg[wel_i,wel_j]
for x,y in zip(wel_x,wel_y):
    ax.scatter([x],[y],color="red",marker="s",s=50)

ax.set_xlabel("x(m)")
ax.set_ylabel("y(m)")
plt.show()

In [None]:
# list the packages attached to the loaded model
m.get_package_list()

In [None]:
# tabulate the WEL entries stored under key 0 of the stress period data
pd.DataFrame(m.wel.stress_period_data.data[0])

### we can do a couple `flopy` things to move where the new model will be written

In [None]:
# assign the executable name for the model
m.exe_name = "mfnwt"

# now let's run this in a new folder called temp so we don't overwrite the original data
m.change_model_ws("temp",reset_external=True)

# this writes all the MODFLOW files in the new location 
m.write_input()

# the following helps get the dependencies (both python and executables) in the right place
prep_deps.prep_template(t_d="temp")

### now we can run the model once using a `pyemu` helper
This helper is particularly useful if you run on more than one platform (e.g. Mac and Windows)

In [None]:
# command line "<exe> <name file>", executed from inside the model workspace
run_cmd = "{0} {1}".format(m.exe_name,m.name+".nam")
pyemu.os_utils.run(run_cmd,cwd=m.model_ws)

### read in the heads and plot them up along with the budget components
Note that there is a historic period and a scenario with future conditions that differ. 

_For the future scenario, a serious drought, recharge is lower and pumping/abstraction is increased to make up for the presumed deficit in water for agriculture._

In [None]:
# output files written by the model run, located in the model workspace
hds_path = os.path.join(m.model_ws,m.name+".hds")
lst_path = os.path.join(m.model_ws,m.name+".list")

# simulated heads, model layer 0
plt.figure()
hds = flopy.utils.HeadFile(hds_path,model=m)
hds.plot(mflay=0)

# budget from the list file; keep the first frame returned by get_dataframes
lst = flopy.utils.MfListBudget(lst_path)
df = lst.get_dataframes(diff=True)[0]
plt.figure()
ax = df.plot(kind="bar",figsize=(6,6), grid=True)
ax.set_xticklabels(["historic","scenario"])

We can see the effect of the "scenario" in the second stress period with less recharge and more abstraction. 

### Plot depth to water

In [None]:
# inspect the types of the model top and of the first head slice, plus that slice's shape
type(m.dis.top), type(hds.get_data()[0,:,:]), hds.get_data()[0,:,:].shape

In [None]:
# depth to water = model top minus the first slice of the head array,
# hidden wherever ibound of layer 0 equals zero
head_first = hds.get_data()[0,:,:]
inactive = m.bas6.ibound[0].array==0
dtw = np.ma.masked_where(inactive,m.dis.top.array - head_first)
c = plt.imshow(dtw)
plt.title('Depth to Water')
plt.colorbar(c)

we can see the river and well locations expressed in the depth to water pattern.

## Setup data structures related to what we want to parameterize and what we want to observe

### first the parameterization of model inputs

In [None]:
# Array-type inputs to parameterize, as [package.attribute, index] pairs.
# Properties that do not change over time: one entry per property and layer.
paks = ["upw.hk","upw.vka","upw.ss","upw.sy","bas6.strt"]
props = [[p,k] for k in range(m.nlay) for p in paks]
# Recharge for both stress periods (zero-based! Python style).
props += [["rch.rech",kper] for kper in (0,1)]

props

### we want to handle list-type parameters in two ways
for `spatial_list_props` this will apply a spatially distributed multiplier that is applied in all stress periods throughout the model

for `temporal_list_props` this will apply a multiplier for each stress period applied to all the spatial locations

In [None]:
# [package.attribute, index] pairs handed to PstFromFlopyModel further down.
# NOTE(review): the integer is presumably a layer index for the spatial entries and a
# stress-period index for the temporal entries - confirm against the pyemu docs.
spatial_list_props = [
    ["wel.flux", 2],
    ["drn.cond", 0],
]  # spatially by each list entry, across all stress periods
temporal_list_props = [["wel.flux", kper] for kper in range(2)]  # spatially uniform for each stress period

spatial_list_props, temporal_list_props

### next we want to set up the extraction of model outputs for which we have observations. First, we will setup a post-processor that will read the heads for all active cells in both stress periods - why not?

In [None]:
# [kper, k] pairs: every layer for stress period 0, followed by every layer for stress period 1
hds_kperk = [[kper,k] for kper in (0,1) for k in range(m.nlay)]

hds_kperk

### then we setup monitoring of the SFR ASCII outputs.  
we will accumulate the first 20 reaches and last 20 reaches (corresponding to the top and bottom half of the model, respectively) together to form forecasts of sw-gw exchange in the headwaters (`hw`) and tailwaters (`tw`).  Then we will also add each reach individually for monitoring as well

In [None]:
# Map observation names to the 1-based SFR reach numbers they aggregate.
sfr_obs_dict = {}
n_half = m.nrow // 2
# headwaters: reaches 1..n_half (np.arange excludes the stop value, hence the +1)
sfr_obs_dict["hw"] = np.arange(1, n_half + 1)
# tailwaters: reaches n_half+1..nrow, so the final reach is no longer dropped
sfr_obs_dict["tw"] = np.arange(n_half + 1, m.nrow + 1)
# every reach individually: zero-based key i -> reach number i+1
for i in range(m.nrow):
    sfr_obs_dict[i] = i + 1

In [None]:
# display the observation-name -> reach mapping
sfr_obs_dict

### here we go...

This `pyemu` class has grown into a monster...it does (among other things):
- sets up combinations of multiplier parameters for array inputs, including uniform, zones, pilot points, grids, and KL expansion types
- sets up combinations of multiplier parameters for list inputs
- handles several of the awkward MODFLOW exceptions to the array and list style inputs
- sets up large numbers of observations based on arrays or time series
- writes .tpl, .ins, .pst, etc
- writes a python forward run script
- writes a prior parameter covariance matrix using geostatistical correlations
- draws from the prior parameter covariance matrix to generate a prior parameter ensemble

WAT?!

This will be slow because of the pure-Python kriging... but, hey, it's free!

For our purposes, we will setup combinations of constant (by layer), pilot points and grid-scale parameters for each of the array-based properties we defined earlier.  This lets us explore options for parameterization and also start to understand how information flows in the history matching problem


In [None]:
# Build the PEST interface: "temp" is the source workspace, "template" the new one.
pst_helper = pyemu.helpers.PstFromFlopyModel(
    nam_file,
    org_model_ws="temp",
    new_model_ws="template",
    remove_existing=True,
    model_exe_name="mfnwt",
    # the same property list feeds constant, grid-scale and pilot-point parameters
    const_props=props,
    grid_props=props,
    pp_props=props,
    pp_space=4,
    # list-type parameters
    spatial_list_props=spatial_list_props,
    temporal_list_props=temporal_list_props,
    sfr_pars=True,
    # model outputs to observe
    hds_kperk=hds_kperk,
    sfr_obs=sfr_obs_dict,
    # the prior covariance is built explicitly in a later cell
    build_prior=False,
)
prep_deps.prep_template(t_d=pst_helper.new_model_ws)

The `pst_helper` instance contains the `pyemu.Pst` instance:

In [None]:
# so, pull out the `pyemu.Pst` instance, which
# contains all the input that ultimately goes into the PEST control file
pst = pst_helper.pst
# number of parameters and number of observations
pst.npar,pst.nobs

### Oh snap!

`pyemu` uses `pandas` data frame format for the parameter and observation data sections. This offers plenty of querying and bulk editing options.

Let's stop for a moment to get a better feel for what just happened! Let's dig in..

In [None]:
# hydraulic conductivity parameters: every row whose parameter name contains "hk"
par_data = pst.parameter_data
par_data.loc[par_data.parnme.str.contains("hk",regex=False),:]

In [None]:
# what about observations? in particular, the sfr flow-out observations in the last stress period?
obs_df = pst.observation_data
is_flout = obs_df.obgnme.str.contains("flout",regex=False)  # group name contains "flout"
is_1980 = obs_df.obsnme.str.contains("1980",regex=False)    # observation name contains "1980"
obs_df.loc[is_flout & is_1980,:]

### Final bits and bobs
We need to set some realistic parameter bounds and account for expected (but stochastic) scenario conditions:

In [None]:
par = pst.parameter_data  # the frame inspected earlier
# substring of the parameter name -> [lower bound, upper bound]
tag_dict = {"hk":[0.1,10.0],"vka":[0.1,10],"strt":[0.95,1.05]}
for tag,(lower,upper) in tag_dict.items():
    # rows whose parameter name contains the tag
    tagged = par.parnme.apply(lambda name: tag in name)
    par.loc[tagged,"parubnd"] = upper
    par.loc[tagged,"parlbnd"] = lower

Given the combinations of multipliers, we need to set a hard upper bound on sy since it has a physical upper limit (note: this is separate from the bounds handled explicitly by PEST).

In [None]:
# arr_pars.csv lives in the template workspace
# NOTE(review): presumably the table that drives how array multipliers are applied - confirm
arr_csv = os.path.join(pst_helper.new_model_ws,"arr_pars.csv")
# the first csv column becomes the index
df = pd.read_csv(arr_csv,index_col=0)
df.head()

In [None]:
# rows whose model file name contains "sy"
sy_pr = df.model_file.apply(lambda x: "sy" in x)
# start with no upper bound anywhere; np.nan is used because the np.NaN alias
# was removed in NumPy 2.0
df.loc[:,"upper_bound"] = np.nan
# cap only the sy rows at 0.4
df.loc[sy_pr,"upper_bound"] = 0.4
# overwrite the csv in place so the new column is stored with it
df.to_csv(arr_csv)

In [None]:
# table can also be written to a .tex file (report-ready!)
# NOTE(review): filename="none" presumably skips writing a file and only returns the table - confirm
pst.write_par_summary_table(filename="none").sort_index()

In [None]:
# first rows of the observation summary table
# NOTE(review): filename="none" presumably skips writing a file - confirm
pst.write_obs_summary_table(filename="none").head()

Let's run the process once (`noptmax=0`) to make sure it's all plumbed up.  Pro-tip: you can use any of the `pestpp-###` binaries/executables to run `noptmax=0`

In [None]:
# noptmax = 0 for a single check run
pst.control_data.noptmax = 0
# write the control file into the template workspace and run PEST++ from there
pst.write(os.path.join(pst_helper.new_model_ws,"freyberg.pst"))
pyemu.os_utils.run("pestpp-ies freyberg.pst",cwd=pst_helper.new_model_ws)


It's always good practice to `assert` here and there:

In [None]:
# reload the control file from disk; from here on `pst` refers to this new object,
# which is no longer the same object as `pst_helper.pst`
pst = pyemu.Pst(os.path.join(pst_helper.new_model_ws,"freyberg.pst"))
# NOTE(review): phi is presumably computed from the residuals of the run above - confirm
assert pst.phi < 1.0e-10


Now let's take it up a notch. We need to generate the prior parameter covariance matrix and stochastic realizations.  We will use the geostatistical covariance information in the `pst_helper` instance for this:

In [None]:
# Build the prior covariance only below 15000 parameters
# (presumably to keep the matrix manageable - confirm the intent of this limit).
if pst_helper.pst.npar < 15000:
    cov = pst_helper.build_prior(fmt="coo",filename=os.path.join(pst_helper.new_model_ws,"prior_cov.jcb"))
    # mask the zero entries; note that `cov` is rebound from the returned object to a masked array
    cov = np.ma.masked_where(cov.x==0,cov.x)
    # explicit switch instead of a hard-coded `if False:`; set to True to image the matrix
    plot_prior_cov = False
    if plot_prior_cov:
        try:
            fig = plt.figure(figsize=(10,10))
            ax = plt.subplot(111)
            ax.imshow(cov)
            plt.show()
        except Exception as e:
            # plotting stays best-effort, but the failure is reported rather than hidden
            print("could not plot the prior covariance matrix: {0}".format(e))

### now we can make a draw from the prior parameter covariance matrix to form a prior parameter ensemble

In [None]:
# NOTE(review): 100 is presumably the number of realizations in the ensemble - confirm
pe = pst_helper.draw(100)

You can see that parameters are treated in parameter group (`pargp`) blocks for this ensemble generation. 

Always a good idea to inspect the parameter ensemble for reasonableness! Can do via slicing and dicing...

In [None]:
# five rows near the end (positions -10 to -6) and the first ten columns
pe.iloc[-10:-5,:10]

Let's plot histograms of the ensemble, with the parameters collected by parameter group:

In [None]:
par = pst_helper.pst.parameter_data
# plot_cols maps each parameter group (pargp) to the index labels of its rows
pyemu.plot_utils.ensemble_helper(pe,plot_cols=par.groupby("pargp").groups,bins=20)
plt.show()

Thoughts? Do these look reasonable? We see log-normal distributions for log-transformed parameters, e.g., hk... looking good!

Now we need to enforce parameter bounds and save this ensemble for later

In [None]:
pe.enforce()  # always a good idea!
# store the ensemble as prior.jcb in the template workspace
pe.to_binary(os.path.join(pst_helper.new_model_ws,"prior.jcb"))

### set weights and values for "observations" and identify forecasts

The next major task is to set the weights on the observations.  So far, in the `pst_helper` process, we simply identified what outputs from the model we want to "observe".  We now use a pre-cooked csv file to set nonzero weights only for GW level observation locations used in the original Freyberg model.  We will also use the SFR flow out of the last reach (`fo` in the last row in `19791230`)

In [None]:
obs_locs = pd.read_csv(os.path.join(b_d,"obs_loc.csv"))
# observation names matching the obsnme values in the control file;
# row and col are each shifted by -1 before being zero-padded to three digits
obs_locs.loc[:,"obsnme"] = [
    "hds_00_{0:03d}_{1:03d}_000".format(r-1,c-1)
    for r,c in zip(obs_locs["row"],obs_locs["col"])
]
obs_locs

Set all weights to zero first, then turn on the weights at only a few locations.  We will set more meaningful weights later.

In [None]:
obs = pst.observation_data
# zero every weight, then switch on the head observation sites and regroup them
obs.loc[:,"weight"] = 0.0
obs.loc[obs_locs.obsnme,"weight"] = 1.0
obs.loc[obs_locs.obsnme,"obgnme"] = "calhead"
# flow-out observation name built from nrow-1 and the 19791230 date tag
fo_obs = "fo_{0}_19791230".format(pst_helper.m.nrow-1)
obs.loc[fo_obs,"weight"] = 1.0
obs.loc[fo_obs,"obgnme"] = "calflux"
# display the nonzero-weighted observation names
pst.nnz_obs_names

Now we need to set the actual "observed values"

In [None]:
# the first csv column becomes the index; it is used below to address rows of `obs`
obs_data = pd.read_csv(os.path.join(b_d,"obs_data.csv"),index_col=0)
obs_data

In [None]:
# copy observed values and weights from the csv onto the rows with matching index labels
obs.loc[obs_data.index,"obsval"] = obs_data.loc[:,"obsval"]
obs.loc[obs_data.index,"weight"] = obs_data.loc[:,"weight"]
# show the rows that now carry a nonzero weight
pst.observation_data.loc[pst.nnz_obs_names,:]

Normally, that process will take significant amounts of data wrangling...

Now we will define which model outputs are going to be treated as "forecasts"

In [None]:
def _is_swgw_forecast(name):
    """Return True for names containing "fa" together with "hw" or "tw"."""
    return "fa" in name and ("hw" in name or "tw" in name)

swgw_forecasts = obs.loc[obs.obsnme.apply(_is_swgw_forecast),"obsnme"].tolist()
swgw_forecasts

In [None]:
# head forecast site: name prefix built from nrow/3 and ncol/10 (truncated to int)
hds_fore_name = "hds_00_{0:03d}_{1:03d}".format(int(pst_helper.m.nrow/3),int(pst_helper.m.ncol/10))
hds_forecasts = obs.loc[obs.obsnme.apply(lambda x: hds_fore_name in x),"obsnme"].tolist()
# build a NEW list: extending an alias of swgw_forecasts mutated that list, so
# re-running this cell appended the head forecasts again
forecasts = swgw_forecasts + hds_forecasts
# `pst` was reloaded from disk earlier and is the object written to freyberg.pst,
# so the option must be set on it for the forecasts to reach the control file
pst.pestpp_options["forecasts"] = forecasts
# keep the helper's own Pst instance in step as well
pst_helper.pst.pestpp_options["forecasts"] = forecasts
forecasts

After all these changes to the pst object, we need to re-write the pcf!

In [None]:
# overwrite freyberg.pst in the template workspace with the edited control data
pst.write(os.path.join(pst_helper.new_model_ws,"freyberg.pst"))

Run one last time.  `phi` should be near zero since we haven't changed the `parval1` values for the historic stress period and only the 13 gw level obs have nonzero weights

In [None]:
# same executable name as the earlier noptmax=0 run (no platform-specific ".exe" suffix)
pyemu.os_utils.run("pestpp-ies freyberg.pst",cwd=pst_helper.new_model_ws)
# reload the control file after the run and show phi
pst = pyemu.Pst(os.path.join(pst_helper.new_model_ws,"freyberg.pst"))
pst.phi

In [None]:
# read the list file from the helper's workspace rather than a hard-coded folder name,
# so this cell follows new_model_ws like the other cells do
lst = flopy.utils.MfListBudget(os.path.join(pst_helper.new_model_ws,"freyberg.list"))
# keep the first frame returned by get_dataframes and plot it as bars
df = lst.get_dataframes(diff=True)[0]
df.plot(kind="bar",figsize=(10,10), grid=True)
plt.show()

In [None]:
pst = pyemu.Pst(os.path.join(pst_helper.new_model_ws,"freyberg.pst"))
print(pst.phi)
plt.figure()
pst.plot(kind='phi_pie');
print('Here are the non-zero weighted observation contributions to phi')

figs = pst.plot(kind="1to1");
plt.show()
# residuals of the nonzero-weighted observations; placed last because only the
# final expression of a cell is displayed
pst.res.loc[pst.nnz_obs_names,:]