# Multiple runs of a General Simulation Model

In [4]:
%matplotlib notebook

import os
import datetime as dt
import pickle, joblib


# Standard data science libraries
import pandas as pd
import numpy as np
import scipy.stats as ss
import scipy.optimize as so 
import scipy.interpolate as si

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names. Prefer the new name when this installation
# offers it; otherwise fall back to the legacy name used originally.
if 'seaborn-v0_8-notebook' in plt.style.available:
    plt.style.use('seaborn-v0_8-notebook')
else:
    plt.style.use('seaborn-notebook')

# Options for pandas
pd.options.display.max_columns = 20
pd.options.display.max_rows = 200

# Display all cell outputs
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from IPython.display import Image
from IPython.display import Math


In [1]:
import shutil
import flopy as fp
import statsmodels.api as sm # for lowess smoothing of plots
from scipy.spatial import ConvexHull # to find the Pareto front
import shapely # to operate on the parameter space
from scipy.spatial.distance import cdist # to operate on parameter space
from argparse import Namespace
import json

import RTD_util6 as rtd_ut # custom module with utilities

## Preliminary stuff

In [None]:
# Path to the MODPATH 7 executable and the directory holding the optimal model.
mp_exe_name7 = '../Executables/modpath_7_2_001/bin/mpath7.exe'
model_ws = 'optimal_model'

# Project-level metadata (read later for metadata['modflow_path']).
with open('GenMod_metadata.txt') as json_file:
    metadata = json.load(json_file)

# NOTE(review): this joins the literal directory name 'model_ws', not the
# `model_ws` variable defined above -- confirm which location is intended.
src = os.path.join('model_ws', 'gsm_metadata.json')
with open(src, 'r') as f:
    gsm_metadata = json.load(f)

# Namespace is already imported with the other imports at the top of the
# notebook, so the redundant mid-cell import was removed.
meta = Namespace(**gsm_metadata)

Read the metadata dictionary that was saved when the model was created.

In [None]:
# Read the GSM metadata stored in the model workspace and expose its keys as
# attributes (meta.<key>) through a Namespace.
src = os.path.join(model_ws, 'gsm_metadata.json')
with open(src, mode='r') as f:
    gsm_metadata = json.loads(f.read())

meta = Namespace(**gsm_metadata)

Copy the GSM that was created over to the scratch directory. It will be replaced many times during the exploration of parameter space.

In [None]:
# Start from a fresh scratch copy: remove any existing scratch directory, then
# duplicate the model directory into it.
if os.path.exists('model_ws/calibration_runs'):
    shutil.rmtree('model_ws/calibration_runs')
shutil.copytree(src='model_ws', dst='model_ws/calibration_runs')

Load the model and extract a few variables. 

In [None]:
# Load the scratch copy of the simulation and grab its (single) model.
sim = fp.mf6.MFSimulation.load(sim_name='mfsim.nam', version='mf6', exe_name=metadata['modflow_path'],
                               sim_ws='model_ws/calibration_runs', strict=True, verbosity_level=0, 
                               load_only=None, verify_data=False)

model = sim.get_model()

# Discretization: land-surface array (2-D and flattened), grid dimensions and spacing.
dis = model.get_package('dis')
top_ar = dis.top.array
top = top_ar.ravel()
nlay, nrow, ncol = dis.nlay.array, dis.nrow.array, dis.ncol.array
delc = dis.delc.array
delr = dis.delr.array

# Original hydraulic conductivity fields; run_model scales these rather than
# the values of a previous run.
npf = model.get_package('npf')
k = npf.k.array
k33 = npf.k33.array

# np.load on an .npz archive keeps the file open until it is closed, so read the
# array inside a context manager instead of leaving the handle dangling.
with np.load('bedrock_flag_array.npz') as bedrock_npz:
    bedrock_index = bedrock_npz['bedrock_index']

print ('   ... done') 

Load the model_grid.csv file to get the observation cell types

In [None]:
# NOTE(review): `geo_ws` is not defined in any cell visible in this notebook;
# it must be set to the directory containing model_grid.csv before this cell
# can run on a fresh kernel -- confirm where it should come from.
model_file = os.path.join(geo_ws, 'model_grid.csv')
model_grid = pd.read_csv(model_file)
model_grid = model_grid.fillna(0)

# Cells with zero bedrock or surficial K are made inactive.
model_grid.loc[model_grid[meta.K_bdrk] == 0, meta.ibound] = 0
model_grid.loc[model_grid[meta.K_surf] == 0, meta.ibound] = 0

# Inactive cells carry no observation type. Use the same metadata-driven column
# name as the two lines above (and as run_model) instead of a hard-coded
# `ibound` attribute.
model_grid.loc[model_grid[meta.ibound] == 0, 'obs_type'] = np.nan

topo_cells = model_grid.obs_type == 'topo'
hydro_cells = model_grid.obs_type == 'hydro'

# Counting the boolean masks gives 0 (rather than a KeyError from
# value_counts) when an observation type is absent.
num_topo = int(topo_cells.sum())
num_hydro = int(hydro_cells.sum())


Define a function to update parameter values, run the model, and calculate hydro and topo errors. The function also updates streambed hydraulic conductivity ($K_{stream}$). Hydraulic conductivity ($K$) is estimated as a factor of the original parameter field. Intermediate $K$ in the bedrock layers between the top and bottom of bedrock is assumed to decay exponentially.

In [None]:
def run_model(k_surf_mult, k_bdrk_mult, stream_mult, k_bottom_fraction, sim_ws='model_ws/calibration_runs'):
    """Apply a parameter set, run the model, and return the topo/hydro error rates.

    All four parameters are given as log10 values and are converted with
    ``10 ** value`` before use.

    Parameters
    ----------
    k_surf_mult : float
        log10 of the multiplier applied to the original K wherever
        ``bedrock_index`` is False.
    k_bdrk_mult : float
        log10 of the multiplier applied to the original K of the layer at index
        ``-num_bdrk_layers`` to give K at the top of bedrock.
    stream_mult : float
        log10 of the multiplier applied to the cell K when computing drain
        conductance.
    k_bottom_fraction : float
        log10 of the multiplier applied to the original K of the last layer to
        give K at the bottom of bedrock.
    sim_ws : str, optional
        Directory containing ``mfsim.nam``; the simulation is loaded from,
        rewritten to, and run in this directory.

    Returns
    -------
    topo_rate : float
        Fraction of active 'topo' cells where the simulated water table is
        above ``top + meta.err_tol``.
    hydro_rate : float
        Fraction of active 'hydro' cells where the simulated water table is
        below ``top - meta.err_tol``.
        Either rate is NaN when there are no active cells of that type.

    Raises
    ------
    RuntimeError
        If the simulation run does not report success.

    Notes
    -----
    Relies on notebook-level names (``metadata``, ``gsm_metadata``, ``meta``,
    ``k``, ``nlay``, ``nrow``, ``ncol``, ``top_ar``, ``bedrock_index``,
    ``model_grid``) and overwrites ``model_grid[meta.K_surf]`` with the new
    layer-0 K on every call.
    """
    # transform the log multipliers to real multipliers
    k_surf_mult = 10 ** k_surf_mult
    k_bdrk_mult = 10 ** k_bdrk_mult
    stream_mult = 10 ** stream_mult
    k_bottom_fraction = 10 ** k_bottom_fraction

    # use flopy to read in the model
    sim = fp.mf6.MFSimulation.load(sim_name='mfsim.nam', version='mf6',
                                   exe_name=metadata['modflow_path'],
                                   sim_ws=sim_ws, strict=True, verbosity_level=0,
                                   load_only=None, verify_data=False)
    model = sim.get_model()
    dis = model.get_package('dis')
    npf = model.get_package('npf')

    # set K in each layer; always scale the original (notebook-level) k so that
    # repeated calls do not compound
    k_top_of_bedrock = k[-gsm_metadata['num_bdrk_layers']] * k_bdrk_mult
    k_bottom_of_bedrock = k[-1, ...] * k_bottom_fraction

    # layer mid-point elevations from the stacked top and bottom surfaces
    grid = np.empty((nlay+1, nrow, ncol))
    grid[0, ...] = dis.top.array
    grid[1:, ...] = dis.botm.array
    z = (grid[0:-1, ...] + grid[1:, ...] ) / 2

    # exponential K(z) = c * exp(a * z) passing through the top-of-bedrock and
    # bottom-of-bedrock values at their respective mid-point elevations
    a = np.log(k_bottom_of_bedrock / k_top_of_bedrock) / (z[-1 , ...] - z[-gsm_metadata['num_bdrk_layers']])
    c = k_top_of_bedrock * np.exp(-a * z[-gsm_metadata['num_bdrk_layers']])
    k_exp = c * np.exp(a * z)

    new_k = np.where(bedrock_index, k_exp, k_surf_mult * k)
    npf.k = new_k

    # the drain conductance below is based on the updated layer-0 K
    model_grid[meta.K_surf] = new_k[0, ...].ravel()

    # set drain data in each drain cell
    drn_data = model_grid[(model_grid.order != 0) &
                          (model_grid[meta.ibound] == 1)].copy()

    # adjust streambed K based on cell K and stream_mult
    drn_data['dcond'] = drn_data[meta.K_surf] * stream_mult * \
        drn_data.reach_len * drn_data.width / meta.stream_bed_thk
    drn_data['iface'] = 6
    drn_data = drn_data.reindex(
        ['lay', 'row', 'col', 'stage', 'dcond', 'iface'], axis=1)
    drn_data = drn_data.rename(columns={'lay': 'k', 'row': 'i', 'col': 'j'})
    drn_data = drn_data[drn_data.dcond > 0]

    cellid = list(zip(drn_data.k, drn_data.i, drn_data.j))

    drn_data6 = pd.DataFrame({'cellid': cellid, 'stage': drn_data.stage,
                              'dcond': drn_data.dcond, 'iface': drn_data.iface})
    drn_recarray6 = drn_data6.to_records(index=False)
    drn_dict6 = {0: drn_recarray6}

    drn = model.get_package('drn')
    drn.stress_period_data = drn_dict6

    # run the model; stop here on failure so the errors below are never
    # computed from stale or incomplete output
    sim.write_simulation()
    success, _ = sim.run_simulation(silent=True)
    if not success:
        raise RuntimeError(
            'Simulation in {!r} did not terminate normally'.format(sim_ws))

    # calculate the errors
    rtd = rtd_ut.RTD_util(sim, 'flow', 'rt')
    rtd.get_watertable()
    water_table = rtd.water_table

    t_crit = (model_grid.obs_type =='topo') & (model_grid[meta.ibound] != 0)
    topo_cells = t_crit.values.reshape(nrow, ncol)

    h_crit = (model_grid.obs_type =='hydro') & (model_grid[meta.ibound] != 0)
    hydro_cells = h_crit.values.reshape(nrow, ncol)

    num_topo = np.count_nonzero(topo_cells)
    num_hydro = np.count_nonzero(hydro_cells)

    # topo error: water table above land surface (plus tolerance);
    # hydro error: water table below land surface (minus tolerance)
    topo = (top_ar + meta.err_tol) < water_table
    hydro = (top_ar - meta.err_tol) > water_table

    topo_error = topo & topo_cells
    hydro_error = hydro & hydro_cells

    t = np.count_nonzero(topo_error)
    h = np.count_nonzero(hydro_error)

    # a rate is undefined (NaN) when there are no cells of that type
    topo_rate = t / num_topo if num_topo else np.nan
    hydro_rate = h / num_hydro if num_hydro else np.nan

    return topo_rate, hydro_rate


Read optimization results.

In [None]:
# Table of optimization results stored in the model workspace.
src = os.path.join(model_ws, 'results_df.csv')
df = pd.read_csv(filepath_or_buffer=src)

Find one set of the optimal parameters by considering where the Pareto (tradeoff) front between hydro and topo errors intersects the line of hydro error = topo error.

### To evaluate uncertainty

Find the Pareto front where there is a tradeoff between hydro and topo errors. To do this, we must separate the two halves of the convex hull polygon.  We only want the minimum.  Do this by creating a vertical line at each point along the front (which will be at a convex hull node) and taking the minimum.  Assemble the minima into a line shape.

Plot all points in parameter space and the Pareto front

In [5]:
# Pareto-set table stored in the optimal_model directory.
dst = os.path.join('optimal_model', 'pareto_sets.csv')
pareto_df = pd.read_csv(filepath_or_buffer=dst)


The following cell shows how to loop through parameter sets on the Pareto front; however, it does not capture the results and needs to be improved. To evaluate uncertainty, travel time distributions could be calculated for each of the parameter sets. 

In [None]:
# Run the model for every parameter set on the Pareto front and keep the error
# rates returned by each run so they can be compared afterwards.
# NOTE(review): the original loop indexed `df` with `pareto_sets`, a name that is
# never defined in this notebook. This version assumes pareto_sets.csv (loaded
# into `pareto_df`) stores the parameter columns themselves -- confirm.
param_names = ['k_surf_mult', 'k_bdrk_mult', 'stream_mult', 'k_bottom_fraction']

pareto_records = []
for set_label, param_set in pareto_df.iterrows():
    # extract the (log10) parameter values of this set and run the model with them
    log_params = {name: param_set[name] for name in param_names}
    topo_rate, hydro_rate = run_model(**log_params)
    pareto_records.append({'pareto_set': set_label, **log_params,
                           'topo_rate': topo_rate, 'hydro_rate': hydro_rate})

# one row per Pareto parameter set with its resulting error rates
pareto_results = pd.DataFrame(pareto_records)
pareto_results