This notebook is used to get residence-time distribution (RTD) for individual wells from an existing MODFLOW model. It is possible to read in any group or label from a 3D array and make RTDs for those groups. The approach is to 
* read an existing model
* create flux-weighted particle starting locations in every cell
* run MODPATH and read endpoints
* fit parametric distributions

In [1]:
__author__ = 'Jeff Starn'
%matplotlib notebook

from IPython.display import set_matplotlib_formats
set_matplotlib_formats('png', 'pdf')
from IPython.display import Image
from IPython.display import Math
from ipywidgets import interact, Dropdown
from IPython.display import display

import os
import sys
import shutil
import pickle
import numpy as np
import datetime as dt
import geopandas as gp
import matplotlib.pyplot as plt
import matplotlib.ticker as mt
import flopy as fp
import imeth
import fit_parametric_distributions
import pandas as pd
import scipy.stats as ss
import scipy.optimize as so
from scipy.interpolate import Rbf
from scipy.interpolate import griddata
from scipy.interpolate import UnivariateSpline


# Preliminary stuff

## Set user-defined variables

MODFLOW and MODPATH use elapsed time and are not aware of calendar time. To place MODFLOW/MODPATH elapsed time on the calendar, two calendar dates are specified in the cell below as `MM/DD/YYYY` strings: the beginning of the first stress period (`mf_start_date_str`) and the date when particles are to be released (`mp_release_date_str`). The latter date could be used in many ways, for example to represent a sampling date, or it could be looped over to create a time-lapse set of ages. 

`num_surf_layers` is an arbitrary layer number on which to divide the model domain for calculating RTDs. For example, in glacial aquifers it could represent the layer number of the bottom of unconsolidated deposits. In that case, anything below this layer could be considered bedrock.

`num_depth_groups` is an arbitrary number of equally sized groups, starting from the water table and extending to the bottom of the lowest model layer.

In [None]:
# Directories that are searched recursively for MODFLOW name (.nam) files
homes = ['../Models']
# Directory that receives a copy of the RTD metrics table
fig_dir = '../Figures'

# Paths to the MODFLOW-NWT and MODPATH executables
mfpth = '../executables/MODFLOW-NWT_1.0.9/bin/MODFLOW-NWT_64.exe'
mp_exe_name = '../executables/modpath.6_0/bin/mp6.exe' 

# Calendar dates as MM/DD/YYYY strings (parsed later with '%m/%d/%Y'):
# start of the first stress period and the particle release date
mf_start_date_str = '01/01/1900' 
mp_release_date_str = '01/01/2020' 

# Layer number on which to divide the model domain, and number of depth groups
num_surf_layers = 3
num_depth_groups = 5

# Cutoffs used in the summary-table labels: particle age ("< {} yrs old")
# and calendar year of recharge ("recharged since {}")
age_cutoff = 65
year_cutoff = 1952

# Porosity; travel times are divided by this value before the parametric fits
por = 0.20

In [8]:
# Collect every MODFLOW name file found below the directories in `homes`.
# `mod_list` holds the file stems and `dir_list` the matching full paths;
# the two lists are filled in lockstep, so they share the same ordering.
dir_list = []
mod_list = []

for home in homes:
    if not os.path.exists(home):
        continue
    for dirpath, dirnames, filenames in os.walk(home):
        for f in filenames:
            stem, ext = os.path.splitext(f)
            if ext != '.nam':
                continue
            mod_list.append(stem)
            dir_list.append(os.path.join(dirpath, f))

i = len(mod_list)
print('    {} models read'.format(i))

    5 models read


In [9]:
# Drop-down widget listing the model names found above; the selection is read
# from `model_area.value` in the next cell.
# NOTE(review): the colour/border keywords may not be accepted as direct widget
# arguments by newer ipywidgets releases - confirm against the installed version.
# NOTE(review): everything downstream depends on this interactive selection, so
# a plain "Run All" uses whatever value the widget holds by default.
model_area = Dropdown(
    options=mod_list,
    description='Model:',
    background_color='cyan',
    border_color='black',
    border_width=2)
display(model_area)

In [10]:
# Resolve the model picked in the drop-down to its name file and workspace.
model = model_area.value

# mod_list and dir_list are parallel lists, so look the path up by position.
# A substring test against the full path can match the wrong entry when the
# model name also appears in a directory name or inside another model name.
nam_path = dir_list[mod_list.index(model)]
nam_file = os.path.basename(nam_path)
model_ws = os.path.dirname(nam_path)

# sub-directory holding the well-related files, and the parent of the workspace
new_ws = os.path.join(model_ws, 'WEL')
geo_ws = os.path.dirname(model_ws)

print("working model is {}".format(model_ws))

In [12]:
# Load an existing model (only the DIS, BAS6, UPW and OC packages are read)

print ('Reading model information')

fpmg = fp.modflow.Modflow.load(
    nam_file,
    model_ws=model_ws,
    exe_name=mfpth,
    version='mfnwt',
    load_only=['DIS', 'BAS6', 'UPW', 'OC'],
    check=False,
)

# package handles
dis, bas, upw, oc = (fpmg.get_package(pkg) for pkg in ('DIS', 'BAS6', 'UPW', 'OC'))

# grid spacing, grid dimensions and layer bottoms from the discretization package
delr, delc = dis.delr, dis.delc
nlay, nrow, ncol = dis.nlay, dis.nrow, dis.ncol
bot = dis.getbotm()

# no-flow head value and ibound array from BAS6; dry-cell head value from UPW
hnoflo = bas.hnoflo
ibound = np.asarray(bas.ibound.get_value())
hdry = upw.hdry

print ('   ... done') 

working model is ../Models\NACP\NA_TRNWT
Reading model information
   ... done


In [13]:
## Specification of time in MODFLOW/MODPATH

#     There are several time-related concepts used in MODPATH.
#     * `simulation time` is the elapsed time in model time units from the beginning of the first stress period
#     * `reference time` is an arbitrary value of `simulation time` that is between the beginning and ending of `simulation time`
#     * `tracking time` is the elapsed time relative to `reference time`. It is always positive regardless of whether particles are tracked forward or backward
#     * `release time` is when a particle is released and is specified in `tracking time`

# setup dictionaries of the MODFLOW units for proper labeling of figures.
lenunit = {0:'undefined units', 1:'feet', 2:'meters', 3:'centimeters'}
timeunit = {0:'undefined', 1:'second', 2:'minute', 3:'hour', 4:'day', 5:'year'}

# Number of model time units per day, keyed by the DIS time-unit code.
# Simulation time is DIVIDED by these values to obtain days.
time_dict = dict()
time_dict[0] = 1.0 # undefined assumes days, so enter conversion to days
time_dict[1] = 24 * 60 * 60
time_dict[2] = 24 * 60
time_dict[3] = 24
time_dict[4] = 1.0
# one day is 1/365.25 of a year; the previous value of 1.0 treated years as days
time_dict[5] = 1.0 / 365.25

# convert string representation of dates into Python datetime objects
mf_start_date = dt.datetime.strptime(mf_start_date_str , '%m/%d/%Y')
mp_release_date = dt.datetime.strptime(mp_release_date_str , '%m/%d/%Y')

# convert simulation time to days from the units specified in the MODFLOW DIS file
# (plain division instead of `/=` so an integer-typed array cannot raise)
sim_time = np.append(0, dis.get_totim())
sim_time = sim_time / time_dict[dis.itmuni]

# make a list of simulation time formatted as calendar dates
date_list = [mf_start_date + dt.timedelta(days = item) for item in sim_time]

# reference time and date are set to the end of the last stress period
ref_time = sim_time[-1]
ref_date = date_list[-1]

# release time is calculated in tracking time (for particle release) and 
# in simulation time (for identifying head and budget components)
release_time_trk = np.abs((ref_date - mp_release_date).days)
release_time_sim = (mp_release_date - mf_start_date).days

# Fit parametric distributions

In [15]:
# Read the node table, well locations and sample data from the model's WEL folder.
src = os.path.join(model_ws, 'WEL', 'node_df.csv')
node_df = pd.read_csv(src)

# Station IDs are cast to 64-bit integers. The dtype itself (np.int64) is
# passed; the original passed np.int64(), which is a scalar instance of value 0.
src = os.path.join(model_ws, 'WEL', 'well_gdf.shp')
well_shp = gp.read_file(src)
well_shp['STAID'] = well_shp.STAID.astype(np.int64)

src = os.path.join(model_ws, 'WEL', 'sample_gdf.shp')
sample_shp = gp.read_file(src)
sample_shp['STAID'] = sample_shp.STAID.astype(np.int64)
sample_shp['DATES'] = pd.to_datetime(sample_shp['DATES'])

# Process endpoint information

In [19]:
## Read endpoint file

# NOTE(review): this cell only builds the endpoint file path; nothing visible in
# this notebook reads the file, yet a later cell uses `ep_data` - confirm where
# that table is created.

# form the path to the endpoint file
mpname = '{}_flux_{}'.format(fpmg.name, 'all_layers')
endpoint_file = os.path.join(model_ws, '{}.{}'.format(mpname, 'mpend'))

# distributions passed to the fitting routine
dist_list = [ss.weibull_min]
method = 'add_weibull_min'

# group nodes by station ID
ng = node_df.groupby('staid')

# fit results, keyed by station ID
fit_dict = {}

In [None]:
# Percentiles (10, 20, ..., 90) reported in the summary table
xp = np.arange(1, 10) * 10

# Row labels of the summary table. The ORDER matters: the per-well loop fills
# the table by position, so the groups below must stay in this sequence.
age_stat_cols = [
    'mean particle age',
    'standard dev of particle age',
    'minimum particle age',
    '10th percentile of particle age',
    '20th percentile of particle age',
    '30th percentile of particle age',
    '40th percentile of particle age',
    '50th percentile of particle age',
    '60th percentile of particle age',
    '70th percentile of particle age',
    '80th percentile of particle age',
    '90th percentile of particle age',
    'maximum particle age',
]

cutoff_cols = [
    'number particles < {} yrs old'.format(age_cutoff),
    'mean age of particles < {} yrs old'.format(age_cutoff),
    'proportion of particles < {} yrs old'.format(age_cutoff),
    'number particles recharged since {}'.format(year_cutoff),
    'mean age of particles recharged since {}'.format(year_cutoff),
    'proportion of particles particles recharged since {}'.format(year_cutoff),
    'total number of particles',
]

path_length_cols = [
    'minimum linear x-y path length',
    'median linear x-y path length',
    'maximum linear x-y path length',
    'minimum linear x-y-z path length',
    'median linear x-y-z path length',
    'maximum linear x-y-z path length',
]

weibull_cols = [
    'One component Weibull shape (log ages)',
    'One component Weibull location (log ages)',
    'One component Weibull scale (log ages)',
    'Two component Weibull shape 1 (log ages)',
    'Two component Weibull location 1 (log ages)',
    'Two component Weibull scale 1 (log ages)',
    'Two component Weibull shape 2 (log ages)',
    'Two component Weibull location 2 (log ages)',
    'Two component Weibull scale 2 (log ages)',
    'Two component Weibull fraction (log ages)',
]

cols = age_stat_cols + cutoff_cols + path_length_cols + weibull_cols

# empty table: one row per metric, one column per well added later
data_df = pd.DataFrame(index=cols)

In [23]:
# Fit parametric RTDs and tabulate age / path-length metrics for every well.
#
# NOTE(review): `ep_data` is not created anywhere in the visible notebook. The
# code below assumes it is a DataFrame of MODPATH endpoints with the columns
# `initial_node_num`, `rt`, `xy_path_len` and `xyz_path_len`, and that `rt` is
# in years (it is compared directly with `age_cutoff`) - confirm and load it
# explicitly before this cell.


def _fit_par(staid, name, size):
    """Return the fitted parameters `name` for well `staid`.

    Falls back to an array of `size` NaNs when the fit for this well, or this
    particular distribution, is missing from `fit_dict`.
    """
    try:
        return fit_dict[staid]['par'][name]
    except KeyError:
        return np.full(size, np.nan)


xp = np.arange(1, 10) * 10
n_metrics = len(cols)

# "recharged since year_cutoff" expressed as a maximum age (years) at release.
# year_cutoff is an integer year, so build the date directly instead of parsing.
cutoff_year_age = (mp_release_date - dt.datetime(year_cutoff, 1, 1)).days / 365.25

# number of points used to approximate each particle CDF
s = 1000
ly = np.linspace(1. / s, 1., s, endpoint=True)

metrics = {}

# loop through station ID groups
for staid, nodes in ng:
    # endpoint rows for every node that belongs to this well
    trav_time_raw = pd.concat(
        [ep_data.loc[ep_data.initial_node_num == seqnum] for seqnum in nodes.seqnum])
    tt_count = trav_time_raw.shape[0]
    has_sample = (sample_shp.STAID == staid).any()

    if tt_count == 0:
        # nothing to fit; avoids a division by zero when building the CDF
        print('no points for well {}'.format(staid))
        if has_sample:
            metrics[staid] = np.full(n_metrics, np.nan)
        continue

    # empirical CDF of the sorted, log-transformed, porosity-normalized travel times
    ages = np.sort(trav_time_raw.rt.values)
    tt_cdf = np.linspace(1. / tt_count, 1., tt_count, endpoint=True)
    tt = np.log(ages / por)

    # interpolate at equally spaced points to reduce the number of particles
    lprt = np.interp(ly, tt_cdf, tt)

    try:
        fit_dict[staid] = fit_parametric_distributions.fit_dists(ly, lprt, dist_list)
    except Exception as err:
        # keep processing the remaining wells, but do not hide the failure
        print('parametric fit failed for well {}: {}'.format(staid, err))

    print(staid)

    # metrics are tabulated only for wells that have at least one sample
    if not has_sample:
        continue

    rt = trav_time_raw.rt
    data = np.zeros(n_metrics)
    data[0] = rt.mean()
    data[1] = rt.std()
    data[2] = rt.min()
    data[3:12] = np.percentile(rt, xp)
    data[12] = rt.max()

    cutoff_age_ar = rt[rt < age_cutoff]
    cutoff_age_count = cutoff_age_ar.shape[0]
    data[13] = cutoff_age_count
    data[14] = cutoff_age_ar.mean()
    data[15] = cutoff_age_count / tt_count

    cutoff_year_ar = rt[rt < cutoff_year_age]
    cutoff_year_count = cutoff_year_ar.shape[0]
    data[16] = cutoff_year_count
    data[17] = cutoff_year_ar.mean()
    data[18] = cutoff_year_count / tt_count

    data[19] = rt.count()
    data[20] = trav_time_raw.xy_path_len.min()
    data[21] = trav_time_raw.xy_path_len.median()
    data[22] = trav_time_raw.xy_path_len.max()
    data[23] = trav_time_raw.xyz_path_len.min()
    data[24] = trav_time_raw.xyz_path_len.median()
    data[25] = trav_time_raw.xyz_path_len.max()

    # fitted parameters are looked up by station ID
    data[26:29] = _fit_par(staid, 'uni_weibull_min', 3)
    data[29:36] = _fit_par(staid, 'add_weibull_min', 7)

    metrics[staid] = data

# one column per well, rows labelled by `cols`; rebuilt from scratch so that
# re-running this cell gives the same table
data_df = pd.DataFrame(metrics, index=cols)

dst = os.path.join(model_ws, 'summary_data_wells.csv')
data_df.to_csv(dst)

# make sure the figure directory exists before writing into it
os.makedirs(fig_dir, exist_ok=True)
dst = os.path.join(fig_dir, 'RTD_metrics.csv')
data_df.to_csv(dst)

dst = os.path.join(model_ws, 'fit_dict_wells_{}.pickle'.format(model))
with open(dst, 'wb') as f:
    pickle.dump(fit_dict, f)

no points for well 352832076470102
no points for well 352839077040701
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min


  x = np.asarray((x - loc)/scale, dtype=dtyp)
  x = np.asarray((x - loc)/scale, dtype=dtyp)
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = (x >= self.b) & cond0
  ret = ret.dtype.type(ret / rcount)


   ... done
353143076575701
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
353221076105501
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
add_weibull_min Optimal parameters not found: The maximum number of function evaluations is exceeded.
   ... done
353547076473301
no points for well 353747077052001
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
354838076541201
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
354926076452901
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
355059076443001
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
355220076371501
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
355601076352401
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
add_weibull_min Optimal paramet

  active_set = on_bound * g < 0
  return umr_maximum(a, axis, None, out, keepdims)


add_weibull_min Optimal parameters not found: The maximum number of function evaluations is exceeded.
   ... done
370156077063801
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
372105077113601
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min


  (ub - x)[non_zero] / s_non_zero)
  return umr_minimum(a, axis, None, out, keepdims)
  return min_step, np.equal(steps, min_step) * np.sign(s).astype(int)
  cond = logical_and(cond, (asarray(arg) > 0))
  cond0 = self._argcheck(*args) & (scale > 0)
  cond1 = self._open_support_mask(x) & (scale > 0)
  lb_total = np.maximum(lb_centered, -tr_bounds)
  ub_total = np.minimum(ub_centered, tr_bounds)
  return np.all((x >= lb) & (x <= ub))
  return np.all((x >= lb) & (x <= ub))


add_weibull_min Optimal parameters not found: The maximum number of function evaluations is exceeded.
   ... done
372148076461001
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
372148076461002
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
372454076512801
no points for well 372823076433601
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
373049075484101
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
373330075494601
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
373759076345501
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
374550077300501
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
375541076514301
no points for well 380027075410802
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_

fitting add_weibull_min
   ... done
390126075575401
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
390126075575402
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
390217076181401
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
390643074522501
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
390655075324601
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
390657076462601
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
390657076462602
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
390700076412701
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
390729075315902
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
390813076141501
starting par

   ... done
400312074531601
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
400350074451001
no points for well 400415074080701
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
400432074341201
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
401053074034101
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
401055074035101
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
401204074413801
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
401536074292001
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
401646074173701
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
401655074002601
starting parametric fits...
fitting uni_weibull_min
fitting add_weibull_min
   ... done
401656074014701
s

In [None]:
#             tmp.loc[staid, 'date'] = j.DATES
#             tt_count = trav_time_raw.shape[0]
#             tmp.loc[staid, 'count'] = tt_count
#             tmp.loc[staid, 'mean'] = trav_time_raw.mean()
#             tmp.loc[staid, 'std'] = trav_time_raw.std()
#             tmp.loc[staid, 'min'] = trav_time_raw.min()
#             tmp.loc[staid, xp] = np.percentile(trav_time_raw, xp)
#             tmp.loc[staid, 'max'] = trav_time_raw.max()
#             cutoff_age_ar = trav_time_raw[trav_time_raw < age_cutoff]
#             cutoff_age_count = cutoff_age_ar.shape[0]
#             tmp.loc[staid, '<cutoff_age_count'] = cutoff_age_count
#             tmp.loc[staid, '<cutoff_age_mean'] = cutoff_age_ar.mean()
#             tmp.loc[staid, '<cutoff_age_frac'] = cutoff_age_count / tt_count
#             cutoff_year_ar = trav_time_raw[(j.DATES.year - trav_time_raw) >= year_cutoff]
#             cutoff_year_count = cutoff_year_ar.shape[0]
#             tmp.loc[staid, '>cutoff_year_count'] = cutoff_year_count
#             tmp.loc[staid, '>cutoff_year_mean'] = cutoff_year_ar.mean()
#             tmp.loc[staid, '>cutoff_year_frac'] = cutoff_year_count / tt_count                
#             tmp.loc[staid, 'sucode'] = j.SuCode
#             tmp.loc[staid, 'Trit'] = j.Trit
#             tmp.loc[staid, 'SF6_PPTV'] = j.SF6_PPTV
#             alt = pd.to_numeric(j.ALT_VA)
#             tmp.loc[staid, 'screentop'] = j.MINOFOPEN_
#             tmp.loc[staid, 'midscreen'] = (j.MAXOFOPEN_ + j.MINOFOPEN_) / 2
#             data_df = data_df.append(tmp)
