In [None]:
%matplotlib notebook

import os
import datetime as dt
import pickle, joblib


# Standard data science libraries
import pandas as pd
import numpy as np
import scipy.stats as ss
import scipy.optimize as so 
import scipy.interpolate as si

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
# The seaborn style sheets were renamed with a 'seaborn-v0_8-' prefix in
# matplotlib 3.6 and the old names were removed later, so use whichever name
# this installation provides and fall back to the default style otherwise.
_style_name = next(
    (name for name in ('seaborn-v0_8-notebook', 'seaborn-notebook')
     if name in plt.style.available),
    'default',
)
plt.style.use(_style_name)

# Options for pandas: cap how many columns and rows are shown in displayed frames
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 200)

# Display all cell outputs: with 'all', IPython shows the value of every
# expression statement in a cell rather than only the last one. Plotting cells
# in this notebook assign call results to `dum`, which keeps them from being
# echoed under this setting.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

from IPython.display import Image
from IPython.display import Math



In [None]:
from ipywidgets import interact, Dropdown
from IPython.display import display

import flopy as fp
import geopandas as gpd
from shapely.geometry import LineString, MultiLineString, Point

import RTD_util6 as rtd_ut
import Genmod_Utilities as gmu

import matplotlib.dates as mdates
import matplotlib.ticker as mticks

import json


The following cell sets color and font defaults that work for AGU journals.

In [None]:
# Five-colour palette, given as hex RGB strings
KS1, KS2, KS3, KS4, KS5 = '#06366E', '#00A3EB', '#25C0A6', '#FDDA58', '#5D171A'

# Default font for every matplotlib figure created after this cell
font = {
    'family': 'sans-serif',
    'weight': 'normal',
    'size': 12,
    'sans-serif': 'Arial',
}

plt.rc('font', **font)

Set the location of the MODPATH7 executable file.

In [None]:
# Directory that holds the model files and receives this notebook's outputs
model_ws = 'optimal_model'
# Relative path to the MODPATH 7 executable
mp_exe_name7 = '../Executables/modpath_7_2_001/bin/mpath7.exe'

Read some files that were created in previous notebooks.

In [None]:
from argparse import Namespace

# Project-level settings written by an earlier notebook (JSON despite the .txt suffix)
with open('GenMod_metadata.txt') as json_file:
    metadata = json.load(json_file)

# Settings of the general simulation model, stored in the 'model_ws' directory
src = os.path.join('model_ws', 'gsm_metadata.json')
with open(src, mode='r') as f:
    gsm_metadata = json.load(f)

# Expose the GSM settings as attributes (meta.<key>) as well as dictionary items
meta = Namespace(**gsm_metadata)

## Use General Simulation Model to calculate TTD

Read MODFLOW model and create RTD object

In [None]:
print('Reading model information')

# Load the MODFLOW 6 simulation from the optimal_model workspace
ml = fp.mf6.MFSimulation.load(
    sim_name='mfsim.nam',
    version='mf6',
    exe_name=metadata['modflow_path'],
    sim_ws='optimal_model',
    strict=True,
    verbosity_level=0,
    load_only=None,
    verify_data=False,
)
model = ml.get_model()

# Residence-time helper built on the loaded simulation
rtd = rtd_ut.RTD_util(ml, 'flow', 'rt')
print('   ... done')

Read model output and compute net inflow to drain cells. This cell reads baseflow inflow to stream segments and attaches the values to the NHD stream segments. The resulting shapefile is called `drain_flows` and will be placed in the `optimal_model` directory.

In [None]:
# read the drain-segment shapefile from the gis directory
# (presumably the NHD flowlines intersected with the model grid in step 1 -- confirm)
src = os.path.join('gis', 'drain_segments.shp')
shp = gpd.read_file(src)

# read the NHD flowline shapefile (nhd_clip.shp) and remember its CRS for later use
src = os.path.join('gis', 'nhd_clip.shp')
nhd = gpd.read_file(src)
nhd_crs = nhd.crs

# read the model domain file named in the metadata and reproject it to the NHD CRS
domain = gpd.read_file(metadata['domain_name'])
domain.to_crs(crs=nhd_crs, inplace=True)

# read the model_grid table from the gis directory
# (`data` is not used again in this cell)
src = os.path.join('gis', 'model_grid.csv')
data = pd.read_csv(src)

# extract the drain budget terms from modflow output
rtd.get_budget('DRN')
drains = rtd.budget

# create a dataframe of drain flows from the first entry of the budget
# NOTE(review): subtracting 1 presumably converts 1-based node numbers to the
# 0-based numbering used in the shapefile's `node` column -- confirm
drn_df = pd.DataFrame(drains[0])
drn_df['node'] = drn_df['node'] - 1

# merge drain segments (by model cells) with drain flows, then keep only the
# rows whose drain flow q is negative; rows that found no match in the outer
# merge have a missing q and are dropped by this filter as well
shp_drn_df = shp.merge(drn_df, left_on='node', right_on='node', how='outer')
shp_drn_df = shp_drn_df[shp_drn_df.q < 0]

# save shapefile to the optimal_model directory
dst = os.path.join('optimal_model', 'drain_flows.shp')
shp_drn_df.to_file(dst)


Read the modified endpoint information. A modified endpoint file was created in the previous notebook.

In [None]:
# The modified endpoint file is named after the HUC8 and is read as a CSV table
endpointfile = f"{metadata['HUC8_name']}_flow_rt_mod.mpend"
endpoint_path = os.path.join('optimal_model', endpointfile)
ep_data = pd.read_csv(endpoint_path)

The following line is necessary because of a bug in Flopy 3.3.2.  Hopefully the bug will be fixed in future versions. 

In [None]:
# Shift every particle ID down by one. Run this cell only once per load of
# ep_data, because each run subtracts one again.
ep_data['Particle ID'] = ep_data['Particle ID'].sub(1)

Create functions that will be used to summarize age data.

In [None]:
def meantt(x):
    """Return the arithmetic mean of the travel times in ``x``."""
    mean_travel_time = np.mean(x)
    return mean_travel_time

def mediantt(x):
    """Return the median of the travel times in ``x``."""
    median_travel_time = np.median(x)
    return median_travel_time

def fracyoung(x, age_cutoff=65):
    """Return the fraction of travel times in ``x`` that are below the cutoff.

    Parameters
    ----------
    x : numpy.ndarray or pandas.Series
        Travel times; must support element-wise comparison and ``.shape``.
    age_cutoff : float, optional
        Values strictly below this age count as young. Defaults to 65, the
        value that used to be hard-coded, so one-argument calls (as made by
        ``groupby().agg``) behave as before.

    Returns
    -------
    float
        Count of young values divided by the total number of values.
    """
    return (x < age_cutoff).sum() / x.shape[0]

def meanyoung(x, age_cutoff=65):
    """Return the mean of the travel times in ``x`` that are below the cutoff.

    Parameters
    ----------
    x : numpy.ndarray or pandas.Series
        Travel times; must support boolean-mask indexing.
    age_cutoff : float, optional
        Values strictly below this age count as young. Defaults to 65, the
        value that used to be hard-coded.

    Returns
    -------
    float
        Mean of the young values, in the same units as ``x``.
    """
    return x[x < age_cutoff].mean()

def medianold(x, age_cutoff=65):
    """Return the median of the travel times in ``x`` at or above the cutoff.

    Parameters
    ----------
    x : numpy.ndarray or pandas.Series
        Travel times; must support boolean-mask indexing.
    age_cutoff : float, optional
        Values greater than or equal to this age count as old. Defaults to
        65, the value that used to be hard-coded.

    Returns
    -------
    float
        Median of the old values, in the same units as ``x``.
    """
    return np.median(x[x >= age_cutoff])
    
def meanpath(x):
    """Return the arithmetic mean of the path lengths in ``x``."""
    mean_length = np.mean(x)
    return mean_length

# Summary statistics applied per group: five for the 'rt' column and one for
# 'xyz_path_len'. The function names become the output column labels.
agg_func = {
    'rt': [meantt, mediantt, fracyoung, meanyoung, medianold],
    'xyz_path_len': meanpath,
}

Each stream segment may contain many particles.  The next cell groups the particle information by stream segment, thus creating a distribution of ages for each segment. The error message that gets generated is not important.

In [None]:
# Key every NHD flowline by the last nine digits of its NHDPlusID so it can be
# matched to the 'Particle ID' values in the endpoint data. The dtype classes
# are passed directly; the original passed scalar instances (np.int64()).
nhd['Particle ID'] = nhd.NHDPlusID.astype(np.int64).astype(str).str[-9:].astype(np.int32)

# One row per 'Particle ID' with the summary statistics defined in agg_func.
summary = ep_data.groupby('Particle ID').agg(agg_func)

# The aggregation yields two-level column labels such as ('rt', 'meantt').
# Merging those with the single-level columns of `nhd` triggers a warning in
# older pandas and a MergeError in newer releases, so flatten the labels to
# plain tuples first. The merged frame ends up with the same tuple-named
# columns as before.
summary.columns = summary.columns.to_flat_index()
nhd_age = summary.merge(nhd, left_index=True, right_on='Particle ID')

Make the column headers more understandable and set the coordinate reference system (CRS).

In [None]:
# Map the tuple-style statistic labels to flat names and rename the stage columns
rep_str = {
    ('rt', 'meantt'): 'meantt',
    ('rt', 'mediantt'): 'mediantt',
    ('rt', 'fracyoung'): 'fracyoung',
    ('rt', 'meanyoung'): 'meanyoung',
    ('rt', 'medianold'): 'medianold',
    ('xyz_path_len', 'meanpath'): 'meanpath',
    'maxft': 'maxstage',
    'minft': 'minstage',
}

# Apply the new names and index the table by 'Particle ID'
nhd_age = nhd_age.rename(columns=rep_str).set_index('Particle ID')

# Keep only the summary metrics, stream order, stage range and geometry
keep_cols = [
    'meantt', 'mediantt', 'fracyoung', 'meanyoung', 'medianold', 'meanpath',
    'StreamOrde',
    'maxstage', 'minstage',
    'geometry',
]
nhd_age = gpd.GeoDataFrame(nhd_age[keep_cols])

# Tag the result with the CRS of the NHD flowlines
nhd_age.crs = nhd_crs

The following cell loops through each stream segment and fits one- and two-component Weibull distributions to the assemblage of particle travel times. This process smooths any irregularities in the travel time distribution caused by abrupt changes in properties in the MODFLOW model and makes the distribution continuous by filling the gaps where there were no particles. It only needs to be run once for each simulation. It takes about 30-60 minutes to run a typical HUC8 grid with 1000 km cells. It can be commented out for subsequent runs that may be done to tweak the graphs.



In [None]:
# comid_dict = dict()

# for comid, _df in ep_data.groupby('Particle ID'):
#     t = _df.rt
#     t.values.sort()
#     n = t.shape[0]
#     tt_cdf = np.linspace(1. / n, 1., n, endpoint=True)
#     tmp = rtd.fit_dists(tt_cdf, t, [ss.weibull_min], fit_one=True, fit_two=True)
#     comid_dict[comid] = tmp

# dst = os.path.join(model_ws, 'comid_dict.pkl')
# with open(dst, 'wb') as f:
#     pickle.dump(comid_dict, f)

This cell reads in a previously created travel time dictionary.

In [None]:
# NOTE: unpickling can execute arbitrary code, so only load a comid_dict.pkl
# that was written by this workflow.
dst = os.path.join('optimal_model', 'comid_dict.pkl')
with open(dst, mode='rb') as f:
    comid_dict = pickle.load(f)

The two-component Weibull distribution usually fits the particle travel time distribution much better than the one-component distribution. The next cell adds the two-component parameters to the dataframe.

In [None]:
# Column names for the seven two-component Weibull parameters, in the order in
# which they are stored in `pars`. Going by the commented-out code below,
# pars[0:3] and pars[3:6] are the arguments of the two weibull_min components
# and pars[6] is the weight of the first component.
# NOTE(review): 'sh'/'lo'/'sc' presumably mean shape/location/scale and the
# trailing 'e'/'l' the first/second component -- confirm against RTD_util.
li = ['she', 'loe', 'sce', 'shl', 'lol', 'scl', 'f']
# `df` and `x` are not used by the live code in this cell; `x` appears only in
# the commented-out pdf reconstruction.
df = pd.DataFrame()
x = np.linspace(0, 10000, 10000)

# One dictionary entry per key; each key is used as a row label of nhd_age.
for key, value in comid_dict.items():
    # `rt` and `num_values` are read here but not used further in this cell.
    rt = value['tt']['rt']
    num_values = rt.shape[0]
    
    # Write the fitted parameters into the row for this key. The columns in
    # `li` are created by this assignment if they do not exist yet.
    pars = value['par']['two_weibull_min']
    nhd_age.loc[key, li] = pars
    
#     w1 = ss.weibull_min(*pars[0:3])
#     w2 = ss.weibull_min(*pars[3:6])
#     pdf = (pars[6]) * w1.pdf(x) + (1-pars[6]) * w2.pdf(x)


Save the dataframe to a shapefile

In [None]:
# Write the per-segment age metrics and Weibull parameters to
# optimal_model/nhd_age.shp (to_file is called without a driver argument).
dst = os.path.join('optimal_model', 'nhd_age.shp')
nhd_age.to_file(dst)

In [None]:
# Map of the fraction of young water for each stream segment.
var = 'fracyoung'

# Colour limits shared by the map and the colorbar. The map used to be scaled
# to the data range while the colorbar used these limits, so the two could
# disagree.
vmin = 0
vmax = 1

fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)

# All flowlines as a backdrop, then the segments coloured by `var`, then the domain outline
dum = nhd.plot(ax=ax, linewidth=0.75, color='cornflowerblue')
dum = nhd_age.plot(column=var, legend=False, ax=ax, cmap=plt.cm.nipy_spectral,
                   linewidth=1, vmin=vmin, vmax=vmax)
dum = domain.plot(ax=ax, color='none', edgecolor='black')

# Stand-alone colorbar built from the same colormap and limits as the map.
# set_array is the public equivalent of the former `sm._A = []` assignment.
sm = plt.cm.ScalarMappable(cmap='nipy_spectral', norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])
cbaxes = fig.add_axes([0.50, 0.85, 0.3, 0.025])
cb = fig.colorbar(sm, ax=ax, cax=cbaxes, orientation='horizontal')
ax.set_aspect(1)
dum = fig.suptitle('Fraction of young water')
# fig.set_tight_layout(True)

dst = os.path.join('optimal_model', 'metric_maps_frac.png')
fig.savefig(dst)

Image(dst)

In [None]:
# Map of the mean age of young water for each stream segment.
var = 'meanyoung'

# Colour limits shared by the map and the colorbar. The map used to be scaled
# to the data range while the colorbar used these limits, so the two could
# disagree.
vmin = 0
vmax = 65

fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)

# All flowlines as a backdrop, then the segments coloured by `var`, then the domain outline
dum = nhd.plot(ax=ax, linewidth=0.75, color='cornflowerblue')
dum = nhd_age.plot(column=var, legend=False, ax=ax, cmap=plt.cm.nipy_spectral,
                   linewidth=1, vmin=vmin, vmax=vmax)
dum = domain.plot(ax=ax, color='none', edgecolor='black')

# Stand-alone colorbar built from the same colormap and limits as the map.
# set_array is the public equivalent of the former `sm._A = []` assignment.
sm = plt.cm.ScalarMappable(cmap='nipy_spectral', norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])
cbaxes = fig.add_axes([0.50, 0.85, 0.3, 0.025])
cb = fig.colorbar(sm, ax=ax, cax=cbaxes, orientation='horizontal')
ax.set_aspect(1)
dum = fig.suptitle('Mean age of young water')
# fig.set_tight_layout(True)

dst = os.path.join('optimal_model', 'metric_maps_ageyoung.png')
fig.savefig(dst)

Image(dst)

In [None]:
# Map of the median age of old water for each stream segment.
var = 'medianold'

# Colour limits shared by the map and the colorbar. The map used to be scaled
# to the data range while the colorbar started at 65, so the two could
# disagree.
vmin = 65
vmax = nhd_age[var].max()

fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)

# All flowlines as a backdrop, then the segments coloured by `var`, then the domain outline
dum = nhd.plot(ax=ax, linewidth=0.75, color='cornflowerblue')
dum = nhd_age.plot(column=var, legend=False, ax=ax, cmap=plt.cm.nipy_spectral,
                   linewidth=1, vmin=vmin, vmax=vmax)
dum = domain.plot(ax=ax, color='none', edgecolor='black')

# Stand-alone colorbar built from the same colormap and limits as the map.
# set_array is the public equivalent of the former `sm._A = []` assignment.
sm = plt.cm.ScalarMappable(cmap='nipy_spectral', norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])
cbaxes = fig.add_axes([0.50, 0.85, 0.3, 0.025])
cb = fig.colorbar(sm, ax=ax, cax=cbaxes, orientation='horizontal')
ax.set_aspect(1)
dum = fig.suptitle('Median age of old water')
# fig.set_tight_layout(True)

dst = os.path.join('optimal_model', 'metric_maps_medianold.png')
fig.savefig(dst)

Image(dst)

In [None]:
# Map of the mean particle path length for each stream segment.
var = 'meanpath'

# Colour limits shared by the map and the colorbar. The map used to be scaled
# to the data range while the colorbar started at 0, so the two could
# disagree.
vmin = 0
vmax = nhd_age[var].max()

fig, ax = plt.subplots(1, 1, sharex=True, sharey=True)

# All flowlines as a backdrop, then the segments coloured by `var`, then the domain outline
dum = nhd.plot(ax=ax, linewidth=0.75, color='cornflowerblue')
dum = nhd_age.plot(column=var, legend=False, ax=ax, cmap=plt.cm.nipy_spectral,
                   linewidth=1, vmin=vmin, vmax=vmax)
dum = domain.plot(ax=ax, color='none', edgecolor='black')

# Stand-alone colorbar built from the same colormap and limits as the map.
# set_array is the public equivalent of the former `sm._A = []` assignment.
sm = plt.cm.ScalarMappable(cmap='nipy_spectral', norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])
cbaxes = fig.add_axes([0.50, 0.85, 0.3, 0.025])
cb = fig.colorbar(sm, ax=ax, cax=cbaxes, orientation='horizontal')
ax.set_aspect(1)
dum = fig.suptitle('Mean path length')
# fig.set_tight_layout(True)

dst = os.path.join('optimal_model', 'metric_maps_meanpath.png')
fig.savefig(dst)

Image(dst)

In [None]:
nhd_age_df = pd.DataFrame(nhd_age)

# Median of each metric by stream order, computed once for all four panels.
# Selecting the metric columns first keeps the geometry column out of the
# aggregation; newer pandas releases raise on non-numeric columns instead of
# silently dropping them.
metrics = ['fracyoung', 'meanyoung', 'medianold', 'meanpath']
order_medians = nhd_age_df.groupby('StreamOrde')[metrics].median()

fig, ax = plt.subplots(2, 2, sharex=True)
dum = order_medians.plot(kind='bar', y='fracyoung', ax=ax[0,0], legend=False)
dum = ax[0,0].set_ylabel('Fraction young water')

dum = order_medians.plot(kind='bar', y='meanyoung', ax=ax[0,1], legend=False)
dum = ax[0, 1].set_ylabel('Mean age of young water')

dum = order_medians.plot(kind='bar', y='medianold', ax=ax[1,0], legend=False)
dum = ax[1,0].set_xlabel('Stream order')
dum = ax[1,0].set_ylabel('Median age of old water')

dum = order_medians.plot(kind='bar', y='meanpath', ax=ax[1,1], legend=False)
dum = ax[1,1].set_xlabel('Stream order')
dum = ax[1,1].set_ylabel('Mean path length')

fig.set_tight_layout(True)

dst = os.path.join('optimal_model', 'bar_charts.png')
fig.savefig(dst)

Image(dst)

The dictionary `comid_dict` or the shapefile `nhd_age` can be used to calculate baseflow concentration and load. The procedure is listed below; an example, although for wells, is given in

    Starn, J.J., Kauffman, L.J., Carlson, C.S., Reddy, J.E., and Fienen, M.N., 2020, Data for three-dimensional distribution of groundwater residence time metrics in the glaciated United States using metamodels trained on general numerical simulation models: U.S. Geological Survey data release, https://doi.org/10.5066/P9BNWWCU.
    
* Create a time-series of dates and a corresponding data set of Julian (floating point) dates.
* Interpolate the time-series of your input data onto the dates
* Loop through `comid_dict` and extract the two-component Weibull parameters
* Reconstruct the age distribution on the same dates and frequency as the input data
* Use np.convolve to run the convolution between input and age distribution
