In [1]:
%matplotlib notebook

import numpy as np, imp, os, datetime as dt, pandas as pd, matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

# Locations of the Python wrapper modules, relative to this notebook.
# os.path.join picks the right separator for the host OS; the previous
# hard-coded "..\" form only resolves on Windows.
wrapper_fpath = os.path.join('..', 'inca.py')
optimize_funs_fpath = os.path.join('..', 'inca_calibration.py')

# Load the model wrapper (wr) and the calibration helpers (cf) directly from file
wr = imp.load_source('inca', wrapper_fpath)
cf = imp.load_source('inca_calibration', optimize_funs_fpath)

# Set up

catchment = 'Morsa' # Choose from: 'Morsa','Tarland'

wr.initialize('simplyp.dll')

# Parameter and input files are looked up as
# <catchment>/<catchment>Parameters.dat and <catchment>/<catchment>Inputs.dat
application_dir = '../../Applications/SimplyP/%s' % catchment
dataset = wr.DataSet.setup_from_parameter_and_input_files(
    '%s/%sParameters.dat' % (application_dir, catchment),
    '%s/%sInputs.dat' % (application_dir, catchment))

### Run the model to see what the results look like with manually-calibrated parameters

In [14]:
# Run the model with the parameter values currently held in the dataset
dataset.run_model()

# Simulated and observed discharge; the observed input series is requested
# aligned with the model results
flow_result = 'Reach flow (daily mean, cumecs)'
simQ = dataset.get_result_series(flow_result, ['Kure'])
obsQ = dataset.get_input_series('Observed Q', [], alignwithresults=True)

# simQ = dataset.get_result_series('Reach flow (daily mean, cumecs)', ['Tarland1'])
# obsQ = dataset.get_input_series('observed Q', [], alignwithresults=True)

# Model run period and the unit used to label the y axis
start_date = dt.datetime.strptime(dataset.get_parameter_time('Start date', []), '%Y-%m-%d')
timesteps = dataset.get_parameter_uint('Timesteps', [])
unit = dataset.get_result_unit(flow_result)

# One row per timestep on a daily date index (pd.date_range defaults to daily frequency)
date_idx = pd.date_range(start_date, periods=timesteps).values
df = pd.DataFrame({'Date': date_idx, 'Obs_Q': obsQ, 'Sim_Q': simQ}).set_index('Date')

# Plot observed and simulated flow on the same axes, y axis starting at zero
fig, ax = plt.subplots()
df.plot(figsize=(10,5), ax=ax)
ax.set_ylabel('$%s$' % unit)
ax.set_ylim(bottom=0)
plt.show()

<IPython.core.display.Javascript object>

In [15]:
chem_var = 'TP'
obs_col = 'Obs_%s' % chem_var
sim_col = 'Sim_%s' % chem_var

# Simulated and observed concentration for the chosen variable at Kure
sim = dataset.get_result_series('Reach %s concentration' % chem_var, ['Kure'])
# sim = dataset.get_result_series('Reach suspended sediment concentration', ['Kure'])
obs = dataset.get_input_series('Observed %s at Kure' % chem_var, [], alignwithresults=True)
start_date = dt.datetime.strptime(dataset.get_parameter_time('Start date', []), '%Y-%m-%d')
timesteps = dataset.get_parameter_uint('Timesteps', [])

# One row per timestep on a daily date index
date_idx = pd.date_range(start_date, periods=timesteps).values
df = pd.DataFrame({'Date': date_idx, obs_col: obs, sim_col: sim}).set_index('Date')

# Observations are drawn first as markers; df.plot then adds both columns as lines
fig, ax = plt.subplots()
df[obs_col].plot(marker='o', ms=3, ax=ax)
df.plot(figsize=(10,5), ax=ax)
ax.set_ylabel('%s concentration' % chem_var)
plt.show()

<IPython.core.display.Javascript object>

In [8]:
# (model result name, observed input name) pairs to compare at Kure
result_input_pairs = [
    ('Reach flow (daily mean, cumecs)', 'Observed Q'),
    ('Reach TP concentration', 'Observed TP at Kure'),
    ('Reach TDP concentration', 'Observed TDP at Kure'),
    ('Reach suspended sediment concentration', 'Observed SS at Kure'),
    ('Reach PP concentration', 'Observed PP at Kure'),
]

# Expand to (result name, result indexes, input name, input indexes) tuples
comparisons = [(result_name, ['Kure'], input_name, [])
               for result_name, input_name in result_input_pairs]

# comparisons = [('Reach flow (daily mean, cumecs)', ['Tarland1'], 'observed Q', [])]

# Only the comparisons are filled in here; the first and last slots (which hold
# the objective function and the number of timesteps to skip in the calibration
# set-up later in this notebook) are set to 0.
objective = (0, comparisons, 0)
cf.print_goodness_of_fit(dataset, objective)


Goodness of fit for Reach flow (daily mean, cumecs) [Kure] vs Observed Q []:
Mean error (bias): -0.592782
Mean absolute error: 2.631394
Mean square error: 17.772676
Nash-Sutcliffe coefficient: 0.502809


Goodness of fit for Reach TP concentration [Kure] vs Observed TP at Kure []:
Mean error (bias): -0.032819
Mean absolute error: 0.069039
Mean square error: 0.022183
Nash-Sutcliffe coefficient: 0.146248


Goodness of fit for Reach TDP concentration [Kure] vs Observed TDP at Kure []:
Mean error (bias): 0.009568
Mean absolute error: 0.014214
Mean square error: 0.000413
Nash-Sutcliffe coefficient: -1.130266


Goodness of fit for Reach suspended sediment concentration [Kure] vs Observed SS at Kure []:
Mean error (bias): -39.854581
Mean absolute error: 52.931038
Mean square error: 30277.012835
Nash-Sutcliffe coefficient: 0.057763


Goodness of fit for Reach PP concentration [Kure] vs Observed PP at Kure []:
Mean error (bias): -0.025169
Mean absolute error: 0.044542
Mean square error: 0.01004

### Take a look at what is contained in the dataset

In [None]:
# Fetch the index set names once, then list the indexes of the first two sets
index_sets = dataset.get_index_sets()
first_set, second_set = index_sets[0], index_sets[1]

print ('Index sets: %s' % index_sets)
print ('Indices in 1st index set: %s' % dataset.get_indexes(first_set))
print ('Indices in 2nd index set: %s' % dataset.get_indexes(second_set))

### Reset start date and number of time steps, and realign inputs to new model run period

In [None]:
# Optional step, disabled by default: uncomment the lines below to change the
# model start date and number of timesteps, then re-set every input series so
# that it lines up with the new run period.

# start_date = "1985-01-01"
# timesteps = 760

# dataset.set_parameter_time('Start date', [], start_date)
# dataset.set_parameter_uint('Timesteps',[],timesteps)

# # Align inputs with new start date and time steps
# model_inputs = [i[0] for i in dataset.get_input_list()]
# for variable in model_inputs:
#     series = dataset.get_input_series(variable,[], True)
#     dataset.set_input_series(variable, [], series, True)

### Set up for optimization

Pick out the parameters we want to calibrate, and set initial values for them as well as minimum and maximum limits we want the algorithm to search within.

In [None]:
# List the parameters available in the dataset; the first element of each
# returned entry is taken as the parameter name
paramTupleList = dataset.get_parameter_list()
paramList = [param_entry[0] for param_entry in paramTupleList]
print (paramList)

In [None]:
# # Optional alternative (uncomment to use): set up the comparisons, calibration list and bounds manually here rather than reading them from file

# # comparisons = [('Reach flow (daily mean, cumecs)', ['Kure'], 'Observed Q', []),
# #               ('Reach TP concentration', ['Kure'], 'Observed TP at Kure', [])]

# comparisons = [('Reach flow (daily mean, cumecs)', ['Tarland1'], 'observed Q', [])]

# n_vars = len(comparisons)

# #NOTE: The 'calibration' structure is a list of (indexed) parameters that we want to calibrate
# calibration = [
#     ('Initial snow depth as water equivalent',                      []),
#     ('Degree-day factor for snowmelt',                             []),
#     ('Proportion of precipitation that contributes to quick flow', []),
#     ('PET multiplication factor',                                       []),
#     ('Baseflow index',                                             []),
#     ('Groundwater time constant',                                  []),
#     ('Gradient of reach velocity-discharge relationship',         []),
# #     ('Soil water time constant',                                   ['Arable']),
#     ('Soil water time constant',                                   ['Semi-natural']),
#     ('Soil field capacity',                                        []),
#     ('Minimum groundwater flow',                                   []),
#     ('M_Q', [])
#     ]

# # calibration = [('Degree-day factor for snowmelt', []),
# #  ('Proportion of precipitation that contributes to quick flow', []),
# #  ('PET multiplication factor', []),
# #  ('Soil field capacity', []),
# #  ('Baseflow index', []),
# #  ('Groundwater time constant', []),
# #  ('Minimum groundwater flow', []),
# #  ('Gradient of reach velocity-discharge relationship', []),
# #  ('Soil water time constant', ['Semi-natural']),
# #  ('Reach sediment input scaling factor', []),
# #  ('Groundwater TDP concentration', []),
# #  ('Particulate P enrichment factor', []),
# #  ('Reach effluent TDP inputs', ['Kure']),
# #  ('Initial soil water TDP concentration and EPC0', ['Arable']),
# #  ('M_Q', []),
# #  ('M_TP', [])]

# # Read the initial guess provided by the parameter file
# initial_guess = cf.default_initial_guess(dataset, calibration[:-n_vars])

# # Initial guess for the residual error term
# initial_guess.append(0.5)
# # initial_guess.append(0.05)

# # Set upper and lower limits for parameter values
# param_min = [0.1 * x for x in initial_guess]
# param_max = [10.0 * x for x in initial_guess]

# cf.constrain_min_max(dataset, calibration, param_min, param_max, n_vars) # NOTE: Constrain to the min and max values recommended
#                                                               # by the model in case we made our bounds too wide.

# skiptimesteps = 30   # Skip these many of the first timesteps in the objective evaluation

# objective = (cf.log_likelyhood, comparisons, skiptimesteps)

In [None]:
# Set up from file

# Read in csv with parameters to vary, short names and param ranges.
# The path is relative to the notebook folder, like the wrapper and data paths
# used when setting up the dataset, instead of a machine-specific absolute path.
# NOTE(review): assumes the csv sits next to this notebook in
# PythonWrapper/SimplyP - adjust if it lives elsewhere.
fpath = 'SimplyP_calParams_ranges_morsa_v1.csv'
param_df = pd.read_csv(fpath)

# A) Hydrology only

# # List of simulated and observed variables to include in likelihood
# comparisons = [('Reach flow (daily mean, cumecs)', ['Kure'],
#                 'Observed Q', [])]

# # comparisons = [('Reach flow (daily mean, cumecs)', ['Tarland1'], 'observed Q', [])]

# calibration = [
#  ('Initial snow depth as water equivalent', []),
#  ('Degree-day factor for snowmelt', []),
#  ('Proportion of precipitation that contributes to quick flow', []),
#  ('PET multiplication factor', []),
#  ('Soil field capacity', []),
#  ('Baseflow index', []),
#  ('Groundwater time constant', []),
#  ('Minimum groundwater flow', []),
#  ('Gradient of reach velocity-discharge relationship', []),
#  ('Soil water time constant', ['Semi-natural']),
#  ('M_Q', [])
#  ]

# n_vars = len(comparisons)

# initial_guess = cf.default_initial_guess(dataset, calibration[:-n_vars])
# #NOTE: This reads the initial guess that was provided by the parameter file, excluding any error term parameters
# # Initial guess for the residual error term
# initial_guess.append(0.5)

# labels_short = param_df['ShortName']

# # Set upper and lower limits for parameter values
# param_min = list(param_df['Min'].values)
# param_max = list(param_df['Max'].values)

# # param_min = [0.1 * x for x in initial_guess]
# # param_max = [10.0 * x for x in initial_guess]

# cf.constrain_min_max(dataset, calibration, param_min, param_max, n_vars) # NOTE: Constrain to the min and max values recommended
#                                                               # by the model in case we made our bounds too wide.

# labels_long  = ['%s, [%s]' % (cal[0], cal[1]) 
#                     for cal in calibration[:-n_vars]]
# labels_long.append('M_Q')

# -------------------------------------------------------
# B) Hydrology and TP

# Simulated and observed variables to include in the likelihood, as
# (result name, result indexes, input name, input indexes) tuples
comparisons = [('Reach flow (daily mean, cumecs)', ['Kure'],
                'Observed Q', []),
               ('Reach TP concentration', ['Kure'],
                'Observed TP at Kure', [])]

# Parameters to calibrate, as (parameter name, indexes) tuples.
# The last len(comparisons) entries (M_Q, M_TP) are the residual error terms,
# one per compared variable, and are not read from the parameter file.
# NOTE: Need to automate this, struggling with converting string to list...
calibration = [
 ('Initial snow depth as water equivalent', []),
 ('Degree-day factor for snowmelt', []),
 ('Proportion of precipitation that contributes to quick flow', []),
 ('PET multiplication factor', []),
 ('Soil field capacity', []),
 ('Baseflow index', []),
 ('Groundwater time constant', []),
 ('Minimum groundwater flow', []),
 ('Gradient of reach velocity-discharge relationship', []),
 ('Soil water time constant', ['Semi-natural']),
 ('Reach sediment input scaling factor', []),
 ('Groundwater TDP concentration', []),
 ('Particulate P enrichment factor', []),
 ('Reach effluent TDP inputs', ['Kure']),
 ('Initial soil water TDP concentration and EPC0', ['Arable']),
 ('M_Q', []),
 ('M_TP', [])]

n_vars = len(comparisons)

# Initial guess as provided by the parameter file, excluding the error term parameters
initial_guess = cf.default_initial_guess(dataset, calibration[:-n_vars])
# Initial guesses for the residual error terms (M_Q, then M_TP)
initial_guess.append(0.5)
initial_guess.append(0.05)

# Extract inputs
# param_min = list(param_df['Min'].dropna().values)
# param_max = list(param_df['Max'].dropna().values)

# Search bounds: a factor of 10 either side of the initial guess.
# min/max keep lower <= upper even if an initial value is negative.
# NOTE(review): an initial value of exactly 0 still gives the degenerate range [0, 0].
param_min = [min(0.1 * x, 10.0 * x) for x in initial_guess]
param_max = [max(0.1 * x, 10.0 * x) for x in initial_guess]

labels_short = param_df['ShortName']

# Long labels "<parameter name>, [<indexes>]". The indexes are joined into a
# string so the label reads "[Semi-natural]" rather than "[['Semi-natural']]".
labels_long = ['%s, [%s]' % (name, ', '.join(indexes))
               for name, indexes in calibration[:-n_vars]]
labels_long.append('M_Q')
labels_long.append('M_TP')

# ###############################
# Same for just hydrol, or hydrol plus P

skiptimesteps = 30   # Skip this many first timesteps in the objective evaluation

# Objective passed to the optimizer: (objective function, comparisons, timesteps to skip)
objective = (cf.log_likelyhood, comparisons, skiptimesteps)

In [None]:
# dataset.delete()

### Run the optimizer and print the results

In [None]:
%%time
param_est = cf.run_optimization(dataset, param_min, param_max, initial_guess, calibration, objective, minimize=False)
#param_est = param_est[0]

for idx, cal in enumerate(calibration) :
    name, indexes = cal
    print('Estimated %-60s %-20s %5.2f (range [%5.2f, %5.2f])' %  (name, ', '.join(indexes),
                                                                   param_est[idx],
                                                                   param_min[idx], param_max[idx]))
if len(param_est) > len(calibration) :
    print('M: %f' % param_est[len(calibration)])

### Run with optimal parameters and plot

Run the model one more time with the optimal parameters and plot simulated output. Save figure to file, and optionally display it here too.

In [None]:
# Write the estimated values back to the dataset. The trailing n_vars entries of
# the calibration list are the error term parameters, so they are left out here.
cf.set_values(dataset, param_est, calibration[:-n_vars])
# dataset.write_parameters_to_file('optimal_parameters.dat')

dataset.run_model()

# Build the output path with os.path.join (portable separator) and make sure the
# target folder exists, so saving the figure does not fail on a fresh checkout.
plot_dir = 'simplyp_plots'
if not os.path.isdir(plot_dir):
    os.makedirs(plot_dir)
plot_fpath = os.path.join(plot_dir, 'Morsa_optimizer_MAP.png')

fig, ax = cf.plot_objective(dataset, objective, plot_fpath, return_fig=True)
plt.show()

In [None]:
# Simulated and observed TP at Kure from the latest model run
sim = dataset.get_result_series('Reach TP concentration', ['Kure'])
obs = dataset.get_input_series('Observed TP at Kure', [], alignwithresults=True)
start_date = dt.datetime.strptime(dataset.get_parameter_time('Start date', []), '%Y-%m-%d')
timesteps = dataset.get_parameter_uint('Timesteps', [])

# One row per timestep on a daily date index
date_idx = pd.date_range(start_date, periods=timesteps).values
df = pd.DataFrame({'Date': date_idx, 'Obs_TP': obs, 'Sim_TP': sim}).set_index('Date')

# Observations are drawn first as markers; df.plot then adds both columns as lines
fig, ax = plt.subplots()
df['Obs_TP'].plot(marker='o', ax=ax)
df.plot(figsize=(12,6), ax=ax)
ax.set_ylabel('TP concentration')
plt.show()

# comparisons = [('Reach flow (daily mean, cumecs)', ['Kure'], 'Observed Q', []),
#               ('Reach TP concentration', ['Kure'], 'Observed TP at Kure', [])]
# objective = (0,comparisons, 0)

# Goodness of fit for the comparisons held in the current objective
cf.print_goodness_of_fit(dataset, objective)

In [None]:
# Work out annual sums of simulated and observed specific Q

SECONDS_PER_DAY = 86400
CATCHMENT_AREA = 304.65  # presumably the catchment area in km2 - TODO confirm

# Simulated and observed discharge from the latest model run
simQ = dataset.get_result_series('Reach flow (daily mean, cumecs)', ['Kure'])
obsQ = dataset.get_input_series('Observed Q', [], alignwithresults=True)
start_date = dt.datetime.strptime(dataset.get_parameter_time('Start date', []), '%Y-%m-%d')
timesteps = dataset.get_parameter_uint('Timesteps', [])

# One row per timestep on a daily date index
date_idx = pd.date_range(start_date, periods=timesteps).values
df = pd.DataFrame({'Date': date_idx, 'Obs_Q': obsQ, 'Sim_Q': simQ}).set_index('Date')

# Sum the daily values within each calendar year.
# NOTE(review): sum() skips missing values, so years with gaps in Obs_Q (or
# incomplete years) give annual sums that are not directly comparable.
df_annualSums = df.groupby(df.index.year).sum()

# Convert each annual sum to a specific discharge using the same scaling
for prefix in ('Sim', 'Obs'):
    df_annualSums['%s_specificQ' % prefix] = (
        df_annualSums['%s_Q' % prefix] * SECONDS_PER_DAY * 10**-3 / CATCHMENT_AREA)

df_annualSums

In [None]:
# Final clean-up: delete the dataset now that the analysis is finished.
# After this call, `dataset` must be set up again before any earlier cell is re-run.
dataset.delete()