# Temperature Calibration Tutorial

This notebook outlines how to perform temperature model calibration for a selected subset of REWs. It is assumed that the selected subset is a unique sub-watershed of the full model watershed.

Two files are required for temperature calibration:

1. A shapefile corresponding to the sub-basin to be calibrated must be stored in `raw_data/watershed_poly`. 
2. Temperature data (in degrees Celsius) stored in the `calibration_data` folder. This data must span at least the time period from `spinup_date` to `stop_date`. It is assumed that this data represents the temperature at the outlet of the sub-basin being calibrated.

In [2]:

import os
import sys
from os.path import dirname
parent_dir = dirname(dirname(os.getcwd()))
sys.path.append(os.path.join(parent_dir,'StreamflowTempModel','2_hillslope_discharge'))
sys.path.append(os.path.join(parent_dir,'StreamflowTempModel','3_channel_routing'))
sys.path.append(os.path.join(parent_dir,'StreamflowTempModel','4_temperature'))


import random
from vadoseZone import *
import glob
from groundwaterZone import *
from REW import REW
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
import pickle
from datetime import date
import pandas as pd
import numpy as np
import geopandas as gp
import mpld3
import time
import sys
import shapely
import fiona
from pyDOE import *
import folium
from ast import literal_eval as make_tuple
%matplotlib inline

# Load config files, forcing file, and parameters for each group
parent_dir = os.path.dirname(os.path.dirname(os.getcwd()))

sys.path.append(os.path.join(parent_dir, 'StreamflowTempModel', '1_data_preparation'))
from prep import rew_params
# NOTE(review): presumably regenerates the pickled configuration read below -- confirm in prep.py
rew_params()


def load_model_pickle(filename):
    """Unpickle and return the object stored in `model_data/<filename>`.

    The path is resolved relative to `parent_dir`. The file is opened in
    binary mode inside a `with` block so the handle is closed as soon as the
    object has been read, instead of being left open for the kernel lifetime.
    """
    # pickle can execute arbitrary code while loading: only read trusted files
    with open(os.path.join(parent_dir, 'model_data', filename), 'rb') as pickle_file:
        return pickle.load(pickle_file)


rew_config = load_model_pickle('rew_config.p')
climate_group_forcing = load_model_pickle('climate_group_forcing.p')
model_config = load_model_pickle('model_config.p')
temperature_params = load_model_pickle('temperature_params.p')
hill_groups = load_model_pickle('solved_hillslope_discharge.p')
solved_channel_routing = load_model_pickle('solved_channel_routing.p')
channel_params = load_model_pickle('channel_params.p')
temperature_param_ranges = load_model_pickle('temperature_param_ranges.p')

#start/stop dates for running model  
#spinup date is the date after start_date for which we assume model is finished spinning up         
start_date = model_config['start_date']
stop_date = model_config['stop_date']
spinup_date = model_config['spinup_date']
Tmax = model_config['Tmax']
# dt and t are taken from the *temperature* entries of the model configuration
dt = model_config['dt_temperature']
t = model_config['t_temperature']
resample_freq_channel = model_config['resample_freq_channel']
resample_freq_hillslope = model_config['resample_freq_hillslope']
resample_freq_temperature = model_config['resample_freq_temperature']
timestamps_hillslope = model_config['timestamps_hillslope']
timestamps_channel = model_config['timestamps_channel']
timestamps_temperature = model_config['timestamps_temperature']

## Get REWs located within calibration sub-watershed 

Here, we use the representative REW points to determine which REWs are located within the sub-watershed that we are calibrating. We want to make sure to run the model only for the REWs that are relevant for calibration. If no REWs are contained within the sub-watershed, the REW in which the sub-watershed is located will be calibrated. 

In [3]:
# must specify prefix of .shp file corresponding to subwatershed
# the .shp file must be located within the raw_data/watershed_poly folder
subwatershed_name = 'elder'

shapefile_path = os.path.join(parent_dir, 'raw_data','watershed_poly', subwatershed_name + '.shp')
points = pd.read_csv(os.path.join(parent_dir, 'raw_data','basins_centroids', 'points.csv')).set_index('cat')

# get coordinate tuples corresponding to each REW
# The column is replaced in one assignment: writing element by element through
# points['coords'].loc[index] is chained indexing and may update a temporary copy.
points['coords'] = points['coords'].apply(make_tuple)

# check to see which REWs fall within sub-watershed
ids_in_subwatershed = []
with fiona.open(shapefile_path) as fiona_collection:
    for shapefile_record in fiona_collection:
        # note: the shapefile record must be of type polygon, not multi-polygon
        # i.e. the sub-watershed must be a single polygon
        shape = shapely.geometry.Polygon( shapefile_record['geometry']['coordinates'][0] )

        for index, row in points.iterrows():
            # read the tuple by column label rather than by position in the row
            coords = row['coords']
            point = shapely.geometry.Point(coords[0], coords[1]) # longitude, latitude
            if shape.contains(point):
                ids_in_subwatershed.append(index)

# drop duplicates and keep a reproducible order
ids_in_subwatershed = sorted(set(ids_in_subwatershed))

# if no REWs found inside sub-watershed, 
# assume the sub-watershed is contained within a single REW. 
# Here, find the id of that REW
if len(ids_in_subwatershed)==0:
    subwatershed_shape = gp.GeoDataFrame.from_file(shapefile_path)
    subwatershed_centroid = subwatershed_shape['geometry'].loc[0].centroid
    basins = glob.glob(os.path.join(parent_dir,'raw_data','basins_poly','*.shp'))[0]
    with fiona.open(basins) as fiona_collection:
        for shapefile_record in fiona_collection:
            shape = shapely.geometry.Polygon( shapefile_record['geometry']['coordinates'][0] )
            if shape.contains(subwatershed_centroid):
                ids_in_subwatershed.append(shapefile_record['properties']['cat'])

# Without any REW there is nothing to calibrate; stop here with a clear message
# instead of letting the later cells run on an empty selection.
if len(ids_in_subwatershed)==0:
    raise ValueError('No REW could be matched to sub-watershed %s' % subwatershed_name)

# Collect each (parameter group, climate group) pair once, in first-seen order,
# so that a group shared by several REWs is only solved once per realization.
groups_to_calibrate = []
for rew_id in ids_in_subwatershed:
    group = rew_config[rew_id]['group']
    if group not in groups_to_calibrate:
        groups_to_calibrate.append(group)


print('REWs %s are located within the calibration sub-watershed' % str(ids_in_subwatershed))


REWs [1, 2, 3] are located within the calibration sub-watershed


## Objective function

Define the objective function to be minimized during calibration. The function takes the modeled data and the observed data (as pandas dataframes) and returns a single misfit value; smaller values indicate a better fit.

In [3]:
# def objective_function(modeled, observed):
#     inds = ((modeled != 0) & (observed != 0))
#     return np.abs(np.sum(np.abs(np.log(observed.loc[inds]) - np.log(modeled.loc[inds])))/np.sum(np.abs(np.log(observed.loc[inds]) - np.log(np.mean(observed.loc[inds])))))

def objective_function(modeled, observed):
    """Return the normalized squared misfit between modeled and observed data.

    Only positions where both inputs are non-zero are used. The result is the
    absolute value of the sum of squared differences (modeled - observed)
    divided by the sum of squared deviations of the observed values from
    their mean; 0 means the two inputs agree exactly at the retained positions.
    """
    both_nonzero = (modeled != 0) & (observed != 0)
    modeled_kept = modeled[both_nonzero]
    observed_kept = observed[both_nonzero]

    squared_error = np.sum((modeled_kept - observed_kept) ** 2)
    observed_spread = np.sum((observed_kept - np.mean(observed_kept)) ** 2)
    return np.abs(squared_error / observed_spread)

In [None]:
# specify the number of parameter sets to generate
N = 100000

# NOTE(review): `parameter_group_params` and `parameter_ranges` are not defined in
# any cell visible in this notebook (the loading cell creates `temperature_params`
# and `temperature_param_ranges`) -- confirm where they are meant to come from.

parameter_realz = []
for i in range(N):
    # Start each realization from a per-group copy of the base parameters so the
    # sampled values below never overwrite the base dictionaries.
    # (The dictionary written to must be the one created here; it was previously
    # never initialised, which raises NameError on a fresh kernel.)
    parameter_group_params_current = {}
    for w in parameter_group_params.keys():
        parameter_group_params_current[w] = parameter_group_params[w].copy()

    for parameter_group, group_ranges in parameter_ranges.items():
        for parameter, bounds in group_ranges.items():
            # uniform draw between the lower (bounds[0]) and upper (bounds[1]) limit
            new_value = random.random()*(bounds[1] - bounds[0]) + bounds[0]
            parameter_group_params_current[parameter_group][parameter] = new_value
    parameter_realz.append(parameter_group_params_current)

# show the ranges of the last parameter group that was sampled
print(parameter_ranges[parameter_group])

{'k12': (0.05, 0.5), 'f': (0.1, 0.9), 's0R': (0, 0.4), 's0S': (0, 0.4), 'k2': (0.01, 0.1), 'k1': (0.05, 0.5), 'stR': (0.1, 0.9), 'stS': (0.1, 0.9), 'nR': (0.05, 0.2), 'nS': (0, 0.7)}


In [None]:

    
# Run the hillslope model once per parameter realization and area-weight the
# resulting daily discharge over the REWs of the calibration sub-watershed.
#
# NOTE(review): `dt` and `t` are read from the 'dt_temperature'/'t_temperature'
# config entries, while forcing is resampled with `resample_freq_hillslope` and
# results are indexed by `timestamps_hillslope` -- confirm these are consistent.

# REW areas do not depend on the realization, so the total is computed once.
total_area = 0
for rew_id in ids_in_subwatershed:
    total_area += rew_config[rew_id]['area_sqkm']

# for each parameter realization
solved_subwatersheds = []
for i in range(N):
    solved_groups = {}
    # Keep the current realization under its own name: rebinding
    # `parameter_group_params` here would replace the base parameter set that
    # the sampling cell copies from, changing its results when it is re-run.
    realization_params = parameter_realz[i]

    for group_id in groups_to_calibrate:

        parameter_group_id = group_id[0]
        climate_group_id = group_id[1]

        # the parameter dictionary also carries the zone classes under 'vz' and 'gz'
        group_params = realization_params[parameter_group_id]
        vz = group_params['vz'](**group_params)
        gz = group_params['gz'](**group_params)

        rew = REW(vz, gz,  **{'pet':climate_group_forcing[climate_group_id].pet, 'ppt':climate_group_forcing[climate_group_id].ppt, 'aspect':90})

        # per-timestep state; only leakage and discharge are used further below
        storageVZ    = np.zeros(np.size(t))
        storageGZ     = np.zeros(np.size(t))
        discharge       = np.zeros(np.size(t))
        leakage         = np.zeros(np.size(t))
        ET              = np.zeros(np.size(t))

        # Resample pet and ppt to integration timestep
        ppt = np.array(rew.ppt[start_date:stop_date].resample(resample_freq_hillslope).ffill())
        pet = np.array(rew.pet[start_date:stop_date].resample(resample_freq_hillslope).ffill())

        # Solve group hillslope
        for l in range(len(t)):
            rew.vz.update(dt,**{'ppt':ppt[l],'pet':pet[l]})
            storageVZ[l] = rew.vz.storageVZ
            leakage[l]      = rew.vz.leakage
            ET[l]           = rew.vz.ET   
            # the groundwater zone is driven by the leakage just computed
            rew.gz.update(dt,**{'leakage':leakage[l]})
            storageGZ[l] = rew.gz.storageGZ
            discharge[l] = rew.gz.discharge

        # resample as daily data
        solved_groups[group_id] = pd.DataFrame({'discharge':discharge}, index=timestamps_hillslope).resample('D').mean()

    name = str(i) + 'discharge'
    # zero-filled daily frame providing the index for this realization's column
    solved_subwatershed = pd.DataFrame({name:np.zeros(len(timestamps_hillslope))}, index=timestamps_hillslope).resample('D').mean()

    # area-weighted sum of the group discharges over the selected REWs
    solved_subwatershed_array = np.zeros(int(len(solved_subwatershed)))
    for rew_id in ids_in_subwatershed:
        solved_subwatershed_array += rew_config[rew_id]['area_sqkm']/total_area*solved_groups[rew_config[rew_id]['group']]['discharge']

    solved_subwatershed[name] = solved_subwatershed_array
    solved_subwatersheds.append(solved_subwatershed)

# one column per realization, named '<i>discharge'
solved_subwatersheds = pd.concat(solved_subwatersheds,axis=1)

## Model goodness of fit

Here, each model run is compared to calibration data using the objective function as defined above. The user must specify the pickled dataframe with calibration runoff data in units of cm/day. Calibration data must be available at least from `spinup_date` to `stop_date`. 

In [None]:
calibration_data_filename = 'elder_runoff.p'

# Read the calibration pickle in binary mode (like the other pickles in this
# notebook) and close the file as soon as it has been loaded.
with open(os.path.join(parent_dir,'calibration_data',calibration_data_filename), 'rb') as calibration_file:
    calibration_data = pickle.load(calibration_file)
calibration_data = calibration_data[spinup_date:stop_date]
col_name = calibration_data.columns[0]
calibration_data.columns = ['calibration_data']
df = pd.concat([calibration_data, solved_subwatersheds], axis=1)

observed = df['calibration_data'][spinup_date:stop_date]

# objective_function returns a misfit that is 0 for a perfect fit and must be
# minimized. It is converted here to a Nash-Sutcliffe style score
# (NSE = 1 - misfit) so that larger is better, which is what the argmax, the
# negative sentinel values and the "NSE" labels below all assume.
nses = []
for i in range(N):
    name = str(i) + 'discharge'
    modeled = df[name][spinup_date:stop_date]
    if np.isfinite(np.sum(modeled)):
        # runs whose total truncates to zero are given a fixed poor score
        if int(np.sum(modeled)) == 0:
            nses.append(-1)
        else:
            # argument order matters: modeled first, observed second
            score = 1 - objective_function(modeled, observed)
            # a NaN score would otherwise be picked up by argmax
            nses.append(score if np.isfinite(score) else -9999)
    else:
        nses.append(-9999)

i = int(np.argmax(nses))
best_column = str(i) + 'discharge'
fig = plt.figure(figsize=(8,4))
plt.plot(df[['calibration_data',best_column]][spinup_date:stop_date])
plt.legend(['Calibration data', 'Best model run (NSE = %0.2f)' % np.max(nses)])
plt.xlabel('Date')
plt.ylabel('Runoff [cm/day]')
plt.title( subwatershed_name + ' subwatershed calibration results')
# HTML version of the figure, kept in `html` for later use
html = mpld3.fig_to_html(fig)

print('The best fit parameter set has an NSE of %0.2f' % (np.max(nses)))

for j, parameter_group in enumerate(parameter_ranges.keys()):
    for k, parameter in enumerate(parameter_ranges[parameter_group].keys()):
        new_value = parameter_realz[i][parameter_group][parameter]
        print('The best fit value for parameter %s in parameter group %s is %f' % (parameter, parameter_group, new_value))



In [None]:
# Build an interactive Folium map showing the full watershed, the calibration
# sub-watershed (with the calibration figure in a popup) and the stream network.
# Note: working in Folium, ALL projections must be converted to epsg='4326'
watershed_name = 'sf_below_tenmile'
subwatershed_name = 'elder'


def _load_outline(poly_name):
    """Read `<poly_name>.shp` from raw_data/watershed_poly in EPSG:4326.

    Returns the shapefile path, the GeoDataFrame (with a 'coords' column holding
    one representative point per geometry) and its GeoJSON string.
    """
    path = os.path.join(parent_dir, 'raw_data','watershed_poly', poly_name + '.shp')
    frame = gp.GeoDataFrame.from_file(path).to_crs(epsg='4326')
    frame['coords'] = frame['geometry'].apply(lambda x: x.representative_point().coords[:])
    frame['coords'] = [coords[0] for coords in frame['coords']]
    return path, frame, frame.to_crs(epsg='4326').to_json()


# Full watershed outline; its bounds are used to frame the map at the end
shapefile_path, basins_shape, basins = _load_outline(watershed_name)
bounds = basins_shape.exterior.bounds

# representative points are (longitude, latitude); Folium expects [latitude, longitude]
map_center = [basins_shape['coords'][0][1], basins_shape['coords'][0][0]]
mapa = folium.Map(map_center, tiles='Stamen Terrain')

watershed_layer = folium.GeoJson(
    basins,
    style_function=lambda feature: {'color' : '#00ff00', 'fillOpacity': .05},
)
watershed_layer.add_to(mapa)

# Calibration sub-watershed outline
shapefile_path, basins_shape, basins = _load_outline(subwatershed_name)

subwatershed_layer = folium.GeoJson(
    basins,
    style_function=lambda feature: {'color' : '#FF0000', 'opacity': 0.4},
)
subwatershed_layer.add_to(mapa)

# Marker on the sub-watershed whose popup embeds the HTML stored in `html`
iframe = folium.element.IFrame(html=html, width=650, height=400)
popup = folium.Popup(iframe, max_width=2650)
marker_location = [basins_shape['coords'][0][1], basins_shape['coords'][0][0]]
marker_icon = folium.Icon(color='red',icon='info-sign')
folium.Marker(marker_location, popup=popup, icon=marker_icon).add_to(mapa)

# Stream network: first shapefile found in raw_data/streams_poly
streams_path = glob.glob(os.path.join(parent_dir,'raw_data','streams_poly','*.shp'))[0]
streams_shape = gp.GeoDataFrame.from_file(streams_path).to_crs(epsg='4326')
streams = gp.GeoDataFrame(streams_shape['geometry'], crs=streams_shape.crs)
streams['RGBA'] = '#0000ff'
streams = streams.to_crs(epsg='4326').to_json()
colors = []
streams_layer = folium.GeoJson(
    streams,
    style_function=lambda feature: {
        'color' : feature['properties']['RGBA'],
        'weight' : 4,
        'opacity': 1,
    },
)
streams_layer.add_to(mapa)

# Frame the map on the full watershed, then save one copy in calibration_output
# and one in the current working directory
calibration_output_name = subwatershed_name + '_calibration.html'
south_west = [bounds['miny'].loc[0], bounds['minx'].loc[0]]
north_east = [bounds['maxy'].loc[0], bounds['maxx'].loc[0]]
mapa.fit_bounds([south_west, north_east])
mapa.save(os.path.join(parent_dir, 'calibration_output', calibration_output_name))
mapa.save(calibration_output_name)
mapa