# Calibration Plot Generation Notebook

In [None]:
import pydsm
from pydsm import postpro

import pydelmod
from pydelmod import calibplot
import panel as pn
import pandas as pd

In [None]:
###############################################################################
# Dask uses parallel processing, which will run several times faster.
# However, messages printed to stdout will not be displayed in the notebook.
# This includes messages indicating that plots will not be created for
# certain locations due to missing DSS data. These messages will be displayed
# in the conda prompt window instead. The use of dask with network drives is
# not recommended--some processes may fail.
###############################################################################
# True: run the dask cells further down; False: run the serial loop instead.
use_dask = True

###########################################################################
# Usage:
# 1. Make sure only one of the following 3 vartype lines is uncommented.
# 2. Change the value of timewindow (a few cells below this one).
#    If the uncommented line below is for EC results, choose the qual
#    calibration time period. Otherwise, choose the hydro calibration
#    time period.
# 3. Run all cells.
# 4. If using dask, restart the kernel before repeating the steps above.
# 5. After creating all the calibration plots, copy the html and png
#    files to another location, and then create the validation plots.
###########################################################################
# NOTE(review): the EC unit given here is 'mmhos/cm', while build_and_save_plot
# passes units='UMHOS/CM' for EC -- confirm which one is intended.
vartype=postpro.VarType('EC','mmhos/cm')
# vartype=postpro.VarType('FLOW','cfs')
# vartype=postpro.VarType('STAGE','ft')

# Time window used for the instantaneous plots, keyed by vartype name
# (None for EC).
# For flow plots show a 1 month period where all observed data sets have no missing values. I chose September 2011.
inst_plot_timewindow_dicts = {'FLOW': '2011-09-01:2011-09-30',
                             'EC': None, 
                             'STAGE': '2011-09-01:2011-09-30'}

## Define variable type
(e.g. FLOW, STAGE, EC, TEMP, etc., i.e. the C Part of the DSS files)

## Things that change with vartype

In [None]:
# Station list (.csv) to load for each variable type.
locationfile_for_vartype = dict(
    EC='./LocationInfo/calibration_ec_stations.csv',
    FLOW='./LocationInfo/calibration_flow_stations.csv',
    STAGE='./LocationInfo/calibration_stage_stations.csv',
)
# DSS file holding the observed data for each variable type.
obs_dssfile_for_vartype = dict(
    EC='./observedData/ec_merged.dss',
    FLOW='./observedData/flow_merged.dss',
    STAGE='./observedData/stage_merged.dss',
)

## Load locations from a .csv file 
The .csv file should have at least the 'Name', 'BPart' and 'Description' columns.

In [None]:
# Pick the station list that belongs to the selected variable type.
locationfile = locationfile_for_vartype[vartype.name]
dfloc = postpro.load_location_file(locationfile)
# Build one postpro.Location per row of the station table.
locations = [
    postpro.Location(row['Name'], row['BPart'], row['Description'])
    for _row_index, row in dfloc.iterrows()
]

## Define studies
Each study is a name paired with the DSS file that contains its data.

In [None]:
# The observed data set comes from the DSS file for the selected variable type.
obs_study = postpro.Study('Observed', obs_dssfile_for_vartype[vartype.name])

# Model studies: study name -> DSS file with the model output.
# No "." allowed in study names because of issue https://github.com/holoviz/holoviews/issues/4714
study_file_map = {
    'DSM2v8_1_2': './modelOutput/historical_v81.dss',
    'DSM2v8_2': './modelOutput/historical_v82b1.dss',
}
model_studies = [
    postpro.Study(study_name, dss_path)
    for study_name, dss_path in study_file_map.items()
]
# Observed study first, followed by the model studies in map order.
studies = [obs_study, *model_studies]

## Define timewindow (optional)
Time window in the format `start_date_str - end_date_str` (e.g. `01OCT2013 - 01DEC2013`). Specify an empty string if the available data should decide the time window.

In [None]:
# Predefined periods, in the 'start - end' format used for timewindow
# (an ad-hoc window looks like '01OCT2013 - 01DEC2013').
# Hydro periods:
hydro_calibration_time_window_str = '01OCT2010 - 01OCT2012'
hydro_validation_time_window_str = '01OCT2000 - 01OCT2017'
# Qual periods:
qual_calibration_time_window_str = '01OCT2009 - 01OCT2017'
qual_validation_time_window_str = '01OCT2000 - 01OCT2009'

# Default: empty string.
timewindow = ""

# Change the value of this variable to select the period to plot.
timewindow = qual_calibration_time_window_str


In [None]:
def save_to_png(calib_plot_template, fname):
    """Save a calibration plot template to ``fname`` without the second-row toolbar.

    The pane at ``calib_plot_template[1][0]`` has its wrapped plot object
    replaced by the result of ``.opts(toolbar=None)`` before the template is
    saved, so the template passed in is modified.

    Parameters
    ----------
    calib_plot_template
        Indexable template; ``[1][0]`` must expose an ``object`` attribute
        with an ``opts`` method, and the template must provide ``save``.
    fname
        Output file name handed to ``calib_plot_template.save``.
    """
    # Remove the toolbar from the second row plot. The original did this twice
    # in a row on the same pane; once is enough.
    second_row_pane = calib_plot_template[1][0]
    second_row_pane.object = second_row_pane.object.opts(toolbar=None)
    calib_plot_template.save(fname)

# Build and save plot for each location

In [None]:
def build_and_save_plot(studies, location, vartype, timewindow, write_html=False, write_png=True):
    """Build the calibration plot for one location and save it to disk.

    Parameters
    ----------
    studies
        Studies passed through to ``calibplot.build_calib_plot_template``.
    location
        Location to plot; its ``name`` is used in the output file names.
    vartype
        Variable type; ``vartype.name`` ('FLOW', 'STAGE' or 'EC') selects the
        units, the tidal template and the instantaneous-plot time window.
    timewindow
        Time window string passed through to the template builder.
    write_html
        If True, save ``<location>_<vartype>.html``.
    write_png
        If True, save ``<location>_<vartype>.png``.

    Returns
    -------
    tuple
        ``(calib_plot_template, metrics_df)`` as returned by the template
        builder. Nothing is written if either of them is None.

    Raises
    ------
    KeyError
        If ``vartype.name`` has no entry in ``inst_plot_timewindow_dicts``.
    """
    is_flow = vartype.name == 'FLOW'
    # FLOW and STAGE use the tidal template, except for the cross-delta flow location.
    use_tidal_template = is_flow or vartype.name == 'STAGE'
    if location.name == 'RSAC128-RSAC123':
        print('cross-delta flow')
        use_tidal_template = False

    # Units passed to the template builder; empty for any other variable type.
    units = {'FLOW': 'CFS', 'STAGE': 'FEET', 'EC': 'UMHOS/CM'}.get(vartype.name, '')
    # set a separate timewindow for instantaneous plots
    inst_plot_timewindow = inst_plot_timewindow_dicts[vartype.name]

    calib_plot_template, metrics_df = calibplot.build_calib_plot_template(
        studies, location, vartype, timewindow,
        tidal_template=use_tidal_template,
        flow_in_thousands=is_flow,
        units=units,
        inst_plot_timewindow=inst_plot_timewindow)

    if calib_plot_template is not None and metrics_df is not None:
        base_name = f'{location.name}_{vartype.name}'
        # The HTML is saved first, before save_to_png removes a toolbar from the template.
        if write_html:
            calib_plot_template.save(f'{base_name}.html')
        # Honour write_png; previously the PNG was written even when it was False.
        if write_png:
            save_to_png(calib_plot_template, f'{base_name}.png')
    return calib_plot_template, metrics_df

In [None]:
if not use_dask:
    # Serial path: build the plots one location at a time and collect the
    # per-location metrics into one summary table.
    only_these_locations = None
    # if you want to create plots for a subset of the locations. Example
    # only_these_locations = ['RSAC155','CHVCT000']

    per_location_metrics = []
    for location in locations:
        # None or an empty list means "all locations".
        if only_these_locations and location.name not in only_these_locations:
            continue
        print(location)
        plot_template, metrics_df = build_and_save_plot(studies, location, vartype, timewindow, write_html=True)
        if metrics_df is None:
            # No metrics came back for this location (build_and_save_plot
            # writes no files in that case either); skip it rather than crash.
            continue
        # Label every row with the location, whatever the number of rows,
        # and move the Location column to the beginning.
        metrics_df = metrics_df.assign(Location=location.name)
        cols = ['Location'] + [c for c in metrics_df.columns if c != 'Location']
        per_location_metrics.append(metrics_df.loc[:, cols])

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    all_loc_metrics_df = pd.concat(per_location_metrics) if per_location_metrics else None

    if all_loc_metrics_df is None:
        print('No metrics were produced; summary statistics files not written.')
    else:
        all_loc_metrics_df.to_csv('0_summary_statistics_'+vartype.name+'.csv')
        all_loc_metrics_df.to_html('0_summary_statistics_'+vartype.name+'.html')

# Start Dask Cluster

Using 8 workers here, each with a limit of 4GB

In [None]:
if use_dask:
    import dask
    from dask.distributed import Client, LocalCluster

    class DaskCluster:
        """Holds the dask client connected to a local cluster."""

        def __init__(self):
            # None until start_local_cluster() is called, and again after
            # stop_local_cluster().
            self.client = None

        def start_local_cluster(self, n_workers=8, threads_per_worker=1, memory_limit='4G'):
            """Start a LocalCluster and connect ``self.client`` to it.

            The defaults reproduce the original hard-coded configuration:
            8 workers, 1 thread per worker, memory_limit='4G'.
            """
            # threads_per_worker=1 needed if using numba :(
            cluster = LocalCluster(n_workers=n_workers,
                                   threads_per_worker=threads_per_worker,
                                   memory_limit=memory_limit)
            self.client = Client(cluster)

        def stop_local_cluster(self):
            """Shut down through the client; do nothing if no client is set."""
            if self.client is None:
                # Never started or already stopped: avoid AttributeError on None.
                return
            self.client.shutdown()
            self.client = None

In [None]:
if use_dask:
    # Create the cluster wrapper and start the local dask cluster.
    cluster = DaskCluster()
    cluster.start_local_cluster()
    # NOTE(review): a bare expression inside an `if` body is not displayed by
    # the notebook; only a cell's last top-level expression is. Confirm
    # whether the client summary was meant to be shown here.
    cluster.client

## Create delayed tasks

In [None]:
if use_dask:
    # One delayed build_and_save_plot call per location, keyed
    # '<location name>/<vartype name>'. Nothing runs until the tasks are computed.
    tasks = [
        dask.delayed(build_and_save_plot)(
            studies,
            location,
            vartype,
            timewindow,
            write_html=True,
            write_png=True,
            dask_key_name=f'{location.name}/{vartype.name}',
        )
        for location in locations
    ]

## Schedule the tasks

In [None]:
if use_dask:
    # Run all delayed plot tasks. The values returned by dask.compute are not
    # kept. Uncomment the single-threaded variant below (and comment out the
    # other call) to run the tasks without parallelism.
    # dask.compute(tasks, scheduler='single-threaded')
    dask.compute(tasks)

In [None]:
if use_dask:
    # Shut the local dask cluster down once all tasks have been computed.
    cluster.stop_local_cluster()