# Calibration Plot Generation Notebook

In [None]:
import pydsm
from pydsm import postpro

import pydelmod
from pydelmod import calibplot
import panel as pn


In [None]:
###########################################################################
# How to use this notebook:                                               #
# 1. Leave one of the three `vartype = ...` lines below uncommented.      #
# 2. Set `timewindow` in the time-window cell a few cells further down.   #
# 3. Do not use the dask cells yet: run one cell at a time and stop after #
#    the cell that begins with "for location in locations:".              #
###########################################################################
# vartype = postpro.VarType('EC', 'mmhos/cm')
# vartype = postpro.VarType('FLOW', 'cfs')
vartype = postpro.VarType('STAGE', 'ft')

# Time window used for the instantaneous plots, keyed by variable type name.
# For flow plots, show a one-month period in which none of the observed data
# sets has missing values; September 2011 was chosen. None means no separate
# window is set for that variable type.
inst_plot_timewindow_dicts = dict(
    FLOW='2011-09-01:2011-09-30',
    EC=None,
    STAGE='2011-09-01:2011-09-30',
)

## Define variable type 
(e.g. FLOW, STAGE, EC, TEMP, etc.; i.e. the C Part of the DSS files)

## Things that change with vartype

In [None]:
# Station list (.csv) to load for each variable type name.
locationfile_for_vartype = dict(
    EC='d:/DSM2_Calibration_Notebooks/8.2/LocationInfo/calibration_ec_stations.csv',
    FLOW='d:/DSM2_Calibration_Notebooks/8.2/LocationInfo/calibration_flow_stations.csv',
    STAGE='d:/DSM2_Calibration_Notebooks/8.2/LocationInfo/calibration_stage_stations.csv',
)
# Observed-data DSS file to read for each variable type name.
obs_dssfile_for_vartype = dict(
    EC='d:/DSM2_Calibration_notebooks/8.2/observedData/ec_merged.dss',
    FLOW='d:/DSM2_Calibration_notebooks/8.2/observedData/flow_merged.dss',
    STAGE='d:/DSM2_Calibration_notebooks/8.2/observedData/stage_merged.dss',
)

## Load locations from a .csv file 
The .csv file should have at least the 'Name', 'BPart' and 'Description' columns.

In [None]:
# Pick the station list that matches the selected variable type and load it.
locationfile = locationfile_for_vartype[vartype.name]
dfloc = postpro.load_location_file(locationfile)
# One Location per row, built from the Name, BPart and Description columns.
locations = [
    postpro.Location(row['Name'], row['BPart'], row['Description'])
    for _, row in dfloc.iterrows()
]
           

## Define studies
Each study pairs a name with the DSS file that contains its data.

In [None]:
# The observed data is handled as one more study, read from the DSS file
# registered for the selected variable type.
obs_study = postpro.Study('Observed', obs_dssfile_for_vartype[vartype.name])

# These two are only referenced by the commented-out study map below.
base_dir = 'e:/dsm2v82_calibration/'
model_output_dir = 'output/'
# No "." allowed in study names because of issue https://github.com/holoviz/holoviews/issues/4714
# Alternative study set (uncomment to use it instead of the map that follows):
# study_file_map = {'DCD': base_dir+'BaselineRun/DSM2_DCD/'+model_output_dir+'historical_v81_DCD.dss',
#                   'DCD12 SMCD': base_dir+'BaselineRun/DSM2_DCD1.2_SMCD/'+model_output_dir+'historical_v82b1.dss',
#                   'DCD12 SMCD_last3': base_dir+'CalibrationRuns/last/DSM2_DCD1.2_SMCD_last3bin/'+model_output_dir+'historical_v82b1.dss',
#                   'DICU': base_dir+'BaselineRun/DSM2_DICU/'+model_output_dir+'historical_v81_DICU.dss'}
study_file_map = dict(
    DSM2v8_1_2='d:/DSM2_Calibration_Notebooks/8.2/modelOutput/historical_v81.dss',
    DSM2v8_2='d:/DSM2_Calibration_Notebooks/8.2/modelOutput/historical_v82b1.dss',
)
model_studies = [
    postpro.Study(study_name, dss_path)
    for study_name, dss_path in study_file_map.items()
]

# Observed study first, followed by the model studies in map order.
studies = [obs_study, *model_studies]


## Define timewindow (optional)
Time window in the format `start_date_str - end_date_str`. Specify an empty string to let the available data decide the time window.

In [None]:
# Calibration periods
hydro_calibration_time_window_str = '01OCT2010 - 01OCT2012'
qual_calibration_time_window_str = '01OCT2009 - 01OCT2017'
# Validation periods
hydro_validation_time_window_str = '01OCT2000 - 01OCT2017'
qual_validation_time_window_str = '01OCT2000 - 01OCT2009'

# Time window actually used for the plots. Alternatives kept for reference:
# timewindow = '01OCT2013 - 01DEC2013'
# timewindow = ""  # empty string: let the available data decide
timewindow = hydro_calibration_time_window_str


# Build and save plot for each location

In [None]:
# basic usage
# def build_and_save_plot(studies, location, vartype, timewindow):
#     flow_or_stage = (vartype.name == 'FLOW') or (vartype.name == 'STAGE')
#     calib_plot_template=calibplot.build_calib_plot_template(studies, location, vartype, timewindow, tidal_template=flow_or_stage)
#     calib_plot_template.save(f'{location.name}_{vartype.name}.html')

In [None]:
def build_and_save_plot(studies, location, vartype, timewindow, write_html=False):
    """Build the calibration plot template for one location, optionally saving it as HTML.

    Args:
        studies: Studies to plot; forwarded unchanged to
            ``calibplot.build_calib_plot_template``.
        location: Location to plot; its ``name`` is used for the HTML file name.
        vartype: Variable type; its ``name`` selects the units label, the
            tidal/flow options and the instantaneous-plot time window.
        timewindow: Time window string; forwarded unchanged.
        write_html: When true, save the template to
            ``'<location.name>_<vartype.name>.html'``.

    Returns:
        The object returned by ``calibplot.build_calib_plot_template``.
    """
    # Units label per variable type; any other type gets an empty label.
    units_by_vartype = {'FLOW': 'CFS', 'STAGE': 'FEET', 'EC': 'UMHOS/CM'}

    var_name = vartype.name
    flow_or_stage = var_name in ('FLOW', 'STAGE')
    flow_in_thousands = var_name == 'FLOW'
    units = units_by_vartype.get(var_name, '')

    # Separate time window for the instantaneous plots. Variable types that
    # have no entry (e.g. TEMP) fall back to None, the same value configured
    # for EC, instead of raising KeyError.
    inst_plot_timewindow = inst_plot_timewindow_dicts.get(var_name)

    calib_plot_template = calibplot.build_calib_plot_template(
        studies, location, vartype, timewindow,
        tidal_template=flow_or_stage,
        flow_in_thousands=flow_in_thousands, units=units,
        inst_plot_timewindow=inst_plot_timewindow)
    if write_html:
        calib_plot_template.save(f'{location.name}_{vartype.name}.html')
    return calib_plot_template

def save_to_png(calib_plot_template,fname):
    """Save the plot template to ``fname`` by calling its ``save`` method.

    Args:
        calib_plot_template: Object exposing ``save(filename)``, such as the
            value returned by ``build_and_save_plot``.
        fname: Output file name.

    NOTE(review): nothing here forces PNG output; presumably the format
    follows the extension of ``fname`` -- confirm against the ``save``
    implementation.
    """
    # Disabled experiment, kept for reference:
#     hvobj=calib_plot_template[1][0]
#     hvobj.object=hvobj.object.opts(toolbar=None) # remove the toolbar from the second row plot
    calib_plot_template.save(fname)

In [None]:
# Sequential (non-dask) run: for every location, build the plot template,
# write it as HTML (write_html=True) and then save it again under a .png name.
for location in locations:
    # Debug filter: uncomment the next line (and indent the loop body under it)
    # to process a single station only.
#     if location.name=='ROLD024':
    print(location)
    plot_template=build_and_save_plot(studies, location, vartype, timewindow, write_html=True)
    save_to_png(plot_template,f'{location.name}_{vartype.name}.png')


# Start Dask Cluster
Using 8 workers here, each with a limit of 4GB

In [None]:
import dask
from dask.distributed import Client, LocalCluster

class DaskCluster:
    """Small helper that starts and stops a local dask.distributed cluster."""

    def __init__(self):
        # Client connected to the local cluster; None while no cluster is running.
        self.client = None

    def start_local_cluster(self, n_workers=8, threads_per_worker=1, memory_limit='4G'):
        """Start a LocalCluster and connect ``self.client`` to it.

        Args:
            n_workers: Number of worker processes to start.
            threads_per_worker: Threads per worker; 1 is needed if using numba.
            memory_limit: Memory limit passed to ``LocalCluster``.
        """
        cluster = LocalCluster(n_workers=n_workers,
                               threads_per_worker=threads_per_worker,
                               memory_limit=memory_limit)
        self.client = Client(cluster)

    def stop_local_cluster(self):
        """Shut down the cluster via the client; do nothing if none was started."""
        # Calling stop before start (or twice) used to raise AttributeError
        # because self.client is None in that state.
        if self.client is None:
            return
        self.client.shutdown()
        self.client = None

In [None]:
# Create the helper and start the local cluster.
cluster = DaskCluster()
cluster.start_local_cluster()
# Bare expression: as the last line of a notebook cell it displays the client.
cluster.client

## Create delayed tasks

In [None]:
# One delayed build_and_save_plot call per location; each task gets the key
# '<location.name>/<vartype.name>'.
# NOTE(review): write_html is left at its default (False), so these tasks only
# return the templates and write no files, unlike the sequential loop, which
# passes write_html=True. Confirm this is intended.
tasks=[dask.delayed(build_and_save_plot)(studies, location, vartype, timewindow, dask_key_name=f'{location.name}/{vartype.name}')
           for location in locations]

## Schedule the tasks

In [None]:
#dask.compute(tasks)

In [None]:
# -- DEBUGGING -- on exception then put %debug in next cell to start debugger at exception
# Runs the tasks with dask's 'single-threaded' scheduler instead of the default one.
dask.compute(tasks, scheduler='single-threaded')

In [None]:
# Shut the local cluster down once the tasks are done.
cluster.stop_local_cluster()