In [None]:
import pydsm
from pydsm import postpro

# Dask related functions
Dask uses parallel processing, which will significantly reduce runtime.
However, messages printed to stdout will not be displayed in the notebook.  
This includes messages indicating that plots will not be created for        
certain locations due to missing DSS data. These messages will be displayed 
in the conda prompt window. The use of dask with network drives is not      
recommended--some processes may fail.                                       
This notebook writes DSS files. Writing DSS files while using dask does not
work on Windows, but it works well on Linux. For that reason, use_dask is set
to False by default. If using Linux, setting use_dask to True will increase speed.

In [None]:
# Switch for running the processors in parallel through dask.
# For Windows this should be False; it is read by run_all() to pick the scheduler.
use_dask = False

# Create Dask cluster

In [None]:
import dask
from dask.distributed import Client, LocalCluster

class DaskCluster:
    """Owns the lifecycle of a local dask cluster and its client.

    Attributes:
        client: the connected ``Client`` while a cluster is running,
            otherwise ``None``.
    """

    def __init__(self):
        self.client = None

    def start_local_cluster(self, n_workers=8, threads_per_worker=1, memory_limit='6G'):
        """Start a ``LocalCluster`` and connect ``self.client`` to it.

        Calling this while a client is already connected is a no-op, so
        re-running the notebook cell does not spawn a second cluster and
        orphan the first one.

        Args:
            n_workers: number of worker processes to launch.
            threads_per_worker: threads per worker; 1 is needed if using numba.
            memory_limit: memory limit passed to ``LocalCluster`` for each worker.
        """
        if self.client is not None:
            return
        cluster = LocalCluster(
            n_workers=n_workers,
            threads_per_worker=threads_per_worker,  # threads_per_worker=1 needed if using numba :(
            memory_limit=memory_limit,
        )
        self.client = Client(cluster)

    def stop_local_cluster(self):
        """Shut down the cluster through its client and forget the client.

        Safe to call when no cluster is running (never started, or already
        stopped): in that case nothing happens.
        """
        if self.client is None:
            return
        try:
            self.client.shutdown()
        finally:
            # Drop the reference even if shutdown fails, so a later start
            # is not blocked by a stale client.
            self.client = None
        
def run_all(processors):
    """Wrap every processor in a dask delayed task and compute them all.

    Each task calls ``postpro.run_processor`` on one processor and is keyed
    by study, location and variable type. When the module-level ``use_dask``
    flag is false, the tasks are computed with the single-threaded scheduler;
    otherwise dask's default scheduler is used.

    Args:
        processors: iterable of processor objects exposing ``study.name``,
            ``location.name`` and ``vartype.name``.
    """
    tasks = []
    for processor in processors:
        key = f'{processor.study.name}::{processor.location.name}/{processor.vartype.name}'
        delayed_run = dask.delayed(postpro.run_processor)
        tasks.append(delayed_run(processor, dask_key_name=key))

    # Only override the scheduler when dask parallelism is switched off.
    compute_options = {} if use_dask else {'scheduler': 'single-threaded'}
    dask.compute(tasks, **compute_options)

# Start Dask cluster

In [None]:
# Create the cluster wrapper and launch the local dask cluster.
# NOTE(review): the cluster is started here regardless of use_dask -- confirm
# that this is intended when use_dask is False.
cluster = DaskCluster()
cluster.start_local_cluster()
# Last expression of the cell, so the notebook displays the client.
cluster.client

# Specify input DSS files

In [None]:
# Maps each study name to the DSS file that is post-processed for it.
study_file_map = {
    'DSM2v8.2.0_noSMCD': './modelOutput/historical_v82b1.dss',
    'DSM2v8.2.0_SMCD': './modelOutput/v8_2_0_cal_extTo2019_smcd/hist_v82_19.dss',
}

# Setup for EC

In [None]:
# Settings shared by every study for the EC run.
locationfile = './LocationInfo/calibration_ec_stations.csv'
vartype = 'EC'
units = 'mmhos/cm'
observed = False

# Build and run the EC processors for each study's DSS file.
for study_name, dssfile in study_file_map.items():
    processors = postpro.build_processors(dssfile, locationfile, vartype, units, study_name, observed)
    print(f'Processing {vartype} for study: {study_name}')
    run_all(processors)

# Setup for FLOW

In [None]:
# Settings shared by every study for the FLOW run.
locationfile = './LocationInfo/calibration_flow_stations.csv'
vartype = 'FLOW'
units = 'cfs'
observed = False

# Build and run the FLOW processors for each study's DSS file.
for study_name, dssfile in study_file_map.items():
    processors = postpro.build_processors(dssfile, locationfile, vartype, units, study_name, observed)
    # Report progress the same way the EC cell does, so long runs show
    # which study is currently being processed.
    print(f'Processing {vartype} for study: {study_name}')
    run_all(processors)

# Setup for STAGE

In [None]:
# Settings shared by every study for the STAGE run.
locationfile = './LocationInfo/calibration_stage_stations.csv'
vartype = 'STAGE'
units = 'feet'
observed = False

# Build and run the STAGE processors for each study's DSS file.
for study_name, dssfile in study_file_map.items():
    processors = postpro.build_processors(dssfile, locationfile, vartype, units, study_name, observed)
    # Report progress the same way the EC cell does, so long runs show
    # which study is currently being processed.
    print(f'Processing {vartype} for study: {study_name}')
    run_all(processors)

# Stop the Dask cluster. Make sure this always runs at end of processing.

In [None]:
# Shut down the local dask cluster that was started earlier in the notebook.
cluster.stop_local_cluster()