# Reproducible Hydrological Modeling with CyberGIS-Jupyter for Water (CJW) and HydroShare Using the Job Supervisor Submission Service

In [3]:
resource_id = '1f3f310af8364d2aa3e6a9459152a21c'

import json
import os
import subprocess
from hs_restclient import HydroShare, HydroShareAuthBasic

# Read the HydroShare credentials from the environment so that a real account
# never has to be written into the notebook. The demo account that was
# previously hard-coded here remains the fallback, so the cell still runs as is.
hs_username = os.environ.get("HYDROSHARE_USERNAME", "cybergis")
hs_password = os.environ.get("HYDROSHARE_PASSWORD", "demo")
auth = HydroShareAuthBasic(hs_username, hs_password)
hs = HydroShare(auth=auth)

# Everything this notebook downloads or generates lives under ./example.
base_dir = os.path.join(os.getcwd(), 'example')
# Make sure the download target exists so the cell works on a fresh checkout.
os.makedirs(base_dir, exist_ok=True)

print("Downloading Data")
hs.getResource(resource_id, destination=base_dir, unzip=True)
print("Done")

Downloading Data
Done


In [4]:
import tempfile

#Unzip model file
model_folder_name = "SummaModel_ReynoldsAspenStand_StomatalResistance_sopron"
content_folder = os.path.join(base_dir ,"{}/{}/data/contents".format(resource_id, resource_id))
workspace_dir = os.path.join(base_dir, 'workspace')
!mkdir -p {workspace_dir}
unzip_dir = tempfile.mkdtemp(dir=workspace_dir)
!cd {content_folder} && unzip -o {model_folder_name}.zip -d {unzip_dir}
print("Unzipping Done")

Archive:  SummaModel_ReynoldsAspenStand_StomatalResistance_sopron.zip
   creating: /Users/erichsiao/Documents/Project/CyberGIS/job-supervisor-python-sdk/example/workspace/tmp5jlvuuxk/SummaModel_ReynoldsAspenStand_StomatalResistance_sopron/
   creating: /Users/erichsiao/Documents/Project/CyberGIS/job-supervisor-python-sdk/example/workspace/tmp5jlvuuxk/SummaModel_ReynoldsAspenStand_StomatalResistance_sopron/data/
   creating: /Users/erichsiao/Documents/Project/CyberGIS/job-supervisor-python-sdk/example/workspace/tmp5jlvuuxk/SummaModel_ReynoldsAspenStand_StomatalResistance_sopron/data/forcingData/
  inflating: /Users/erichsiao/Documents/Project/CyberGIS/job-supervisor-python-sdk/example/workspace/tmp5jlvuuxk/SummaModel_ReynoldsAspenStand_StomatalResistance_sopron/data/forcingData/forcing_above_aspen.nc  
  inflating: /Users/erichsiao/Documents/Project/CyberGIS/job-supervisor-python-sdk/example/workspace/tmp5jlvuuxk/SummaModel_ReynoldsAspenStand_StomatalResistance_sopron/data/reynolds_geoj

In [5]:
model_source_folder_path = os.path.join(unzip_dir, model_folder_name)
!cd {model_source_folder_path} && chmod +x ./installTestCases_local.sh
!cd {model_source_folder_path} && ./installTestCases_local.sh

TestCases installed


In [6]:
# Create an empty ParamTrial.nc file (required by parameter ensemble)
import netCDF4 as nc
name = os.path.join(model_source_folder_path, 'settings/summa_zParamTrial_riparianAspen.nc')
print(name)
# The file holds a single dimension "hru" of size 1 and no variables.
# The context manager closes the file even if creating the dimension fails.
with nc.Dataset(name, "w", format="NETCDF3_CLASSIC") as param_trial:
    param_trial.createDimension("hru", 1)

/Users/erichsiao/Documents/Project/CyberGIS/job-supervisor-python-sdk/example/workspace/tmp5jlvuuxk/SummaModel_ReynoldsAspenStand_StomatalResistance_sopron/settings/summa_zParamTrial_riparianAspen.nc


In [7]:
import numpy as np
import json
from pysumma import ensemble

def safe_arange(start, stop, step):
    """Return ``np.arange(start, stop, step)`` with each value rounded to 10 decimals.

    Rounding removes the floating-point accumulation noise of ``np.arange``
    (e.g. ``0.21000000000000002``) so the values print and serialize cleanly.

    Parameters
    ----------
    start, stop, step : number
        Passed unchanged to ``np.arange``; ``stop`` is exclusive.

    Returns
    -------
    numpy.ndarray
        One-dimensional float64 array. An empty range yields an empty array
        (the previous implementation returned a plain ``list`` in that case).
    """
    # Build the array once instead of calling np.append per element, which
    # re-allocated the whole array on every iteration.
    return np.array(
        [round(value, 10) for value in np.arange(start, stop, step)],
        dtype=float,
    )

# Build the ensemble configuration.
# Model decisions to vary (alternative parameterizations)
decision_options = {"stomResist": ["BallBerry", "Jarvis", "simpleResistance"]}
# Parameter values to vary
param_options = {
    'rootDistExp': safe_arange(0.01, 1.00, 0.20),
    'summerLAI': safe_arange(0.01, 10.00, 2.00),
}

# Combine the decision and parameter options into the ensemble description
# (the recorded output reports 75 runs, i.e. 3 x 5 x 5 combinations).
config = ensemble.total_product(dec_conf=decision_options, param_conf=param_options)

# Persist the ensemble description as JSON inside the model folder
options_path = os.path.join(model_source_folder_path, 'summa_options.json')
with open(options_path, 'w') as outfile:
    outfile.write(json.dumps(config))

# Report the ensemble size and preview the first 800 characters of the
# pretty-printed configuration
print("Number of ensemble runs: {}".format(len(config)))
config_preview = json.dumps(config, indent=4, sort_keys=True)
print(config_preview[:800])
print("...")

Number of ensemble runs: 75
{
    "++BallBerry++rootDistExp=0.01++summerLAI=0.01++": {
        "attributes": {},
        "decisions": {
            "stomResist": "BallBerry"
        },
        "parameters": {
            "rootDistExp": 0.01,
            "summerLAI": 0.01
        },
        "trial_parameters": {}
    },
    "++BallBerry++rootDistExp=0.01++summerLAI=2.01++": {
        "attributes": {},
        "decisions": {
            "stomResist": "BallBerry"
        },
        "parameters": {
            "rootDistExp": 0.01,
            "summerLAI": 2.01
        },
        "trial_parameters": {}
    },
    "++BallBerry++rootDistExp=0.01++summerLAI=4.01++": {
        "attributes": {},
        "decisions": {
            "stomResist": "BallBerry"
        },
        "parameters": {
            "rootDistExp": 0.01,
    
...


In [8]:
from job_supervisor_client import *

# Create the client-side user handle for the 'summa' destination.
# url/port presumably point at the Job Supervisor server (a local instance on
# port 3000 here) — adjust for your deployment. The recorded output shows that
# this call also writes a constructor file that can rebuild the User later.
communitySummaUser = User('summa', isJupyter=True, url="localhost", port=3000)

📃 created constructor file [job_supervisor_constructor_summa.json]
👉 use [User("summa", useFileConstructor=True)] to create User interface from constructor file


In [9]:
# List the available destinations; the recorded output shows, for each one,
# its host, port, and the files an uploaded folder must contain.
communitySummaUser.destinations()

name,ip,port,isCommunityAccount,useUploadedFile,uploadedFileMustHave
summa,keeling.earth.illinois.edu,22,True,True,"['summa_options.json', 'installTestCases_local.sh', 'data', 'output', 'settings']"
spark,hadoop01.cigi.illinois.edu,50022,False,True,['index.py']


In [10]:
# Calling job() with no argument yields a new job handle for this user
# (a later cell passes an existing job ID instead).
communitySummaJob = communitySummaUser.job() # create new job

In [11]:
# Upload the prepared model folder for this job; the recorded output shows the
# call returning a dict with the uploaded file's identifier.
communitySummaJob.upload(model_source_folder_path)

{'file': '1599621891bIQX'}

In [12]:
# Submit the job; the recorded output reports the job ID it was registered
# under, and the call evaluates to the Job object itself.
communitySummaJob.submit()

✅ job registered with ID: 1599621894O9Op


<job_supervisor_client.Job.Job at 0x1160674d0>

In [13]:
# Show this job's events as a table (type, message, time).
# NOTE(review): liveOutput=True presumably keeps refreshing until the job
# ends — confirm against the job_supervisor_client documentation.
communitySummaJob.events(liveOutput=True)

📮Job ID: 1599621894O9Op
📍Destination: summa



types,message,time
JOB_QUEUED,"job [1599621894O9Op] is queued, waiting for registration",2020-09-09T03:24:54.185Z
JOB_REGISTERED,"job [1599621894O9Op] is registered with the supervisor, waiting for initialization",2020-09-09T03:24:54.768Z
SUMMA_HPC_CONNECTED,connected to HPC,2020-09-09T03:26:13.010Z
SUMMA_HPC_CONNECTED,connected to HPC,2020-09-09T03:26:24.620Z
SUMMA_HPC_SUBMITTED,submitted SUMMA job to HPC,2020-09-09T03:26:24.620Z
JOB_INITIALIZED,initialized SUMMA job in HPC job queue with remote_id 3603115,2020-09-09T03:26:24.620Z
JOB_ENDED,SUMMA job with remote_id 3603115 completed,2020-09-09T03:26:29.890Z


In [16]:
# Per the recorded output, this returns a dict mapping each job ID to that
# job's list of event records (type, message, at).
communitySummaUser.events() # get all jobs under this user

{'1599621894O9Op': [{'type': 'JOB_QUEUED',
   'message': 'job [1599621894O9Op] is queued, waiting for registration',
   'at': '2020-09-09T03:24:54.185Z'},
  {'type': 'JOB_REGISTERED',
   'message': 'job [1599621894O9Op] is registered with the supervisor, waiting for initialization',
   'at': '2020-09-09T03:24:54.768Z'},
  {'type': 'SUMMA_HPC_CONNECTED',
   'message': 'connected to HPC',
   'at': '2020-09-09T03:26:13.010Z'},
  {'type': 'SUMMA_HPC_CONNECTED',
   'message': 'connected to HPC',
   'at': '2020-09-09T03:26:24.620Z'},
  {'type': 'SUMMA_HPC_SUBMITTED',
   'message': 'submitted SUMMA job to HPC',
   'at': '2020-09-09T03:26:24.620Z'},
  {'type': 'JOB_INITIALIZED',
   'message': 'initialized SUMMA job in HPC job queue with remote_id 3603115',
   'at': '2020-09-09T03:26:24.620Z'},
  {'type': 'JOB_ENDED',
   'message': 'SUMMA job with remote_id 3603115 completed',
   'at': '2020-09-09T03:26:29.890Z'}]}

In [17]:
import os

# Download this job's files into the current working directory.
# NOTE(review): presumably these are the model outputs produced on the HPC —
# confirm what download() fetches and how it names the result.
communitySummaJob.download(os.getcwd())

In [19]:
# Rebuild a job handle from an existing job ID and fetch its events; per the
# recorded output, events() without liveOutput returns a list of event dicts.
# NOTE(review): this ID belongs to the recorded run above; a fresh run registers
# a different ID, so replace the literal with the ID reported by submit().
communitySummaJob2 = communitySummaUser.job('1599621894O9Op') # recreate job obj
communitySummaJob2.events()

[{'type': 'JOB_QUEUED',
  'message': 'job [1599621894O9Op] is queued, waiting for registration',
  'at': '2020-09-09T03:24:54.185Z'},
 {'type': 'JOB_REGISTERED',
  'message': 'job [1599621894O9Op] is registered with the supervisor, waiting for initialization',
  'at': '2020-09-09T03:24:54.768Z'},
 {'type': 'SUMMA_HPC_CONNECTED',
  'message': 'connected to HPC',
  'at': '2020-09-09T03:26:13.010Z'},
 {'type': 'SUMMA_HPC_CONNECTED',
  'message': 'connected to HPC',
  'at': '2020-09-09T03:26:24.620Z'},
 {'type': 'SUMMA_HPC_SUBMITTED',
  'message': 'submitted SUMMA job to HPC',
  'at': '2020-09-09T03:26:24.620Z'},
 {'type': 'JOB_INITIALIZED',
  'message': 'initialized SUMMA job in HPC job queue with remote_id 3603115',
  'at': '2020-09-09T03:26:24.620Z'},
 {'type': 'JOB_ENDED',
  'message': 'SUMMA job with remote_id 3603115 completed',
  'at': '2020-09-09T03:26:29.890Z'}]