In [5]:
import getpass
import os

import dask.distributed as dd
from dask.distributed import Client, LocalCluster, progress
from dask.distributed import get_worker
from dask_jobqueue import PBSCluster
#from distributed.utils import tmpfile

In [6]:
# Create this user's working directory under the vp91 project scratch area.
# `mkdir -p` does nothing when the directory already exists, so re-running
# this cell is safe.
! mkdir -p /scratch/vp91/$USER

In [7]:
# Build the path of this user's directory under the project scratch area.
# $USER is preferred so the result matches the directory made by the
# `mkdir -p /scratch/vp91/$USER` cell. When $USER is unset or empty, fall back
# to the login name reported by the OS rather than a placeholder string, which
# would point at a directory that was never created.
user = os.environ.get('USER') or getpass.getuser()
path = os.path.join('/scratch/vp91', user)
print(path)

/scratch/vp91/jxj900


In [8]:
# The Jupyter notebook is launched from your $HOME directory.
# Change the working directory to the per-user directory under /scratch/vp91.
# expandvars() substitutes any $VAR references that may be present in `path`.
os.chdir(os.path.expandvars(path))

In [9]:
# Record the venv's interpreter in the environment; it is read back when the
# cluster is created so the workers are launched with this Python.
os.environ.update(
    DASK_PYTHON='/scratch/vp91/Training-Venv/dask/dask-venv/bin/python3'
)

In [10]:
# Shell commands that load the required modules and activate the venv.
# Every part of Dask must use the same Python and the same libraries;
# if the venv is not activated, the workers may end up with different
# library versions.
setup_commands = [
    "module load python3/3.11.0",
    "source /scratch/vp91/Training-Venv/dask/dask-venv/bin/activate",
]

In [11]:
# Gadi uses its own PBS directives, so some of the defaults Dask generates
# when launching a PBS job do not work there.
# Every Gadi-specific directive should be listed here.
# Reference: https://opus.nci.org.au/display/Help/Gadi+Quick+Reference+Guide
extra = [
    '-q normal',     # queue
    '-P vp91',       # project
    '-l ncpus=48',   # CPU cores requested
    '-l mem=192GB',  # memory requested
]

In [12]:
# PBSCluster options used below:
#   cores                - total number of cores per job
#   memory               - total amount of memory per job
#   walltime             - walltime for each worker job
#   shebang              - interpreter line for the generated batch submission script
#   job_extra_directives - other PBS options; each one is written with the "#PBS" prefix
#   job_directives_skip  - generated header lines containing any of these strings are
#                          removed (directives from job_extra_directives are not affected)
#   job_script_prologue  - commands added to the script before the worker is launched
#   python               - executable used to launch the Dask workers; defaults to the
#                          Python that is submitting the jobs
#   interface            - network interface such as 'eth0' or 'ib0', used by both the
#                          Dask scheduler and the Dask workers
#   local_directory      - worker-local directory used for file spilling
cluster = PBSCluster(
    # resources requested by each job
    cores=48,
    memory="192GB",
    walltime="00:50:00",
    # contents of the generated job script
    shebang='#!/usr/bin/env bash',
    job_extra_directives=extra,
    job_directives_skip=["select"],
    job_script_prologue=setup_commands,
    # how the workers are started
    python=os.environ["DASK_PYTHON"],
    interface="ib0",
    local_directory='$TMPDIR',
)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 41613 instead


In [13]:
# Print the PBS job script generated from the cluster settings so the
# directives and the worker launch command can be checked before scaling.
print(cluster.job_script())

#!/usr/bin/env bash

#PBS -N dask-worker
#PBS -l walltime=00:50:00
#PBS -q normal
#PBS -P vp91
#PBS -l ncpus=48
#PBS -l mem=192GB
module load python3/3.11.0
source /scratch/vp91/Training-Venv/dask/dask-venv/bin/activate
/scratch/vp91/Training-Venv/dask/dask-venv/bin/python3 -m distributed.cli.dask_worker tcp://10.6.66.36:35613 --nthreads 6 --nworkers 8 --memory-limit 22.35GiB --name dummy-name --nanny --death-timeout 60 --local-directory $TMPDIR --interface ib0



In [14]:
# Scale the cluster to 2 PBS jobs (each job requests 48 cores, so this
# corresponds to two nodes' worth of workers).
# NOTE(review): the jobs still have to start in the queue, so the workers may
# not be available immediately after this call returns.
cluster.scale(jobs=2)

In [16]:
# Verify the workers have been allocated as expected: the queue listing
# should contain one `dask-worker` job per job requested from the cluster.
!qstat

Job id                 Name             User              Time Use S Queue
---------------------  ---------------- ----------------  -------- - -----
100969196.gadi-pbs     sys-dashboard-s* jxj900            00:08:03 R normal-exec     
100974719.gadi-pbs     dask-worker      jxj900            00:00:00 R normal-exec     
100974720.gadi-pbs     dask-worker      jxj900            00:00:00 R normal-exec     


In [17]:
# Display the cluster summary: dashboard link, number of workers, total
# threads and total memory, followed by the details of each worker.
cluster

0,1
Dashboard: /proxy/41613/status,Workers: 16
Total threads: 96,Total memory: 357.60 GiB

0,1
Comm: tcp://10.6.66.36:35613,Workers: 16
Dashboard: /proxy/41613/status,Total threads: 96
Started: 1 minute ago,Total memory: 357.60 GiB

0,1
Comm: tcp://10.6.24.16:46733,Total threads: 6
Dashboard: /proxy/39421/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:44303,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-ei9k55gl,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-ei9k55gl

0,1
Comm: tcp://10.6.24.16:46435,Total threads: 6
Dashboard: /proxy/41303/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:33655,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-ac3e7fxp,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-ac3e7fxp

0,1
Comm: tcp://10.6.24.16:32909,Total threads: 6
Dashboard: /proxy/41923/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:41383,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-1zdnw25f,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-1zdnw25f

0,1
Comm: tcp://10.6.24.16:35055,Total threads: 6
Dashboard: /proxy/44795/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:33557,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-w8ctt155,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-w8ctt155

0,1
Comm: tcp://10.6.24.16:37881,Total threads: 6
Dashboard: /proxy/45279/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:33221,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-26kjaq3k,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-26kjaq3k

0,1
Comm: tcp://10.6.24.16:39661,Total threads: 6
Dashboard: /proxy/34669/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:46289,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-03kyyrkd,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-03kyyrkd

0,1
Comm: tcp://10.6.24.16:38251,Total threads: 6
Dashboard: /proxy/45727/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:44515,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-jmdcp827,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-jmdcp827

0,1
Comm: tcp://10.6.24.16:33593,Total threads: 6
Dashboard: /proxy/39947/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:42159,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-l9t0buy1,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-l9t0buy1

0,1
Comm: tcp://10.6.24.15:46451,Total threads: 6
Dashboard: /proxy/38027/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:35203,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-n83tblqu,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-n83tblqu

0,1
Comm: tcp://10.6.24.15:35593,Total threads: 6
Dashboard: /proxy/43187/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:36089,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-o7kiy6vf,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-o7kiy6vf

0,1
Comm: tcp://10.6.24.15:43023,Total threads: 6
Dashboard: /proxy/45343/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:41267,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-z89x93a8,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-z89x93a8

0,1
Comm: tcp://10.6.24.15:46323,Total threads: 6
Dashboard: /proxy/46183/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:43709,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-zyjhx1vl,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-zyjhx1vl

0,1
Comm: tcp://10.6.24.15:34533,Total threads: 6
Dashboard: /proxy/42755/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:46537,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-cjkp0inq,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-cjkp0inq

0,1
Comm: tcp://10.6.24.15:34575,Total threads: 6
Dashboard: /proxy/37233/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:40333,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-pb8z4wv2,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-pb8z4wv2

0,1
Comm: tcp://10.6.24.15:44109,Total threads: 6
Dashboard: /proxy/44283/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:39531,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-gp31u3bk,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-gp31u3bk

0,1
Comm: tcp://10.6.24.15:34247,Total threads: 6
Dashboard: /proxy/34977/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:36977,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-mzjd0xjl,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-mzjd0xjl


In [18]:
# Create a client connected to the cluster; work is submitted to the
# workers through this object.
client = Client(cluster)

In [19]:
# Display the client summary (connection method, cluster type, dashboard link).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/41613/status,

0,1
Dashboard: /proxy/41613/status,Workers: 16
Total threads: 96,Total memory: 357.60 GiB

0,1
Comm: tcp://10.6.66.36:35613,Workers: 16
Dashboard: /proxy/41613/status,Total threads: 96
Started: 1 minute ago,Total memory: 357.60 GiB

0,1
Comm: tcp://10.6.24.16:46733,Total threads: 6
Dashboard: /proxy/39421/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:44303,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-ei9k55gl,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-ei9k55gl

0,1
Comm: tcp://10.6.24.16:46435,Total threads: 6
Dashboard: /proxy/41303/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:33655,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-ac3e7fxp,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-ac3e7fxp

0,1
Comm: tcp://10.6.24.16:32909,Total threads: 6
Dashboard: /proxy/41923/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:41383,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-1zdnw25f,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-1zdnw25f

0,1
Comm: tcp://10.6.24.16:35055,Total threads: 6
Dashboard: /proxy/44795/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:33557,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-w8ctt155,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-w8ctt155

0,1
Comm: tcp://10.6.24.16:37881,Total threads: 6
Dashboard: /proxy/45279/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:33221,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-26kjaq3k,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-26kjaq3k

0,1
Comm: tcp://10.6.24.16:39661,Total threads: 6
Dashboard: /proxy/34669/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:46289,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-03kyyrkd,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-03kyyrkd

0,1
Comm: tcp://10.6.24.16:38251,Total threads: 6
Dashboard: /proxy/45727/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:44515,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-jmdcp827,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-jmdcp827

0,1
Comm: tcp://10.6.24.16:33593,Total threads: 6
Dashboard: /proxy/39947/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.16:42159,
Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-l9t0buy1,Local directory: /jobfs/100974720.gadi-pbs/dask-scratch-space/worker-l9t0buy1

0,1
Comm: tcp://10.6.24.15:46451,Total threads: 6
Dashboard: /proxy/38027/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:35203,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-n83tblqu,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-n83tblqu

0,1
Comm: tcp://10.6.24.15:35593,Total threads: 6
Dashboard: /proxy/43187/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:36089,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-o7kiy6vf,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-o7kiy6vf

0,1
Comm: tcp://10.6.24.15:43023,Total threads: 6
Dashboard: /proxy/45343/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:41267,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-z89x93a8,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-z89x93a8

0,1
Comm: tcp://10.6.24.15:46323,Total threads: 6
Dashboard: /proxy/46183/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:43709,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-zyjhx1vl,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-zyjhx1vl

0,1
Comm: tcp://10.6.24.15:34533,Total threads: 6
Dashboard: /proxy/42755/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:46537,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-cjkp0inq,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-cjkp0inq

0,1
Comm: tcp://10.6.24.15:34575,Total threads: 6
Dashboard: /proxy/37233/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:40333,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-pb8z4wv2,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-pb8z4wv2

0,1
Comm: tcp://10.6.24.15:44109,Total threads: 6
Dashboard: /proxy/44283/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:39531,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-gp31u3bk,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-gp31u3bk

0,1
Comm: tcp://10.6.24.15:34247,Total threads: 6
Dashboard: /proxy/34977/status,Memory: 22.35 GiB
Nanny: tcp://10.6.24.15:36977,
Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-mzjd0xjl,Local directory: /jobfs/100974719.gadi-pbs/dask-scratch-space/worker-mzjd0xjl


cluster.get_logs()

In [20]:
# Minimal task used to check that the cluster can execute work
def slow_increment(x):
    """Return ``x`` plus one.

    Despite the name, no artificial delay is added; the value is
    computed and returned immediately.
    """
    incremented = x + 1
    return incremented

In [21]:
# Submit a single call, slow_increment(5000), to the Dask cluster.
# Despite the plural name, `futures` holds the one object returned by submit().
futures = client.submit(slow_increment, 5000)

In [22]:
# Display the object returned by client.submit() for the task above.
futures

In [23]:
# Fetch the task's return value from the cluster (expected: 5000 + 1).
futures.result()

5001