# Dask jobqueue example for JUWELS at JSC
This example covers the following aspects, i.e. how to
* add the JUWELS-specific Dask jobqueue configuration
* get an overview of the available JUWELS compute node resources
* specify the batch queue and the project budget name
* open, scale and close a default jobqueue cluster
* do an example calculation on larger-than-memory data
NOTE: Currently, this **only works** if Jupyter is running **on** a **compute node**.

In [1]:
import dask, dask_jobqueue
import dask.distributed as dask_distributed

## Load jobqueue configuration defaults

In [2]:
# Collect the Dask configuration found in the current working directory
# (presumably the file that defines `jobqueue.juwels-jobqueue-config` -- verify locally).
additional_config = dask.config.collect(paths=['.']) # look up further Dask configurations in local directory
# Merge the collected settings into Dask's global configuration; priority='new' gives the
# collected values precedence over existing ones. The trailing ';' suppresses the cell output.
dask.config.update(dask.config.config, additional_config, priority='new');

In [3]:
# Display the JUWELS jobqueue settings held in the Dask configuration under this key
dask.config.get('jobqueue.juwels-jobqueue-config')

{'cores': 96,
 'memory': '90000M',
 'processes': 1,
 'local-directory': '/tmp',
 'interface': 'ib0',
 'death-timeout': 60,
 'shebang': '#!/usr/bin/env bash',
 'walltime': '00:15:00',
 'log-directory': 'dask_jobqueue_logs',
 'name': 'dask-worker',
 'queue': None,
 'project': None,
 'job-cpu': None,
 'job-mem': None,
 'job-extra': [],
 'extra': [],
 'env-extra': []}

## Set up jobqueue cluster ...

In [4]:
# List idle nodes per partition; output columns are partition (%P), availability (%a),
# node count (%D) and node state (%t).
!sinfo -t idle --format="%9P %.5a %.5D %.5t" # get overview on available resources per queue

PARTITION AVAIL NODES STATE
batch*       up    10  idle
mem192       up     1  idle
gpus         up     0   n/a
devel        up     2  idle
esm          up     3  idle
develgpus    up     6  idle
large      down    10  idle
maint        up    21  idle


In [5]:
# Create the SLURM-backed cluster from the 'juwels-jobqueue-config' defaults and add the
# site/user specific submission settings.
jobqueue_cluster = dask_jobqueue.SLURMCluster(
    config_name='juwels-jobqueue-config',
    # budget name associated with the project (appears as `#SBATCH -A` in the job script)
    project='esmtst',
    # queue chosen according to the available resources (appears as `#SBATCH -p`)
    queue='esm',
)

In [6]:
# Print the batch script generated by the cluster object to check the #SBATCH settings
# and the worker start command.
print(jobqueue_cluster.job_script())

#!/usr/bin/env bash

#SBATCH -J dask-worker
#SBATCH -e dask_jobqueue_logs/dask-worker-%J.err
#SBATCH -o dask_jobqueue_logs/dask-worker-%J.out
#SBATCH -p esm
#SBATCH -A esmtst
#SBATCH -n 1
#SBATCH --cpus-per-task=96
#SBATCH --mem=84G
#SBATCH -t 00:15:00

JOB_ID=${SLURM_JOB_ID%;*}

/p/project/cesmtst/hoeflich1/miniconda3/envs/Dask-jobqueue_v2020.02.10/bin/python -m distributed.cli.dask_worker tcp://10.11.128.23:33762 --nthreads 96 --memory-limit 90.00GB --name name --nanny --death-timeout 60 --local-directory /tmp --interface ib0



## ... and the client process

In [7]:
# Connect a distributed client to the scheduler of the jobqueue cluster
client = dask_distributed.Client(jobqueue_cluster)

## Start jobqueue workers

In [8]:
# Request one worker job from the batch system; it appears in the squeue listing once submitted
jobqueue_cluster.scale(jobs=1)

In [10]:
# Show the current user's batch jobs (the dask-worker job should be listed now).
# $USER is expanded by the shell, so the cell works for any account instead of one hard-coded user.
!squeue -u $USER

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           2144135       esm dask-wor hoeflich  R       0:06      1 jwc00n000
           2144134       esm interact hoeflich  R       1:24      1 jwc00n009


In [11]:
# Display the client summary: scheduler address, dashboard link, and the workers/cores/memory of the cluster
client

0,1
Client  Scheduler: tcp://10.11.128.23:33762  Dashboard: http://10.11.128.23:8787/status,Cluster  Workers: 1  Cores: 96  Memory: 90.00 GB


## Do a calculation on larger-than-memory data

In [12]:
import dask.array as da

In [13]:
# Lazily define a 365 x 10000 x 10000 array of uniform random numbers between 0 and 1.
# The dimension sizes are written as integers: array shapes must be integral, and float
# literals such as 1e4 only work where the library happens to coerce them.
# Problem specific chunking: every chunk spans the complete first axis.
fake_data = da.random.uniform(0, 1, size=(365, 10000, 10000), chunks=(365, 500, 500))
fake_data

Unnamed: 0,Array,Chunk
Bytes,292.00 GB,730.00 MB
Shape,"(365, 10000, 10000)","(365, 500, 500)"
Count,400 Tasks,400 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 292.00 GB 730.00 MB Shape (365, 10000, 10000) (365, 500, 500) Count 400 Tasks 400 Chunks Type float64 numpy.ndarray",10000  10000  365,

Unnamed: 0,Array,Chunk
Bytes,292.00 GB,730.00 MB
Shape,"(365, 10000, 10000)","(365, 500, 500)"
Count,400 Tasks,400 Chunks
Type,float64,numpy.ndarray


In [14]:
import time

In [15]:
# Time the reduction over the first axis. perf_counter() is a monotonic, high-resolution
# clock intended for measuring durations, unlike time.time(), which follows the system
# clock and can jump when that clock is adjusted.
start_time = time.perf_counter()
# The result is intentionally discarded; this cell only measures how long the computation takes.
fake_data.mean(axis=0).compute()
elapsed = time.perf_counter() - start_time
print('elapse time ',elapsed,' in seconds')

elapse time  11.243258953094482  in seconds


## Close jobqueue cluster and client process

In [16]:
# Show the current user's batch jobs before the cluster is closed.
# $USER is expanded by the shell, so the cell works for any account instead of one hard-coded user.
!squeue -u $USER

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           2144135       esm dask-wor hoeflich  R       0:36      1 jwc00n000
           2144134       esm interact hoeflich  R       1:54      1 jwc00n009


In [17]:
# Disconnect the client first, while the scheduler is still reachable, and only then shut
# down the cluster (scheduler and worker jobs). Closing the cluster first leaves the client
# to tear down its connection against a scheduler that is already gone.
client.close()
jobqueue_cluster.close()

In [18]:
# Show the current user's batch jobs again to confirm that the dask-worker job is gone.
# $USER is expanded by the shell, so the cell works for any account instead of one hard-coded user.
!squeue -u $USER

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           2144134       esm interact hoeflich  R       2:04      1 jwc00n009


## Conda environment

In [19]:
# Record the explicit package list of the active conda environment; per its own header,
# the output can be used to recreate the environment with `conda create --file`.
!conda list --explicit

# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
@EXPLICIT
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2019.11.28-hecc5488_0.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.33.1-h53a641e_8.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-7.3.0-hdf63c60_5.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-9.2.0-hdf63c60_2.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/libgomp-9.2.0-h24d8f2e_2.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-0_gnu.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-9.2.0-h24d8f2e_2.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/jpeg-9c-h14c3975_1001.tar.bz2
https://conda.anaconda.org/conda-forge/linux-64/libffi-3.2.1-he1b5a44_1006.tar.bz2
https://conda.