# Dask jobqueue example for NEC Linux cluster
This notebook shows how to
* load project- and machine-specific Dask jobqueue configurations
* open, scale and close a default jobqueue cluster
* do an example calculation on larger-than-memory data

## Load jobqueue configuration defaults

In [1]:
import os

# Make Dask look for its configuration files in the current working directory.
# This is set here, ahead of the `import dask.config` in the following cell.
os.environ.update(DASK_CONFIG='.')

In [2]:
import dask.config

# Fetch the 'jobqueue' section of the loaded Dask configuration and show it
# as the cell result (one entry per named jobqueue configuration).
jobqueue_config = dask.config.get('jobqueue')
jobqueue_config

{'nesh-jobqueue-config': {'cores': 4,
  'memory': '24GB',
  'processes': 1,
  'queue': 'clmedium',
  'resource_spec': 'elapstim_req=00:45:00,cpunum_job=4,memsz_job=24gb',
  'interface': 'ib0',
  'local-directory': '/scratch',
  'walltime': None,
  'job-extra': ['-o dask_jobqueue_logs/dask-worker.o%s',
   '-e dask_jobqueue_logs/dask-worker.e%s'],
  'project': None,
  'name': 'dask-worker',
  'death-timeout': 60,
  'extra': [],
  'env-extra': [],
  'log-directory': None,
  'shebang': '#!/bin/bash'}}

## Set up jobqueue cluster ...

In [3]:
import dask_jobqueue

# Name of the configuration entry (see the 'jobqueue' section listed above)
# that supplies queue, resources, interface, etc. for the worker jobs.
jobqueue_config_name = 'nesh-jobqueue-config'
default_cluster = dask_jobqueue.PBSCluster(config_name=jobqueue_config_name)

In [4]:
# Show the batch script that the cluster object generates for each worker job
# (the #PBS directives plus the dask-worker command line).
worker_job_script = default_cluster.job_script()
print(worker_job_script)

#!/bin/bash

#PBS -N dask-worker
#PBS -q clmedium
#PBS -l elapstim_req=00:45:00,cpunum_job=4,memsz_job=24gb
#PBS -o dask_jobqueue_logs/dask-worker.o%s
#PBS -e dask_jobqueue_logs/dask-worker.e%s
JOB_ID=${PBS_JOBID%%.*}

/sfs/fs6/home-geomar/smomw260/miniconda3/envs/dask-minimal-20191218/bin/python -m distributed.cli.dask_worker tcp://192.168.31.10:32956 --nthreads 4 --memory-limit 24.00GB --name name --nanny --death-timeout 60 --local-directory /scratch --interface ib0



## ... and the client process

In [5]:
import dask.distributed as dask_distributed

# Connect a client to the scheduler of the jobqueue cluster; Dask computations
# started from this notebook are then submitted through this client.
default_cluster_client = dask_distributed.Client(address=default_cluster)

## Start jobqueue workers

In [6]:
# Request two worker jobs from the batch system.
n_worker_jobs = 2
default_cluster.scale(jobs=n_worker_jobs)

In [7]:
# Shell escape: list the user's batch jobs to check that the requested
# dask-worker jobs have been submitted and are running.
!qstat

RequestID       ReqName  UserName Queue     Pri STT S   Memory      CPU   Elapse R H M Jobs
--------------- -------- -------- -------- ---- --- - -------- -------- -------- - - - ----
182478.nesh-bat dask-wor smomw260 clmedium    0 RUN -    0.00B     0.00        8 Y Y Y    1 
182479.nesh-bat dask-wor smomw260 clmedium    0 RUN -    0.00B     0.00        8 Y Y Y    1 


In [8]:
# Bare expression: displays the client summary (scheduler address, dashboard
# link, and the number of workers, cores and memory currently available).
default_cluster_client

0,1
Client  Scheduler: tcp://192.168.31.10:32956  Dashboard: http://192.168.31.10:8787/status,Cluster  Workers: 2  Cores: 8  Memory: 48.00 GB


## Do a calculation on larger-than-memory data

In [9]:
import dask.array as da

In [10]:
# Lazily define a synthetic 3-D array of uniform random numbers between 0 and 1.
# The full array (365 x 10000 x 10000 float64 values) is 292 GB, split into
# 400 chunks of 730 MB each, so it never has to fit into memory at once.
# The dimensions are written as integers rather than float literals such as
# 1e4, because array shapes must be integral.
fake_data = da.random.uniform(0, 1, size=(365, 10000, 10000), chunks=(365, 500, 500))  # problem specific chunking
fake_data

Unnamed: 0,Array,Chunk
Bytes,292.00 GB,730.00 MB
Shape,"(365, 10000, 10000)","(365, 500, 500)"
Count,400 Tasks,400 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 292.00 GB 730.00 MB Shape (365, 10000, 10000) (365, 500, 500) Count 400 Tasks 400 Chunks Type float64 numpy.ndarray",10000  10000  365,

Unnamed: 0,Array,Chunk
Bytes,292.00 GB,730.00 MB
Shape,"(365, 10000, 10000)","(365, 500, 500)"
Count,400 Tasks,400 Chunks
Type,float64,numpy.ndarray


In [11]:
import time

In [12]:
# Measure the duration with a monotonic, high-resolution clock; time.time()
# follows the system clock and can jump if that clock is adjusted mid-run.
start_time = time.perf_counter()
# .compute() evaluates the lazy mean over axis 0; the resulting array is not
# kept, since only the elapsed time is of interest here.
fake_data.mean(axis=0).compute()
elapsed = time.perf_counter() - start_time

In [13]:
# Report the measured duration of the computation in seconds.
report_parts = ('elapse time ', elapsed, ' in seconds')
print(*report_parts)

elapse time  46.89112448692322  in seconds


## Close jobqueue cluster and client process

In [14]:
# Shell escape: list the batch jobs again; the worker jobs are still running
# and now report the memory and CPU time they have used.
!qstat

RequestID       ReqName  UserName Queue     Pri STT S   Memory      CPU   Elapse R H M Jobs
--------------- -------- -------- -------- ---- --- - -------- -------- -------- - - - ----
182478.nesh-bat dask-wor smomw260 clmedium    0 RUN -    2.58G   157.12       94 Y Y Y    1 
182479.nesh-bat dask-wor smomw260 clmedium    0 RUN -  804.87M   157.49       94 Y Y Y    1 


In [15]:
# Shut down in reverse order of creation: disconnect the client first, while
# the scheduler is still reachable, then close the cluster itself. Closing the
# cluster first would take the scheduler away from a still-connected client.
default_cluster_client.close()
default_cluster.close()

In [16]:
# Shell escape: list the batch jobs once more to verify the shutdown.
# NOTE(review): presumably the dask-worker jobs should no longer be listed;
# no output was recorded for this cell, so confirm when re-running.
!qstat