submit_ritoy_P3.slurm
#!/bin/bash
#!
#! Dask job script for P3
#! Tim Cornwell
#!
#!#############################################################
#!#### Modify the options in this section as appropriate ######
#!#############################################################
#! sbatch directives begin here ###############################
#! Name of the job:
#SBATCH -J SDP_ARL
#! Which project should be charged:
#SBATCH -A SKA-SDP
#! How many whole nodes should be allocated?
#SBATCH --nodes=8
#! How many (MPI) tasks will there be in total? (<= nodes*16)
#SBATCH --ntasks=16
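#! Note: 16 tasks across 8 nodes works out to 2 per node, which matches the
#! two worker processes per host launched with --nprocs 2 further down.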
#! Memory limit per node in MB (uncomment to override the partition default):
##SBATCH --mem=50000
#! How much wallclock time will be required?
#SBATCH --time=00:20:00
#! What types of email messages do you wish to receive?
#SBATCH --mail-type=FAIL
#! Uncomment this to prevent the job from being requeued (e.g. if
#! interrupted by node failure or system downtime):
##SBATCH --no-requeue
#! Do not change:
#SBATCH -p compute
#! Modify the settings below to specify the application's environment, location
#! and launch method:
#! Optionally modify the environment seen by the application
#! (note that SLURM reproduces the environment at submission irrespective of ~/.bashrc):
module purge # Removes all modules still loaded
#! Set up python
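#! Assumption: $ARL is already exported in the submission environment and
#! points to the ARL source tree (see the note on environment reproduction above).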
export PYTHONPATH=$PYTHONPATH:$ARL
echo "PYTHONPATH is ${PYTHONPATH}"
echo -e "Running python: `which python`"
echo -e "Running dask-scheduler: `which dask-scheduler`"
cd $SLURM_SUBMIT_DIR
echo -e "Changed directory to `pwd`.\n"
module load slurm
JOBID=${SLURM_JOB_ID}
echo ${SLURM_JOB_NODELIST}
#! Create a hostfile:
scontrol show hostnames $SLURM_JOB_NODELIST | uniq > hostfile.$JOBID
scheduler=$(head -1 hostfile.$JOBID)
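#! hostfile.$JOBID now holds one allocated hostname per line, e.g. (illustrative only):
#!   node-001
#!   node-002
#! The first host doubles as the Dask scheduler; every host runs workers.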
hostIndex=0
for host in $(cat hostfile.$JOBID); do
    echo "Working on $host ...."
    if [ "$hostIndex" = "0" ]; then
        echo "run dask-scheduler"
        ssh $host dask-scheduler --port=8786 &
        sleep 5
    fi
    echo "run dask-worker"
    ssh $host dask-worker --host ${host} --nprocs 2 --nthreads 1 \
        --memory-limit 100GB $scheduler:8786 &
    sleep 1
    hostIndex="1"
done
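#! Optional sanity check (a sketch; assumes dask.distributed is importable in
#! this Python environment): block until the scheduler accepts connections.
python -c "from dask.distributed import Client; Client('${scheduler}:8786', timeout=60)"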
echo "Scheduler and workers now running"
CMD="python ./cluster_test_ritoy.py ${scheduler}:8786 | tee ritoy.log"
echo "About to execute $CMD"
eval $CMD
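#! Cleanup sketch (assumption: no other Dask jobs of ours share these nodes):
#! terminate the ssh-launched scheduler and workers once the run finishes.
for host in $(cat hostfile.$JOBID); do
    ssh $host "pkill dask-scheduler; pkill dask-worker"
done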