Skip to content

Commit

Permalink
Add SLURM batch script for single CPU/GPU job
Browse files Browse the repository at this point in the history
The flag `-u` can be passed to `savu_launcher.sh` to run a single-process
cluster job (1 CPU, 1 GPU) with SLURM.
  • Loading branch information
yousefmoazzam committed Jul 11, 2023
1 parent 0f5f7d3 commit 6b1fe63
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 1 deletion.
15 changes: 14 additions & 1 deletion system_files/dls/mpi/savu_launcher.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
all_args=$*
original_command="savu_mpi $all_args"

# Define var for holding whether the job should be multi-process (regular case)
# or single-process (edge case)
single_proc_job=0

# input optional arguments
keep=false
while getopts ":t:i:s:z:ck::" opt; do
while getopts ":t:i:s:z:uck::" opt; do
case ${opt} in
s ) version=$OPTARG ;;
k ) keep=$OPTARG ;;
u ) single_proc_job=1 ;;
\? ) echo "Invalid option: $OPTARG" 1>&2 ;;
: ) echo "Invalid option: $OPTARG requires an argument" 1>&2 ;;
esac
Expand Down Expand Up @@ -91,6 +96,14 @@ function create_folder()
# Locate the directory this launcher lives in; the job scripts are expected to
# sit alongside it.
DIR="$(cd "$(dirname "$0")" && pwd)"
filepath_single="$DIR/savu_mpijob_single.sh"
# Choose the job script: the single-process script (1 CPU, 1 GPU) when the
# single-process flag has been set, otherwise the regular multi-process one.
# The flag is quoted and defaulted so an unset/empty value selects the regular
# script instead of making `[` fail with a syntax error.
if [ "${single_proc_job:-0}" -eq 1 ]; then
  filepath=$filepath_single
else
  filepath="$DIR/savu_mpijob.sh"
fi
# get the Savu path: ask Python where the savu package is installed, then strip
# the trailing "/savu" to obtain the directory that contains the package.
savupath=$(python -c "import savu, os; print (savu.__path__[0])")
savupath=${savupath%/savu}

Expand Down
60 changes: 60 additions & 0 deletions system_files/dls/mpi/savu_mpijob_single.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash
#SBATCH --job-name=savu
#SBATCH --partition=cs05r
#SBATCH --account=test05r
#SBATCH --nodes=1
#SBATCH --gpus-per-node=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=1
#
# SLURM batch script that runs Savu as a single process (1 CPU, 1 GPU).
#
# Arguments:
#   $1  version      Savu module version; loaded as "savu/<version>"
#   $2  savupath     directory containing the "savu" package (it is prepended
#                    to PYTHONPATH and savu/tomo_recon.py is run from it)
#   $3  datafile     passed to tomo_recon.py as its 1st positional argument
#   $4  processfile  passed to tomo_recon.py as its 2nd positional argument
#   $5  outfile      passed to tomo_recon.py as its 3rd positional argument
#   $6  delete       "false", or the intermediate folder to remove at the end
#   $7+ any further arguments are forwarded to tomo_recon.py unchanged
#
# Exit status: 2 on a usage error, otherwise the exit status of the srun step.
#
# The interpreter is bash (not sh) because the script relies on bash arrays
# and arithmetic `for` loops.

if [ "$#" -lt 6 ]; then
  echo "Usage: $0 <version> <savupath> <datafile> <processfile> <outfile> <delete> [savu options...]" 1>&2
  exit 2
fi

version=$1
shift 1
module load "savu/$version"

savupath=$1
datafile=$2
processfile=$3
outfile=$4
delete=$5
shift 5

# These should match the values for `--ntasks-per-node` and `--gpus-per-node`
# respectively
nCPUs=1
nGPUs=1

# Prepend the Savu directory; the ${VAR:+...} form avoids leaving a trailing
# ":" (an empty path entry) when PYTHONPATH was previously unset.
export PYTHONPATH="$savupath${PYTHONPATH:+:$PYTHONPATH}"
filename="$savupath/savu/tomo_recon.py"

# Work out which hosts the job was allocated. SLURM publishes the allocation
# in SLURM_JOB_NODELIST (PE_HOSTFILE is a Grid Engine variable and is not set
# for SLURM jobs); `scontrol show hostnames` expands it to one name per line.
# Fall back to the local hostname when that information is unavailable.
if [ -n "${SLURM_JOB_NODELIST:-}" ] && command -v scontrol > /dev/null 2>&1; then
  hosts=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | sort -u)
else
  hosts=$(hostname)
fi
# Count the non-empty lines, i.e. the number of distinct hosts.
uniqslots=$(printf '%s\n' "$hosts" | grep -c .)
echo "number of unique hosts: ${uniqslots}"
echo "running on these hosts:"
printf '%s\n' "$hosts"

processes=$((uniqslots * nCPUs))

# Build the comma-separated process-name list handed to Savu via `-n`:
# one "GPU<i>" entry per GPU, then "CPU<i>" entries for the remaining
# (nCPUs - nGPUs) processes. With nCPUs=1 and nGPUs=1 this is just "GPU0".
names=()
for ((i = 0; i < nGPUs; i++)); do
  names+=("GPU$i")
done
for ((i = 0; i < nCPUs - nGPUs; i++)); do
  names+=("CPU$i")
done
CPUs=$(
  IFS=,
  echo "${names[*]}"
)
echo "$CPUs"

echo "Processes running are : ${processes}"

# Resolve the folder to delete up front, while the working directory is still
# the submission one. An empty value is treated like "false", and a path that
# cannot be resolved (or that resolves to "/") disables the clean-up rather
# than risking an `rm -rf` on the wrong location.
if [ -n "$delete" ] && [ "$delete" != false ]; then
  resolved=$(readlink -f -- "$delete")
  if [ -z "$resolved" ] || [ "$resolved" = / ]; then
    echo "Cannot resolve the intermediate folder '$delete'; it will not be deleted" 1>&2
    delete=false
  else
    delete=$resolved
    echo "***Deleting the intermediate folder" "$delete" "at the end of this run"
  fi
else
  delete=false
fi

# Run Savu processes
srun python "$filename" "$datafile" "$processfile" "$outfile" -n "$CPUs" "$@"
# Remember the result so the clean-up below cannot mask a failed run.
status=$?

if [ "$delete" != false ]; then
  # $delete is absolute at this point, so a failed cd is not fatal.
  cd /dls/tmp/savu || echo "Warning: could not change directory to /dls/tmp/savu" 1>&2
  # Keep the job output files by copying them to the parent folder first.
  cp -- "$delete"/savu.o* "$delete/../"
  rm -rf -- "${delete:?}"
fi

exit "$status"

0 comments on commit 6b1fe63

Please sign in to comment.