# Patch summary (git diff over CI helper scripts in .github/workflows/).
#
# Files touched: frontier/{build,submit,test}.sh and
#                phoenix/{bench,submit-bench,submit,test}.sh
#
# 1. Every `[ "$x" == "..." ]` test becomes `[ "$x" = "..." ]`
#    (`=` is the string-equality operator POSIX specifies for `[`/`test`).
# 2. phoenix/bench.sh: the second device check was written as
#    `["$job_device" == "gpu"]` with no spaces inside the brackets; the patch
#    rewrites it as `[ "$job_device" = "gpu" ]`.
# 3. phoenix/bench.sh: TMPDIR no longer points at the shared mytmp_build
#    directory itself but at a per-run subdirectory
#    `$tmpbuild/run-$(( RANDOM % 900 ))`, which is created before the benchmark
#    and removed with `rm -rf` after a `sleep 10`; TMPDIR is still unset at the
#    end.
#
# NOTE(review): the run directory name has only 900 possible values, so two
#   jobs sharing mytmp_build could pick the same one; `mktemp -d` inside
#   $tmpbuild would avoid that -- confirm whether jobs can overlap.
# NOTE(review): `mkdir -p $tmpbuild` / `mkdir -p $currentdir` are unquoted and
#   `rm -rf "$currentdir"` has no `${currentdir:?}` guard; harmless with the
#   hard-coded path shown here, but worth tightening.
# NOTE(review): the purpose of `sleep 10` before cleanup is not visible in the
#   patch -- presumably it lets lingering processes release files; confirm.
# NOTE(review): this copy of the patch appears to have had its line breaks
#   collapsed; it probably will not apply with `git apply` as-is -- verify
#   against the original commit.
diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh index 4aa0ffe64e..67b79ba3ba 100644 --- a/.github/workflows/frontier/build.sh +++ b/.github/workflows/frontier/build.sh @@ -1,7 +1,7 @@ #!/bin/bash build_opts="" -if [ "$1" == "gpu" ]; then +if [ "$1" = "gpu" ]; then build_opts="--gpu" fi diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh index 058d4956d4..7c4cb059ba 100644 --- a/.github/workflows/frontier/submit.sh +++ b/.github/workflows/frontier/submit.sh @@ -13,10 +13,10 @@ else exit 1 fi -if [ "$2" == "cpu" ]; then +if [ "$2" = "cpu" ]; then sbatch_device_opts="\ #SBATCH -n 32 # Number of cores required" -elif [ "$2" == "gpu" ]; then +elif [ "$2" = "gpu" ]; then sbatch_device_opts="\ #SBATCH -n 8 # Number of cores required" else diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh index 539166e055..57481fa949 100644 --- a/.github/workflows/frontier/test.sh +++ b/.github/workflows/frontier/test.sh @@ -3,7 +3,7 @@ gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' '` ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c` -if [ "$job_device" == "gpu" ]; then +if [ "$job_device" = "gpu" ]; then ./mfc.sh test --max-attempts 3 -j $ngpus -- -c frontier else ./mfc.sh test --max-attempts 3 -j 32 -- -c frontier diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh index 8812e00e3b..f58ef44721 100644 --- a/.github/workflows/phoenix/bench.sh +++ b/.github/workflows/phoenix/bench.sh @@ -2,19 +2,26 @@ n_ranks=12 -if [ "$job_device" == "gpu" ]; then +if [ "$job_device" = "gpu" ]; then n_ranks=$(nvidia-smi -L | wc -l) # number of GPUs on node gpu_ids=$(seq -s ' ' 0 $(($n_ranks-1))) # 0,1,2,...,gpu_count-1 device_opts="--gpu -g $gpu_ids" fi -mkdir -p /storage/scratch1/6/sbryngelson3/mytmp_build -export TMPDIR=/storage/scratch1/6/sbryngelson3/mytmp_build +tmpbuild=/storage/scratch1/6/sbryngelson3/mytmp_build 
+currentdir=$tmpbuild/run-$(( RANDOM % 900 )) +mkdir -p $tmpbuild +mkdir -p $currentdir -if ["$job_device" == "gpu"]; then +export TMPDIR=$currentdir + +if [ "$job_device" = "gpu" ]; then ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks else ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks fi +sleep 10 +rm -rf "$currentdir" || true + unset TMPDIR diff --git a/.github/workflows/phoenix/submit-bench.sh b/.github/workflows/phoenix/submit-bench.sh index 6fba086b6e..e8b6dd3484 100644 --- a/.github/workflows/phoenix/submit-bench.sh +++ b/.github/workflows/phoenix/submit-bench.sh @@ -25,9 +25,9 @@ sbatch_gpu_opts="\ #SBATCH -G2\ " -if [ "$2" == "cpu" ]; then +if [ "$2" = "cpu" ]; then sbatch_device_opts="$sbatch_cpu_opts" -elif [ "$2" == "gpu" ]; then +elif [ "$2" = "gpu" ]; then sbatch_device_opts="$sbatch_gpu_opts" else usage diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh index 1359fe653f..6700e38c50 100644 --- a/.github/workflows/phoenix/submit.sh +++ b/.github/workflows/phoenix/submit.sh @@ -25,9 +25,9 @@ sbatch_gpu_opts="\ #SBATCH -G2\ " -if [ "$2" == "cpu" ]; then +if [ "$2" = "cpu" ]; then sbatch_device_opts="$sbatch_cpu_opts" -elif [ "$2" == "gpu" ]; then +elif [ "$2" = "gpu" ]; then sbatch_device_opts="$sbatch_gpu_opts" else usage diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh index e89af47214..5582e9f6d5 100644 --- a/.github/workflows/phoenix/test.sh +++ b/.github/workflows/phoenix/test.sh @@ -1,7 +1,7 @@ #!/bin/bash build_opts="" -if [ "$job_device" == "gpu" ]; then +if [ "$job_device" = "gpu" ]; then build_opts="--gpu" fi @@ -9,7 +9,7 @@ fi n_test_threads=8 -if [ "$job_device" == "gpu" ]; then +if [ "$job_device" = "gpu" ]; then gpu_count=$(nvidia-smi -L | wc -l) # number of GPUs on node gpu_ids=$(seq -s ' ' 0 $(($gpu_count-1))) # 0,1,2,...,gpu_count-1 device_opts="-g $gpu_ids"