From 6252ee4ecde2a8a69ccc0425cd3ef3cf4b0e1010 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Fri, 27 Jun 2025 11:29:52 -0400 Subject: [PATCH 1/4] Update bench.sh --- .github/workflows/phoenix/bench.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh index 8812e00e3b..80c803cc89 100644 --- a/.github/workflows/phoenix/bench.sh +++ b/.github/workflows/phoenix/bench.sh @@ -17,4 +17,6 @@ else ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks fi +rm -rf $TMPDIR || true + unset TMPDIR From e0e98c56088e63dc6bcf5ce480e01e05c452ca89 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Fri, 27 Jun 2025 11:35:53 -0400 Subject: [PATCH 2/4] Update bench.sh --- .github/workflows/phoenix/bench.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh index 80c803cc89..005060f86d 100644 --- a/.github/workflows/phoenix/bench.sh +++ b/.github/workflows/phoenix/bench.sh @@ -8,8 +8,12 @@ if [ "$job_device" == "gpu" ]; then device_opts="--gpu -g $gpu_ids" fi -mkdir -p /storage/scratch1/6/sbryngelson3/mytmp_build -export TMPDIR=/storage/scratch1/6/sbryngelson3/mytmp_build +tmpbuild=/storage/scratch1/6/sbryngelson3/mytmp_build +currentdir=$tmpbuild/run-$(( RANDOM % 900 )) +mkdir -p $tmpbuild +mkdir -p $currentdir + +export TMPDIR=$currentdir if ["$job_device" == "gpu"]; then ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks @@ -17,6 +21,7 @@ else ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks fi -rm -rf $TMPDIR || true +sleep 10 +rm -rf $currentdir || true unset TMPDIR From d80e156df83b52a5ae6b3d56890d488bca45f770 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Fri, 27 Jun 2025 13:16:14 -0400 Subject: [PATCH 3/4] fix up some syntax to be posix generic --- .github/workflows/frontier/build.sh | 2 +- .github/workflows/frontier/submit.sh | 4 ++-- .github/workflows/frontier/test.sh | 2 +- .github/workflows/phoenix/bench.sh | 4 ++-- .github/workflows/phoenix/submit-bench.sh | 4 ++-- .github/workflows/phoenix/submit.sh | 4 ++-- .github/workflows/phoenix/test.sh | 4 ++-- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/frontier/build.sh b/.github/workflows/frontier/build.sh index 4aa0ffe64e..67b79ba3ba 100644 --- a/.github/workflows/frontier/build.sh +++ b/.github/workflows/frontier/build.sh @@ -1,7 +1,7 @@ #!/bin/bash build_opts="" -if [ "$1" == "gpu" ]; then +if [ "$1" = "gpu" ]; then build_opts="--gpu" fi diff --git a/.github/workflows/frontier/submit.sh b/.github/workflows/frontier/submit.sh index 058d4956d4..7c4cb059ba 100644 --- a/.github/workflows/frontier/submit.sh +++ b/.github/workflows/frontier/submit.sh @@ -13,10 +13,10 @@ else exit 1 fi -if [ "$2" == "cpu" ]; then +if [ "$2" = "cpu" ]; then sbatch_device_opts="\ #SBATCH -n 32 # Number of cores required" -elif [ "$2" == "gpu" ]; then +elif [ "$2" = "gpu" ]; then sbatch_device_opts="\ #SBATCH -n 8 # Number of cores required" else diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh index 539166e055..57481fa949 100644 --- a/.github/workflows/frontier/test.sh +++ b/.github/workflows/frontier/test.sh @@ -3,7 +3,7 @@ gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' '` ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c` -if [ "$job_device" == "gpu" ]; then +if [ "$job_device" = "gpu" ]; then ./mfc.sh test --max-attempts 3 -j $ngpus -- -c frontier else ./mfc.sh test --max-attempts 3 -j 32 -- -c frontier diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh index 005060f86d..b5b197f08a 100644 --- a/.github/workflows/phoenix/bench.sh +++ b/.github/workflows/phoenix/bench.sh @@ -2,7 +2,7 @@ n_ranks=12 -if [ "$job_device" == "gpu" ]; then +if [ "$job_device" = "gpu" ]; then n_ranks=$(nvidia-smi -L | wc -l) # number of GPUs on node gpu_ids=$(seq -s ' ' 0 $(($n_ranks-1))) # 0,1,2,...,gpu_count-1 device_opts="--gpu -g $gpu_ids" @@ -15,7 +15,7 @@ mkdir -p $currentdir export TMPDIR=$currentdir -if ["$job_device" == "gpu"]; then +if [[ "$job_device" == "gpu" ]]; then ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks else ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks diff --git a/.github/workflows/phoenix/submit-bench.sh b/.github/workflows/phoenix/submit-bench.sh index 6fba086b6e..e8b6dd3484 100644 --- a/.github/workflows/phoenix/submit-bench.sh +++ b/.github/workflows/phoenix/submit-bench.sh @@ -25,9 +25,9 @@ sbatch_gpu_opts="\ #SBATCH -G2\ " -if [ "$2" == "cpu" ]; then +if [ "$2" = "cpu" ]; then sbatch_device_opts="$sbatch_cpu_opts" -elif [ "$2" == "gpu" ]; then +elif [ "$2" = "gpu" ]; then sbatch_device_opts="$sbatch_gpu_opts" else usage diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh index 1359fe653f..6700e38c50 100644 --- a/.github/workflows/phoenix/submit.sh +++ b/.github/workflows/phoenix/submit.sh @@ -25,9 +25,9 @@ sbatch_gpu_opts="\ #SBATCH -G2\ " -if [ "$2" == "cpu" ]; then +if [ "$2" = "cpu" ]; then sbatch_device_opts="$sbatch_cpu_opts" -elif [ "$2" == "gpu" ]; then +elif [ "$2" = "gpu" ]; then sbatch_device_opts="$sbatch_gpu_opts" else usage diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh index e89af47214..5582e9f6d5 100644 --- a/.github/workflows/phoenix/test.sh +++ b/.github/workflows/phoenix/test.sh @@ -1,7 +1,7 @@ #!/bin/bash build_opts="" -if [ "$job_device" == "gpu" ]; then +if [ "$job_device" = "gpu" ]; then build_opts="--gpu" fi @@ -9,7 +9,7 @@ fi n_test_threads=8 -if [ "$job_device" == "gpu" ]; then +if [ "$job_device" = "gpu" ]; then gpu_count=$(nvidia-smi -L | wc -l) # number of GPUs on node gpu_ids=$(seq -s ' ' 0 $(($gpu_count-1))) # 0,1,2,...,gpu_count-1 device_opts="-g $gpu_ids" From a69dba3308e51aa0964664d98494e2fefe0fce16 Mon Sep 17 00:00:00 2001 From: Spencer Bryngelson Date: Fri, 27 Jun 2025 13:20:37 -0400 Subject: [PATCH 4/4] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .github/workflows/phoenix/bench.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh index b5b197f08a..f58ef44721 100644 --- a/.github/workflows/phoenix/bench.sh +++ b/.github/workflows/phoenix/bench.sh @@ -15,13 +15,13 @@ mkdir -p $currentdir export TMPDIR=$currentdir -if [[ "$job_device" == "gpu" ]]; then +if [ "$job_device" = "gpu" ]; then ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks else ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks fi sleep 10 -rm -rf $currentdir || true +rm -rf "$currentdir" || true unset TMPDIR