Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions benchmarks/gptoss_fp4_mi300x_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,9 @@ if [[ "$version" == "" || $version -lt 177 ]]; then
export HSA_NO_SCRATCH_RECLAIM=1
fi

export NCCL_MIN_NCHANNELS=112
export VLLM_USE_AITER_UNIFIED_ATTENTION=1
export VLLM_ROCM_USE_AITER_MHA=0
export VLLM_ROCM_USE_AITER_TRITON_BF16_GEMM=0
export ROCM_TRITON_MOE_PRESHUFFLE_SCALES=0
export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4

set -x
Expand Down
2 changes: 0 additions & 2 deletions benchmarks/gptoss_fp4_mi300x_slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,9 @@ if [[ "$version" == "" || $version -lt 177 ]]; then
export HSA_NO_SCRATCH_RECLAIM=1
fi

export NCCL_MIN_NCHANNELS=112
export VLLM_USE_AITER_UNIFIED_ATTENTION=1
export VLLM_ROCM_USE_AITER_MHA=0
export VLLM_ROCM_USE_AITER_TRITON_BF16_GEMM=0
export ROCM_TRITON_MOE_PRESHUFFLE_SCALES=0
export VLLM_ROCM_QUICK_REDUCE_QUANTIZATION=INT4

set -x
Expand Down
13 changes: 10 additions & 3 deletions benchmarks/gptoss_fp4_mi325x_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,19 @@
# CONC
# MAX_MODEL_LEN

export HSA_NO_SCRATCH_RECLAIM=1
export NCCL_MIN_NCHANNELS=112
# If the machine runs a MEC FW older than 177, RCCL
# cannot reclaim some memory.
# Disable that feature to avoid crashes.
# This is related to the changes in the driver at:
# https://rocm.docs.amd.com/en/docs-6.4.3/about/release-notes.html#amdgpu-driver-updates

#######################################
# Decide whether HSA_NO_SCRATCH_RECLAIM must be set for a MEC FW version.
# Arguments: $1 - firmware version string (may be empty)
# Returns:   0 if the workaround is needed, 1 otherwise
#######################################
needs_scratch_reclaim_workaround() {
  local fw_version=$1
  # Anything that is not a plain decimal integer (empty, "N/A", "177.1", ...)
  # is treated as "unknown" and gets the workaround as the safe default,
  # instead of raising an arithmetic error that would skip it.
  # 10# forces base 10 so a leading zero is not parsed as octal.
  [[ ! "$fw_version" =~ ^[0-9]+$ ]] || (( 10#$fw_version < 177 ))
}

# The version is the last field of the first line containing "MEC" in the
# `rocm-smi --showfw` output; it is empty when no such line is printed.
version=$(rocm-smi --showfw | grep MEC | head -n 1 | awk '{print $NF}')
if needs_scratch_reclaim_workaround "$version"; then
  export HSA_NO_SCRATCH_RECLAIM=1
fi

export VLLM_USE_AITER_UNIFIED_ATTENTION=1
export VLLM_ROCM_USE_AITER_MHA=0
export VLLM_ROCM_USE_AITER_TRITON_BF16_GEMM=0
export ROCM_TRITON_MOE_PRESHUFFLE_SCALES=0

set -x
vllm serve $MODEL --port $PORT \
Expand Down
13 changes: 10 additions & 3 deletions benchmarks/gptoss_fp4_mi325x_slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,19 @@ huggingface-cli download $MODEL
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
PORT=8888

export HSA_NO_SCRATCH_RECLAIM=1
export NCCL_MIN_NCHANNELS=112
# If the machine runs a MEC FW older than 177, RCCL
# cannot reclaim some memory.
# Disable that feature to avoid crashes.
# This is related to the changes in the driver at:
# https://rocm.docs.amd.com/en/docs-6.4.3/about/release-notes.html#amdgpu-driver-updates

#######################################
# Decide whether HSA_NO_SCRATCH_RECLAIM must be set for a MEC FW version.
# Arguments: $1 - firmware version string (may be empty)
# Returns:   0 if the workaround is needed, 1 otherwise
#######################################
needs_scratch_reclaim_workaround() {
  local fw_version=$1
  # A value that is empty or not a plain decimal integer cannot be compared
  # numerically; enable the workaround in that case rather than letting an
  # arithmetic error silently skip it.
  if [[ ! "$fw_version" =~ ^[0-9]+$ ]]; then
    return 0
  fi
  # 10# forces base 10 so a leading zero is not parsed as octal.
  (( 10#$fw_version < 177 ))
}

# The version is the last field of the first line containing "MEC" in the
# `rocm-smi --showfw` output; it is empty when no such line is printed.
version=$(rocm-smi --showfw | grep MEC | head -n 1 | awk '{print $NF}')
if needs_scratch_reclaim_workaround "$version"; then
  export HSA_NO_SCRATCH_RECLAIM=1
fi

export VLLM_USE_AITER_UNIFIED_ATTENTION=1
export VLLM_ROCM_USE_AITER_MHA=0
export VLLM_ROCM_USE_AITER_TRITON_BF16_GEMM=0
export ROCM_TRITON_MOE_PRESHUFFLE_SCALES=0

set -x
vllm serve $MODEL --port $PORT \
Expand Down
2 changes: 0 additions & 2 deletions benchmarks/gptoss_fp4_mi355x_docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ EOF
sleep 5
cat config.yaml

export HSA_NO_SCRATCH_RECLAIM=1
export NCCL_MIN_NCHANNELS=112
export VLLM_USE_AITER_UNIFIED_ATTENTION=1
export VLLM_ROCM_USE_AITER_MHA=0
export VLLM_ROCM_USE_AITER_FUSED_MOE_A16W4=1
Expand Down
3 changes: 1 addition & 2 deletions benchmarks/gptoss_fp4_mi355x_slurm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,9 @@ EOF
sleep 5
cat config.yaml

export HSA_NO_SCRATCH_RECLAIM=1
export NCCL_MIN_NCHANNELS=112
export VLLM_USE_AITER_UNIFIED_ATTENTION=1
export VLLM_ROCM_USE_AITER_MHA=0
export VLLM_ROCM_USE_AITER_FUSED_MOE_A16W4=1

set -x
vllm serve $MODEL --port $PORT \
Expand Down