From 66d85512e319645e96308f333618bf9d7ceeb897 Mon Sep 17 00:00:00 2001 From: thomawan Date: Mon, 4 May 2026 16:53:04 +0800 Subject: [PATCH 1/3] Update dsv4 env vars for mi355 sglang --- benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh | 4 ++-- perf-changelog.yaml | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh b/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh index 642c93918..dae0fc043 100755 --- a/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh +++ b/benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh @@ -53,7 +53,7 @@ PYEOF # the swiglu_limit clamp in the triton # MoE fallback path. export SGLANG_REASONING_EFFORT=max -export SGLANG_OPT_USE_FUSED_COMPRESS=false +export SGLANG_OPT_USE_FUSED_COMPRESS=true export SGLANG_OPT_USE_OLD_COMPRESSOR=true export SGLANG_OPT_USE_TILELANG_SWA_PREPARE=false export SGLANG_OPT_USE_JIT_KERNEL_FUSED_TOPK=false @@ -64,7 +64,7 @@ export SGLANG_OPT_USE_TILELANG_MHC_POST=false export SGLANG_ENABLE_THINKING=1 export SGLANG_USE_AITER=1 export SGLANG_USE_ROCM700A=1 -export SGLANG_TOPK_TRANSFORM_512_TORCH=1 +export SGLANG_TOPK_TRANSFORM_512_TORCH=0 export SGLANG_FP8_PAGED_MQA_LOGITS_TORCH=1 export SGLANG_DSV4_FP4_EXPERTS=True export SGLANG_OPT_DPSK_V4_RADIX=0 diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 524c91e67..57305ddc9 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2165,11 +2165,9 @@ - config-keys: - dsv4-fp4-mi355x-sglang description: - - "Bump image to rocm/sgl-dev:rocm720-mi35x-a8410de-20260502-DSv4 (one commit forward on amd/deepseek_v4: sglang PR #24249, fuse-compress-decode 0501)" - - "Drop the runtime sglang clone+pip overlay from benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh — the new image bakes the same a8410de6 SHA the overlay was pinning, so the overlay is redundant. Future sglang bumps now go through an image tag bump" - - "Context: ep=8 dp-attn=true entries failed gsm8k eval after #1244 merged. PR sweep (run 25246535693) reported gsm8k strict-match=0.9318 because the launcher silently dropped --ep-size and sglang ran with ep_size=1 regardless of the matrix label; post-merge sweep (run 25262278289) ran with ep_size=8 and gsm8k strict-match dropped to 0.0000. The image bump is the candidate fix to verify on rerun" - - "ep=1 entries (dp-attn true and false) are unaffected by the EP=8 regression" - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1266 + - "Flip SGLANG_TOPK_TRANSFORM_512_TORCH from 1 to 0. The indexer's top-k step now runs the tilelang kernel instead of the torch path." + - "Flip SGLANG_OPT_USE_FUSED_COMPRESS from false to true. The DeepseekV4 compressor now goes through the fused triton path instead of the torch path." + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/xxxx - config-keys: - dsv4-fp4-gb200-dynamo-vllm-mtp2 From c7b9cd8e7e6423043abd7ccdd6fd6fc63b70dcc4 Mon Sep 17 00:00:00 2001 From: Thomas Wang <1am9trash@gmail.com> Date: Mon, 4 May 2026 17:00:54 +0800 Subject: [PATCH 2/3] Update PR number --- perf-changelog.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 57305ddc9..5e65f01ee 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2167,7 +2167,7 @@ description: - "Flip SGLANG_TOPK_TRANSFORM_512_TORCH from 1 to 0. The indexer's top-k step now runs the tilelang kernel instead of the torch path." - "Flip SGLANG_OPT_USE_FUSED_COMPRESS from false to true. The DeepseekV4 compressor now goes through the fused triton path instead of the torch path." - pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/xxxx + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1275 - config-keys: - dsv4-fp4-gb200-dynamo-vllm-mtp2 From 5981f999a9c80d904b18976790005e7b72b536ac Mon Sep 17 00:00:00 2001 From: thomawan Date: Mon, 4 May 2026 17:07:11 +0800 Subject: [PATCH 3/3] Add perf log back --- perf-changelog.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 5e65f01ee..6f2a60d11 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2162,6 +2162,15 @@ - "Keep this as eval-only PR validation until the TensorRT-LLM fused MHC kernel is guarded or supports hidden size 7168" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1233 +- config-keys: + - dsv4-fp4-mi355x-sglang + description: + - "Bump image to rocm/sgl-dev:rocm720-mi35x-a8410de-20260502-DSv4 (one commit forward on amd/deepseek_v4: sglang PR #24249, fuse-compress-decode 0501)" + - "Drop the runtime sglang clone+pip overlay from benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh — the new image bakes the same a8410de6 SHA the overlay was pinning, so the overlay is redundant. Future sglang bumps now go through an image tag bump" + - "Context: ep=8 dp-attn=true entries failed gsm8k eval after #1244 merged. PR sweep (run 25246535693) reported gsm8k strict-match=0.9318 because the launcher silently dropped --ep-size and sglang ran with ep_size=1 regardless of the matrix label; post-merge sweep (run 25262278289) ran with ep_size=8 and gsm8k strict-match dropped to 0.0000. The image bump is the candidate fix to verify on rerun" + - "ep=1 entries (dp-attn true and false) are unaffected by the EP=8 regression" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1266 + - config-keys: - dsv4-fp4-mi355x-sglang description: