Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/single_node/dsv4_fp4_mi355x_sglang.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ PYEOF
# the swiglu_limit clamp in the triton
# MoE fallback path.
export SGLANG_REASONING_EFFORT=max
export SGLANG_OPT_USE_FUSED_COMPRESS=false
export SGLANG_OPT_USE_FUSED_COMPRESS=true
export SGLANG_OPT_USE_OLD_COMPRESSOR=true
export SGLANG_OPT_USE_TILELANG_SWA_PREPARE=false
export SGLANG_OPT_USE_JIT_KERNEL_FUSED_TOPK=false
Expand All @@ -64,7 +64,7 @@ export SGLANG_OPT_USE_TILELANG_MHC_POST=false
export SGLANG_ENABLE_THINKING=1
export SGLANG_USE_AITER=1
export SGLANG_USE_ROCM700A=1
export SGLANG_TOPK_TRANSFORM_512_TORCH=1
export SGLANG_TOPK_TRANSFORM_512_TORCH=0
export SGLANG_FP8_PAGED_MQA_LOGITS_TORCH=1
export SGLANG_DSV4_FP4_EXPERTS=True
export SGLANG_OPT_DPSK_V4_RADIX=0
Expand Down
7 changes: 7 additions & 0 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2171,6 +2171,13 @@
- "ep=1 entries (dp-attn true and false) are unaffected by the EP=8 regression"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1266

- config-keys:
- dsv4-fp4-mi355x-sglang
description:
- "Flip SGLANG_TOPK_TRANSFORM_512_TORCH from 1 to 0. The indexer's top-k step now runs the tilelang kernel instead of the torch path."
- "Flip SGLANG_OPT_USE_FUSED_COMPRESS from false to true. The DeepseekV4 compressor now goes through the fused triton path instead of the torch path."
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1275

- config-keys:
- dsv4-fp4-gb200-dynamo-vllm-mtp2
description:
Expand Down
Loading