Skip to content
Merged
2 changes: 1 addition & 1 deletion .github/configs/nvidia-master.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2019,7 +2019,7 @@ qwen3.5-fp8-h200-sglang:
- { tp: 8, ep: 8, conc-start: 4, conc-end: 64 }

qwen3.5-fp8-h200-sglang-mtp:
image: lmsysorg/sglang:v0.5.9-cu129-amd64
image: lmsysorg/sglang:v0.5.10.post1
model: Qwen/Qwen3.5-397B-A17B-FP8
model-prefix: qwen3.5
runner: h200
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/single_node/qwen3.5_fp8_h200_mtp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ echo "CONC: $CONC, ISL: $ISL, OSL: $OSL, MAX_MODEL_LEN: $MAX_MODEL_LEN"
start_gpu_monitor

set -x
python3 -m sglang.launch_server \
SGLANG_ENABLE_SPEC_V2=1 python3 -m sglang.launch_server \
Comment thread
Oseltamivir marked this conversation as resolved.
--model "$MODEL" \
--host 0.0.0.0 \
--port "$PORT" \
Expand Down Expand Up @@ -92,4 +92,4 @@ fi

# Stop GPU monitoring
stop_gpu_monitor
set +x
set +x
10 changes: 8 additions & 2 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1298,7 +1298,7 @@
- "Expand from tp2/tp4 to tp1/tp2/tp4/tp8 with expert parallel and dp-attn variants"
- "Add ep2, ep4, and dp-attn configurations for higher concurrency sweeps"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/996

- config-keys:
- qwen3.5-fp4-b200-sglang
description:
Expand All @@ -1322,7 +1322,7 @@
description:
- "Qwen3.5 fp4 support on SGL"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1006

- config-keys:
- gptoss-fp4-h200-trt
description:
Expand All @@ -1342,3 +1342,9 @@
description:
- "TP2/TP4 search space exploration for Qwen3.5 fp4 on SGL"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1022

- config-keys:
- qwen3.5-fp8-h200-sglang-mtp
description:
- "Enable SGLANG_ENABLE_SPEC_V2=1 for Qwen3.5 FP8 H200 SGLang MTP"
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1017
Loading