SemiAnalysisAI · functionstackx · May 19, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 23, 2026
@@ -3125,7 +3125,7 @@ glm5-fp8-h200-sglang:
   image: lmsysorg/sglang:v0.5.12-cu130
   model: zai-org/GLM-5-FP8
   model-prefix: glm5
-  runner: h200
+  runner: h200-dgxc
   precision: fp8
   framework: sglang
   multinode: false

diff --git a/benchmarks/single_node/glm5_fp8_h200.sh b/benchmarks/single_node/glm5_fp8_h200.sh
@@ -42,6 +42,7 @@ python3 -m sglang.launch_server \
   --mem-fraction-static 0.85 \
   --served-model-name glm-5-fp8 \
   --trust-remote-code \
+  --enable-flashinfer-allreduce-fusion \
   $EVAL_CONTEXT_ARGS > "$SERVER_LOG" 2>&1 &
 
 SERVER_PID=$!

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -2957,3 +2957,10 @@
     - "Following recipe from https://github.com/vllm-project/recipes/pull/433"
     - "Add DEP8 dp-attn=true validation probes at conc=64 for 1k1k and 8k1k"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1374
+
+- config-keys:
+    - glm5-fp8-h200-sglang
+  description:
+    - "Switch runner from h200 to h200-dgxc (SGLang image remains v0.5.12-cu130)"
+    - "Add --enable-flashinfer-allreduce-fusion to server launch"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1033