SemiAnalysisAI · Oseltamivir · May 3, 2026 · May 3, 2026 · May 3, 2026
@@ -2707,7 +2707,7 @@ dsv4-fp4-b300-vllm:
       - { tp: 8, ep: 8, dp-attn: true, conc-start: 2048, conc-end: 2048 }
 
 dsv4-fp4-b300-trt:
-  image: ghcr.io/semianalysisai/trtllm-deepseek-v4:feat-deepseek_v4-4999884
+  image: ghcr.io/semianalysisai/trtllm-deepseek-v4:fix-mhc7168-eb20e9e
   model: deepseek-ai/DeepSeek-V4-Pro
   model-prefix: dsv4
   runner: b300

diff --git a/benchmarks/single_node/dsv4_fp4_b300_trt.sh b/benchmarks/single_node/dsv4_fp4_b300_trt.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 
 # DeepSeek-V4-Pro single-node TRTLLM recipe for B300. The configured image
-# already contains NVIDIA/TensorRT-LLM@feat/deepseek_v4; do not build TRTLLM at
+# already contains a TensorRT-LLM DeepSeek-V4 build; do not build TRTLLM at
 # runtime from this benchmark path.
 
 source "$(dirname "$0")/../benchmark_lib.sh"
@@ -101,10 +101,7 @@ if [ "${EVAL_ONLY}" = "true" ]; then
     MAX_NUM_TOKENS="$EVAL_MAX_MODEL_LEN"
 fi
 
-# DeepSeek-V4-Pro has hidden size 7168. The current TRTLLM fused-HC MHC
-# path corrupts eval generations for this shape; keep eval servers on the
-# unfused path until the fused kernel is guarded or supports 7168.
-export TRTLLM_MHC_ENABLE_FUSED_HC=0
+export TRTLLM_MHC_ENABLE_FUSED_HC="${TRTLLM_MHC_ENABLE_FUSED_HC:-1}"
 echo "TRTLLM_MHC_ENABLE_FUSED_HC: $TRTLLM_MHC_ENABLE_FUSED_HC"
 
 start_gpu_monitor --output "$PWD/gpu_metrics.csv"

diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -2148,3 +2148,10 @@
     - "Disable TRTLLM fused MHC hyper-connection for eval servers via TRTLLM_MHC_ENABLE_FUSED_HC=0 because the current fused kernel corrupts DeepSeek-V4-Pro hidden size 7168 generations"
     - "Keep this as eval-only PR validation until the TensorRT-LLM fused MHC kernel is guarded or supports hidden size 7168"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1233
+
+- config-keys:
+    - dsv4-fp4-b300-trt
+  description:
+    - "Update the TensorRT-LLM DeepSeek-V4-Pro image to ghcr.io/semianalysisai/trtllm-deepseek-v4:fix-mhc7168-eb20e9e"
+    - "Enable TRTLLM fused MHC by default now that the image includes the hidden-size 7168 fused-HC fix"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1270