diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index a15c64394..fc1ca9746 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -3160,7 +3160,7 @@ glm5-fp8-h200-sglang-mtp: - { tp: 8, conc-start: 4, conc-end: 64, spec-decoding: mtp } dsr1-fp8-h200-trt: - image: nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc2.post2 + image: nvcr.io#nvidia/tensorrt-llm/release:1.3.0rc14 model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: h200 @@ -3183,7 +3183,7 @@ dsr1-fp8-h200-trt: - { tp: 8, ep: 8, dp-attn: true, conc-start: 64, conc-end: 64 } dsr1-fp8-h200-trt-mtp: - image: nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc2.post2 + image: nvcr.io#nvidia/tensorrt-llm/release:1.3.0rc14 model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1 runner: h200 diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 00509a838..782b3db37 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -2762,3 +2762,10 @@ description: - "Update TensorRT-LLM image (off: v1.2.0rc6.post2 109d / mtp: v1.2.0rc6.post3 102d) to v1.3.0rc14 (latest pre-release)" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1488 + +- config-keys: + - dsr1-fp8-h200-trt + - dsr1-fp8-h200-trt-mtp + description: + - "Update TensorRT-LLM image from v1.1.0rc2.post2 (154d/124d old) to v1.3.0rc14 (latest pre-release)" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1487