From f30728232d032f2042f8b9f1f089f85136b4f60e Mon Sep 17 00:00:00 2001 From: kaixih Date: Mon, 17 Nov 2025 18:53:20 +0000 Subject: [PATCH 1/2] Set ep = 1 for b200 --- .github/configs/nvidia-master.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index aa93f4fad..3a037f5d1 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -9,18 +9,18 @@ dsr1-fp4-b200-sglang: - isl: 1024 osl: 1024 search-space: - - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 128 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 128 } - isl: 1024 osl: 8192 search-space: - - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 128 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 128 } - isl: 8192 osl: 1024 search-space: - - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 8, conc-start: 4, conc-end: 16 } + - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } dsr1-fp4-b200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc2.post2 @@ -83,15 +83,15 @@ dsr1-fp8-b200-sglang: - isl: 1024 osl: 1024 search-space: - - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } - isl: 1024 osl: 8192 search-space: - - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } - isl: 8192 osl: 1024 search-space: - - { tp: 8, ep: 8, conc-start: 4, conc-end: 64 } + - { tp: 8, ep: 1, conc-start: 4, conc-end: 64 } dsr1-fp8-b200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc2.post2 From 7dfc7e4ec0ddff0e4992a77bae50f1d395c01786 Mon Sep 17 00:00:00 2001 From: kaixih Date: Tue, 18 Nov 2025 19:48:14 +0000 Subject: [PATCH 2/2] Re-enable EP for fp4 --- .github/configs/nvidia-master.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 3a037f5d1..954abbba2 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -9,18 +9,18 @@ dsr1-fp4-b200-sglang: - isl: 1024 osl: 1024 search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 128 } + - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 128 } - isl: 1024 osl: 8192 search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 128 } + - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 128 } - isl: 8192 osl: 1024 search-space: - - { tp: 4, ep: 1, conc-start: 4, conc-end: 128 } - - { tp: 8, ep: 1, conc-start: 4, conc-end: 16 } + - { tp: 4, ep: 4, conc-start: 4, conc-end: 128 } + - { tp: 8, ep: 8, conc-start: 4, conc-end: 16 } dsr1-fp4-b200-trt: image: nvcr.io#nvidia/tensorrt-llm/release:1.1.0rc2.post2