diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 34d85fcca..6b029001d 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -180,30 +180,30 @@ gptoss-fp4-b200-trt:
   precision: fp4
   framework: trt
   multinode: false
-  # For all sequence lengths, if CONC >= 256, then EP=TP and DP_ATTN=true
   seq-len-configs:
+  # DP attention at higher concurrencies, TP attention at middle-to-lower ones. TP=1 turns out to be highest as an artifact of the concurrency limit (128).
   - isl: 1024
     osl: 1024
     search-space:
-    - { tp: 2, dp-attn: true, conc-start: 32, conc-end: 128 }
-    - { tp: 4, dp-attn: true, conc-start: 32, conc-end: 64 }
-    - { tp: 1, conc-start: 64, conc-end: 128 }
-    - { tp: 2, conc-start: 4, conc-end: 32 }
-    - { tp: 4, conc-start: 4, conc-end: 64 }
+    - { tp: 1, conc-start: 128, conc-end: 128 }
+    - { tp: 2, ep: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
+    - { tp: 4, ep: 4, dp-attn: true, conc-start: 64, conc-end: 64 }
+    - { tp: 2, conc-start: 8, conc-end: 32 }
+    - { tp: 4, conc-start: 4, conc-end: 16 }
     - { tp: 8, conc-start: 4, conc-end: 8 }
   - isl: 1024
     osl: 8192
     search-space:
-    - { tp: 1, conc-start: 64, conc-end: 128 }
-    - { tp: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
-    - { tp: 2, conc-start: 4, conc-end: 128 }
+    - { tp: 2, ep: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
+    - { tp: 2, conc-start: 4, conc-end: 16 }
     - { tp: 4, conc-start: 4, conc-end: 128 }
-    - { tp: 8, conc-start: 4, conc-end: 16 }
+    - { tp: 8, conc-start: 4, conc-end: 8 }
+  # DP attention at higher concurrencies, TP attention at middle-to-lower ones. TP=1 turns out to be highest as an artifact of the concurrency limit (128).
   - isl: 8192
     osl: 1024
     search-space:
-    - { tp: 1, conc-start: 64, conc-end: 128 }
-    - { tp: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
+    - { tp: 1, conc-start: 128, conc-end: 128 }
+    - { tp: 2, ep: 2, dp-attn: true, conc-start: 64, conc-end: 128 }
     - { tp: 2, conc-start: 4, conc-end: 128 }
     - { tp: 4, conc-start: 4, conc-end: 32 }
     - { tp: 8, conc-start: 4, conc-end: 8 }
@@ -1047,3 +1047,277 @@ dsr1-fp4-gb200-dynamo-sglang:
         dp-attn: true
         additional-settings:
         - "DECODE_NODES=8"
+
+# GPT-OSS 120B (FP4) on GB200: disaggregated prefill/decode sweeps using the Dynamo TensorRT-LLM runtime image.
+# Entry comments read "P: <workers>x<layout>  D: <workers>x<layout>"; TPn = tp n with ep 1 and dp-attn false, DEPn = tp n, ep n, dp-attn true.
+gptoss-fp4-gb200-dynamo-trt:
+  image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0.post2
+  model: openai/gpt-oss-120b
+  model-prefix: gptoss
+  runner: gb200
+  precision: fp4
+  framework: dynamo-trt
+  multinode: true
+  disagg: true
+  seq-len-configs:
+  - isl: 1024
+    osl: 1024
+    search-space:
+    # Right of pareto
+    # P: 1xTP1   D: 1xTP4
+    - spec-decoding: "none"
+      conc-list: [1, 2, 4, 16, 32, 64, 128]
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=256"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
+
+    # P: 1xTP1   D: 4xTP2
+    - spec-decoding: "none"
+      conc-list: [16]
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 4
+        tp: 2
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=32"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
+
+    # P: 1xTP1   D: 1xDEP2
+    - spec-decoding: "none"
+      conc-list: [256, 512, 1024, 2048, 2560]
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 1
+        tp: 2
+        ep: 2
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=1536"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
+
+    # P: 1xTP1   D: 2xDEP2
+    - spec-decoding: "none"
+      conc-list: [512, 1024, 2048, 2560]
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 2
+        tp: 2
+        ep: 2
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=1536"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
+
+    # P: 1xTP1   D: 1xDEP4
+    - spec-decoding: "none"
+      conc-list: [256, 1024, 1536]
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 1
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=512"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
+
+    # P: 1xTP1   D: 3xDEP4
+    - spec-decoding: "none"
+      conc-list: [3072]
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 3
+        tp: 4
+        ep: 4
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"  # NOTE(review): 3 workers x TP4 = 12 GPUs; confirm 1 is intended (4xTP2 uses 2)
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=1024"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
+
+  - isl: 8192
+    osl: 1024
+    search-space:
+    # Right side of pareto
+    # P: 1xTP1   D: 1xTP8
+    - spec-decoding: "none"
+      conc-list: [1]
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 1
+        tp: 8
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=2"
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=4"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
+
+    # P: 1xTP1   D: 1xTP4
+    - spec-decoding: "none"
+      conc-list: [2, 4, 8, 16, 32, 64]
+      prefill:
+        num-worker: 1
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=128"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
+
+    # Middle of pareto
+    # P: 2xTP1   D: 1xTP4
+    - spec-decoding: "none"
+      conc-list: [128, 512]
+      prefill:
+        num-worker: 2
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 1
+        tp: 4
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=1024"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
+
+    # P: 2xTP1   D: 1xTP2
+    - spec-decoding: "none"
+      conc-list: [256, 384]
+      prefill:
+        num-worker: 2
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 1
+        tp: 2
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=512"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
+
+    # P: 2xTP1   D: 1xDEP2
+    - spec-decoding: "none"
+      conc-list: [128, 512]
+      prefill:
+        num-worker: 2
+        tp: 1
+        ep: 1
+        dp-attn: false
+        additional-settings:
+        - "PREFILL_NODES=1"
+        - "PREFILL_MAX_NUM_TOKENS=20000"
+        - "PREFILL_MAX_BATCH_SIZE=32"
+      decode:
+        num-worker: 1
+        tp: 2
+        ep: 2
+        dp-attn: true
+        additional-settings:
+        - "DECODE_NODES=1"
+        - "DECODE_MAX_NUM_TOKENS=20000"
+        - "DECODE_MAX_BATCH_SIZE=512"
+        - "DECODE_GPU_MEM_FRACTION=0.9"
diff --git a/benchmarks/gptoss_fp4_b200_trt_slurm.sh b/benchmarks/gptoss_fp4_b200_trt_slurm.sh
index 35ed2c58a..45bf152c5 100644
--- a/benchmarks/gptoss_fp4_b200_trt_slurm.sh
+++ b/benchmarks/gptoss_fp4_b200_trt_slurm.sh
@@ -49,9 +49,16 @@ moe_config:
 EOF
 
 if [[ "$DP_ATTENTION" == "true" ]]; then
-    export TRTLLM_MOE_ALLTOALL_BACKEND="mnnvlthroughput"
-    export TRTLLM_FORCE_ALLTOALL_METHOD="MNNVL"
-    export TRTLLM_MOE_A2A_WORKSPACE_MB="2048"
+    # Choose the MoE all-to-all setup from EP_SIZE; nothing is exported when it is neither 1 nor greater than 1.
+    if [[ "$EP_SIZE" -eq 1 ]]; then
+        # EP_SIZE == 1 (DP attention with tensor-parallel MoE): force all-to-all off.
+        export TRTLLM_FORCE_ALLTOALL_METHOD="NotEnabled"
+    elif [[ "$EP_SIZE" -gt 1 ]]; then
+        # EP_SIZE > 1 (DP attention with expert-parallel MoE): force the MNNVL all-to-all method with a 2048 MB workspace.
+        export TRTLLM_MOE_ALLTOALL_BACKEND="mnnvlthroughput"
+        export TRTLLM_FORCE_ALLTOALL_METHOD="MNNVL"
+        export TRTLLM_MOE_A2A_WORKSPACE_MB="2048"
+    fi
     cat << EOF >> $EXTRA_CONFIG_FILE
 attention_dp_config:
     enable_balance: true
diff --git a/benchmarks/gptoss_fp4_gb200_dynamo-trt_slurm.sh b/benchmarks/gptoss_fp4_gb200_dynamo-trt_slurm.sh
new file mode 100644
index 000000000..1bce1d770
--- /dev/null
+++ b/benchmarks/gptoss_fp4_gb200_dynamo-trt_slurm.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/bash
+#
+# Submit the GPT-OSS FP4 GB200 disaggregated benchmark as a Slurm job, using the
+# TensorRT-LLM performance_sweeps scripts from a pinned Dynamo checkout.
+# Inputs are taken from the environment variables validated below.
+
+set -x
+
+source "$(dirname "$0")/benchmark_lib.sh"
+
+# MODEL_PATH and SERVED_MODEL_NAME are passed positionally to sbatch below, so
+# they are validated alongside the other required inputs.
+check_env_vars CONC_LIST ISL OSL IMAGE SPEC_DECODING \
+    MODEL_PATH SERVED_MODEL_NAME \
+    PREFILL_NUM_WORKERS PREFILL_TP PREFILL_EP PREFILL_DP_ATTN \
+    DECODE_NUM_WORKERS DECODE_TP DECODE_EP DECODE_DP_ATTN \
+    PREFILL_MAX_NUM_TOKENS PREFILL_MAX_BATCH_SIZE DECODE_MAX_NUM_TOKENS \
+    DECODE_MAX_BATCH_SIZE DECODE_GPU_MEM_FRACTION
+
+# DECODE_MTP_SIZE is only required for MTP speculative decoding; otherwise it is 0.
+if [[ "$SPEC_DECODING" == "mtp" ]]; then
+    check_env_vars DECODE_MTP_SIZE
+else
+    DECODE_MTP_SIZE="0"
+fi
+
+PERFORMANCE_SWEEPS_PATH="components/backends/trtllm/performance_sweeps"
+
+echo "Cloning Dynamo repository..."
+git clone https://github.com/ai-dynamo/dynamo.git
+# Stop if the checkout directory or the pinned branch is unavailable, instead of
+# continuing in the wrong directory or on an unpinned revision.
+cd dynamo || exit 1
+git checkout release/0.5.1-rc0.20260105 || exit 1
+git submodule update --init --recursive
+
+cd "$PERFORMANCE_SWEEPS_PATH" || exit 1
+
+# Set up environment variables based on ISL/OSL
+if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
+    export CACHE_TRANSCEIVER_MAX_NUM_TOKENS=1024
+elif [ "$ISL" = "8192" ] && [ "$OSL" = "1024" ]; then
+    export CACHE_TRANSCEIVER_MAX_NUM_TOKENS=8448
+else
+    echo "Unsupported ISL/OSL combination: $ISL/$OSL"
+    exit 1
+fi
+
+kind=dynamo_disagg
+additional_slurm_args="--time=04:00:00"
+ntasks_per_node=4
+
+# Decode uses ceil(DECODE_TP / 4) nodes per worker; prefill uses one node per worker.
+gen_nodes=$(((DECODE_TP + 3)/4 * DECODE_NUM_WORKERS))
+total_nodes=$((PREFILL_NUM_WORKERS + gen_nodes))
+total_tasks=$((total_nodes * ntasks_per_node))
+
+decode_eplb_num_slots=0
+
+# additional_slurm_args is intentionally unquoted so it splits into separate sbatch options.
+sbatch --nodes="${total_nodes}" \
+    --ntasks="${total_tasks}" \
+    --ntasks-per-node="${ntasks_per_node}" \
+    --segment="${total_nodes}" ${additional_slurm_args} \
+    benchmark_disagg.slurm \
+    "${PREFILL_NUM_WORKERS}" "${PREFILL_TP}" \
+    "${PREFILL_MAX_BATCH_SIZE}" "${PREFILL_MAX_NUM_TOKENS}" \
+    "${PREFILL_DP_ATTN}" "${DECODE_NUM_WORKERS}" \
+    "${DECODE_TP}" "${DECODE_EP}" "${DECODE_MAX_BATCH_SIZE}" \
+    "${DECODE_MAX_NUM_TOKENS}" "${DECODE_DP_ATTN}" \
+    "${DECODE_GPU_MEM_FRACTION}" "${decode_eplb_num_slots}" \
+    "${DECODE_MTP_SIZE}" "${CONC_LIST}" \
+    "${gen_nodes}" "${kind}" \
+    "${MODEL_PATH}" "${SERVED_MODEL_NAME}" \
+    "${IMAGE}" "${ISL}" "${OSL}"
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index c7f68885c..324e88d56 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -141,3 +141,11 @@
   description:
     - Use upstream SGLang images on mi300, mi325 and mi355 for dsr1fp8
   pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/332
+
+- config-keys:
+    - gptoss-fp4-gb200-dynamo-trt
+    - gptoss-fp4-b200-trt
+  description:
+    - Explicitly set EP=TP for DP attention configs for the B200 AGG configs in the nvidia-master file. The multinode refactor inadvertently changed the default to EP=1.
+    - Add GPTOSS DISAGG configurations for GB200 1k1k and 8k1k.
+  pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/387
diff --git a/runners/launch_gb200-nv.sh b/runners/launch_gb200-nv.sh
index ff611bce8..40f6dc439 100755
--- a/runners/launch_gb200-nv.sh
+++ b/runners/launch_gb200-nv.sh
@@ -25,8 +25,18 @@ export MODEL_PATH=$MODEL
 if [[ $FRAMEWORK == "dynamo-sglang" ]]; then
     export CONFIG_DIR="/mnt/lustre01/artifacts/sglang-configs/1k1k"
     export SGL_SLURM_JOBS_PATH="dynamo/examples/backends/sglang/slurm_jobs"
-else
-    export SERVED_MODEL_NAME="deepseek-r1-fp4"
+elif [[ $FRAMEWORK == "dynamo-trt" ]]; then
+    # Pick the served model name per model family; gptoss also overrides
+    # MODEL_PATH, while dsr1 keeps whatever MODEL_PATH was exported earlier.
+    if [[ $MODEL_PREFIX == "gptoss" ]]; then
+        export MODEL_PATH="/mnt/lustre01/models/gpt-oss-120b"
+        export SERVED_MODEL_NAME="gpt-oss-120b"
+    elif [[ $MODEL_PREFIX == "dsr1" ]]; then
+        export SERVED_MODEL_NAME="deepseek-r1-fp4"
+    else
+        echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: gptoss, dsr1"
+        exit 1
+    fi
 fi
 
 export ISL="$ISL"
@@ -59,7 +67,7 @@ if [[ $FRAMEWORK == "dynamo-trt" ]]; then
     echo "Found logs directory: $LOGS_DIR"
 
     # Find all result subdirectories in this logs directory
-    RESULT_SUBDIRS=$(find "$LOGS_DIR" -name "ctx*_gen*_[td]ep*_batch*_eplb*_mtp*" -type d)
+    RESULT_SUBDIRS=$(find "$LOGS_DIR" -name "ctx*_gen*_*_batch*_eplb*_mtp*" -type d)
 
     if [ -z "$RESULT_SUBDIRS" ]; then
         echo "No result subdirectories found in $LOGS_DIR"