From fdba04197191b62787d063f87ffad12de5e63aa3 Mon Sep 17 00:00:00 2001 From: gongshaotian Date: Tue, 21 Oct 2025 14:55:31 +0800 Subject: [PATCH 1/2] Turn on the CUDAGraph + RL switch --- fastdeploy/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fastdeploy/config.py b/fastdeploy/config.py index 738f563efd6..943525e4dcf 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -1513,7 +1513,6 @@ def postprocess(self): if ( (self.speculative_config is not None and self.speculative_config.method is not None) or (self.model_config is not None and self.model_config.enable_mm is True) - or (self.load_config is not None and self.load_config.dynamic_load_weight is True) or (self.scheduler_config.splitwise_role != "mixed") ): self.graph_opt_config.use_cudagraph = False @@ -1642,11 +1641,12 @@ def check(self): assert ( self.model_config.enable_mm is not True ), "CUDAGraph cannot be applied to multimodal model temporarily" - if self.graph_opt_config.graph_opt_level > 0 or self.graph_opt_config.use_cudagraph: + if self.graph_opt_config.graph_opt_level > 0: if self.load_config is not None: assert ( self.load_config.dynamic_load_weight is False ), "Static graph cannot be used in RL scene temporarily" + if int(envs.ENABLE_V1_KVCACHE_SCHEDULER) == 1: assert ( int(envs.FD_DISABLED_RECOVER) == 0 From e945d4ec3d7961af426f038b82e5fdea254cc731 Mon Sep 17 00:00:00 2001 From: gongshaotian Date: Wed, 22 Oct 2025 20:47:15 +0800 Subject: [PATCH 2/2] reduce max_num_seqs and number of requests --- tests/ce/stable_cases/launch_model.sh | 2 +- tests/ce/stable_cases/run.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ce/stable_cases/launch_model.sh b/tests/ce/stable_cases/launch_model.sh index 1850dc944dc..3b758a15a2a 100644 --- a/tests/ce/stable_cases/launch_model.sh +++ b/tests/ce/stable_cases/launch_model.sh @@ -38,7 +38,7 @@ python -m fastdeploy.entrypoints.openai.api_server \ --cache-queue-port ${FD_CACHE_QUEUE_PORT} \ 
--quantization wint8 \ --max-model-len 32768 \ - --max-num-seqs 256 \ + --max-num-seqs 1 \ --gpu-memory-utilization 0.9 \ --model "$MODEL_PATH" \ --load-strategy ipc_snapshot \ diff --git a/tests/ce/stable_cases/run.sh b/tests/ce/stable_cases/run.sh index 6b7f939bb6e..81197253ba5 100644 --- a/tests/ce/stable_cases/run.sh +++ b/tests/ce/stable_cases/run.sh @@ -12,7 +12,7 @@ PORT="${FD_API_PORT}" # 这里需要配合启动脚本那个URL PORT BASE_URL="http://$HOST:$PORT" TOTAL_ROUNDS=30 -CHAT_REQUESTS_PER_ROUND=5 +CHAT_REQUESTS_PER_ROUND=1 export CUDA_VISIBLE_DEVICES=0,1 MAX_MEMORY_MB=10240 # 10GB