PaddlePaddle · gongshaotian · Oct 23, 2025 · Oct 21, 2025 · Oct 21, 2025 · Oct 22, 2025
diff --git a/fastdeploy/config.py b/fastdeploy/config.py
@@ -1510,9 +1510,7 @@ def postprocess(self):
                 self.structured_outputs_config.guided_decoding_backend = "xgrammar"
 
         # Adjustment GraphOptConfig
-        if (self.scheduler_config.splitwise_role != "mixed") or (
-            self.load_config is not None and self.load_config.dynamic_load_weight is True
-        ):
+        if self.scheduler_config.splitwise_role != "mixed":
             self.graph_opt_config.use_cudagraph = False
             logger.info(
                 "CUDAGraph does not support to be started together with PD Disaggregation temporarily, but has been automatically closed!"
@@ -1630,11 +1628,12 @@ def check(self):
             self.scheduler_config.check()
 
         # Check graph optimization config
-        if self.graph_opt_config.graph_opt_level > 0 or self.graph_opt_config.use_cudagraph:
+        if self.graph_opt_config.graph_opt_level > 0:
             if self.load_config is not None:
                 assert (
                     self.load_config.dynamic_load_weight is False
                 ), "Static graph cannot be used in RL scene temporarily"
+
         if int(envs.ENABLE_V1_KVCACHE_SCHEDULER) == 1:
             assert (
                 int(envs.FD_DISABLED_RECOVER) == 0

diff --git a/tests/ce/stable_cases/launch_model.sh b/tests/ce/stable_cases/launch_model.sh
@@ -38,7 +38,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
        --cache-queue-port ${FD_CACHE_QUEUE_PORT} \
        --quantization wint8 \
        --max-model-len 32768 \
-       --max-num-seqs 256 \
+       --max-num-seqs 1 \
        --gpu-memory-utilization 0.9 \
        --model "$MODEL_PATH" \
        --load-strategy ipc_snapshot \

diff --git a/tests/ce/stable_cases/run.sh b/tests/ce/stable_cases/run.sh
@@ -12,7 +12,7 @@ PORT="${FD_API_PORT}"  # Must match the URL port used by the launch script
 BASE_URL="http://$HOST:$PORT"
 
 TOTAL_ROUNDS=30
-CHAT_REQUESTS_PER_ROUND=5
+CHAT_REQUESTS_PER_ROUND=1
 export CUDA_VISIBLE_DEVICES=0,1
 MAX_MEMORY_MB=10240  # 10GB