From 8b512f709d474f648041f8791e1a224310df76f7 Mon Sep 17 00:00:00 2001 From: juncaipeng <13006307475@163.com> Date: Thu, 20 Nov 2025 11:49:53 +0000 Subject: [PATCH] Fix schedule error in splitwise deployment --- fastdeploy/engine/sched/resource_manager_v1.py | 2 ++ fastdeploy/scheduler/local_scheduler.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index 2d6641ed91c..8ce4ac909d6 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -514,6 +514,8 @@ def schedule(self): error_reqs: list[tuple[str, str]] = [] token_budget = self.config.scheduler_config.max_num_batched_tokens + self.check_and_free_block_tables() + # First, schedule the RUNNING requests. req_index = 0 num_decoding_req_nums = 0 diff --git a/fastdeploy/scheduler/local_scheduler.py b/fastdeploy/scheduler/local_scheduler.py index 548789f7a79..8684270cbdf 100644 --- a/fastdeploy/scheduler/local_scheduler.py +++ b/fastdeploy/scheduler/local_scheduler.py @@ -285,7 +285,7 @@ def get_requests( if short_partial_requests + long_partial_requests > self.max_num_partial_prefills: break else: - if current_prefill_tokens > max_num_batched_tokens: + if current_prefill_tokens > max_num_batched_tokens and len(requests) > 0: break requests.append(request.raw)