From 1c933ddce38e7fef4f33dbbd708c04256862fab9 Mon Sep 17 00:00:00 2001 From: yuanlehome Date: Wed, 17 Dec 2025 15:30:41 +0800 Subject: [PATCH] fix rl signal --- fastdeploy/worker/worker_process.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py index 6975a08848e..77ffdc43dea 100644 --- a/fastdeploy/worker/worker_process.py +++ b/fastdeploy/worker/worker_process.py @@ -423,17 +423,11 @@ def event_loop_normal(self) -> None: while True: # run eplb self._run_eplb(tp_rank) - if tp_rank == 0: + + if self.fd_config.load_config.dynamic_load_weight: if self.model_weights_status.value[0] != ModelWeightsStatus.NORMAL: self.model_weights_signal[0] = int(self.model_weights_status.value[0]) - if self.fd_config.load_config.dynamic_load_weight and self.parallel_config.enable_expert_parallel: - self.model_weights_signal[0] = self._broadcast_model_weights_signal( - src=0, group=self.parallel_config.ep_group - ) - if self.fd_config.load_config.dynamic_load_weight and tp_size > 1: - self.model_weights_signal[0] = self._broadcast_model_weights_signal( - src=0, group=self.parallel_config.tp_group - ) + self.model_weights_signal[0] = self._broadcast_model_weights_signal(src=0, group=None) self.insert_step = False req_dicts = None @@ -455,11 +449,8 @@ def event_loop_normal(self) -> None: self._tp_barrier_wait() if self.fd_config.load_config.dynamic_load_weight: - if self.parallel_config.enable_expert_parallel: - paddle.distributed.barrier(self.parallel_config.ep_group) - else: - paddle.distributed.barrier(self.parallel_config.tp_group) if self.model_weights_signal[0] != ModelWeightsStatus.NORMAL: + paddle.distributed.barrier() logger.info( f"Rank: {self.local_rank} to update or clear parameters, signal is {self.model_weights_signal[0]}, [-1:clear, 1:update]" )