diff --git a/ucm/patch/0.9.2/vllm-adapt.patch b/ucm/patch/0.9.2/vllm-adapt.patch index b224061c..2023e5f7 100644 --- a/ucm/patch/0.9.2/vllm-adapt.patch +++ b/ucm/patch/0.9.2/vllm-adapt.patch @@ -278,7 +278,7 @@ index b06b7cc80..22c22a148 100644 - (output, ) = self.collective_rpc( + non_block = self.max_concurrent_batches > 1 + -+ if not self.has_connector: ++ if not self.has_connector or self.vllm_config.model_config.use_mla: + # get output only from a single worker (output_rank) + (output, ) = self.collective_rpc( + "execute_model",