diff --git a/custom_ops/xpu_ops/src/plugin/src/kernel/kunlun3cpp/mtp_kernel/speculate_update_repeat_times.xpu b/custom_ops/xpu_ops/src/plugin/src/kernel/kunlun3cpp/mtp_kernel/speculate_update_repeat_times.xpu
index ce3898fb261..4f42fd69f16 100644
--- a/custom_ops/xpu_ops/src/plugin/src/kernel/kunlun3cpp/mtp_kernel/speculate_update_repeat_times.xpu
+++ b/custom_ops/xpu_ops/src/plugin/src/kernel/kunlun3cpp/mtp_kernel/speculate_update_repeat_times.xpu
@@ -190,7 +190,7 @@ __device__ void speculate_update_repeat_times_optimized(
             buffer_ptr_pre_ids.toggle();
         }
     }
-    // each core loads all the needed pre_ids into lm without mfence inbetween
+    // each core loads all the needed pre_ids into lm without mfence in between
     // according to the index recorded by previous iteration
     else {
         int cnt = -1;
diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py
index 929e093c4cd..b23f1bd6d0f 100644
--- a/fastdeploy/engine/common_engine.py
+++ b/fastdeploy/engine/common_engine.py
@@ -508,7 +508,7 @@ def _insert_task_to_worker(self):
                 main_process_metrics.num_requests_waiting.dec(len(tasks))
                 main_process_metrics.num_requests_running.inc(len(tasks))
             except Exception as e:
-                err_msg = f"Error happend while insert task to engine: {e}, {traceback.format_exc()!s}."
+                err_msg = f"Error happened while insert task to engine: {e}, {traceback.format_exc()!s}."
                 llm_logger.error(err_msg)
 
     def _scheduler_task_to_worker_v1(self):
@@ -560,7 +560,7 @@ def _fetch_request():
                 time.sleep(0.005)
 
             except Exception as e:
-                err_msg = "Error happend while insert task to engine: {}, {}.".format(e, str(traceback.format_exc()))
+                err_msg = "Error happened while insert task to engine: {}, {}.".format(e, str(traceback.format_exc()))
                 llm_logger.error(err_msg)
 
     def start_zmq_service(self, api_server_pid=None):
@@ -642,7 +642,7 @@ def _insert_zmq_task_to_scheduler(self):
                     self.zmq_server.send_multipart(request_id, [error_result])
             except Exception as e:
                 llm_logger.error(
-                    f"Error happend while receiving new request from zmq, details={e}, "
+                    f"Error happened while receiving new request from zmq, details={e}, "
                     f"traceback={traceback.format_exc()}"
                 )
 
@@ -660,7 +660,7 @@ def _zmq_send_generated_tokens(self):
                    self.zmq_server.send_multipart(request_id, contents)
 
            except Exception as e:
-                llm_logger.error(f"Unexcepted error happend: {e}, {traceback.format_exc()!s}")
+                llm_logger.error(f"Unexpected error happened: {e}, {traceback.format_exc()!s}")
 
     def split_mode_get_tasks(self):
         """
diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py
index 00f24d998cc..9109cc7b6ff 100644
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -563,7 +563,7 @@ def generate(self, prompts, stream):
         try:
             req_id = self._format_and_add_data(prompts)
         except Exception as e:
-            llm_logger.error(f"Error happend while adding request, details={e}, {str(traceback.format_exc())}")
+            llm_logger.error(f"Error happened while adding request, details={e}, {str(traceback.format_exc())}")
             raise EngineError(str(e), error_code=400)
 
         # Get the result of the current request
diff --git a/fastdeploy/entrypoints/engine_client.py b/fastdeploy/entrypoints/engine_client.py
index fa23aaaee7f..777689c7361 100644
--- a/fastdeploy/entrypoints/engine_client.py
+++ b/fastdeploy/entrypoints/engine_client.py
@@ -204,8 +204,8 @@ async def add_requests(self, task):
             f"preprocess time cost {preprocess_cost_time}"
         )
 
-        self.vaild_parameters(task)
-        api_server_logger.debug(f"Recieve task: {task}")
+        self.valid_parameters(task)
+        api_server_logger.debug(f"Receive task: {task}")
         try:
             if not self.enable_mm:
                 self.zmq_client.send_json(task)
@@ -215,7 +215,7 @@ async def add_requests(self, task):
             api_server_logger.error(f"zmq_client send task error: {e}, {str(traceback.format_exc())}")
             raise EngineError(str(e), error_code=400)
 
-    def vaild_parameters(self, data):
+    def valid_parameters(self, data):
         """
         Validate stream options
         """
diff --git a/fastdeploy/entrypoints/llm.py b/fastdeploy/entrypoints/llm.py
index d69068b6fc5..68a4d3a648f 100644
--- a/fastdeploy/entrypoints/llm.py
+++ b/fastdeploy/entrypoints/llm.py
@@ -125,7 +125,7 @@ def _receive_output(self):
                     continue
                 self.req_output[request_id].add(result)
             except Exception as e:
-                llm_logger.error(f"Unexcepted error happend: {e}, {traceback.format_exc()!s}")
+                llm_logger.error(f"Unexpected error happened: {e}, {traceback.format_exc()!s}")
 
     def generate(
         self,
diff --git a/fastdeploy/model_executor/layers/attention/moba_attention_backend.py b/fastdeploy/model_executor/layers/attention/moba_attention_backend.py
index 7ddba90d1d0..024e97ee21e 100644
--- a/fastdeploy/model_executor/layers/attention/moba_attention_backend.py
+++ b/fastdeploy/model_executor/layers/attention/moba_attention_backend.py
@@ -124,7 +124,7 @@ def get_kv_cache_shape(
         kv_cache_quant_type: str = None,
     ):
         """
-        Caculate kv cache shape
+        Calculate kv cache shape
         """
         if kv_cache_quant_type is not None and kv_cache_quant_type == "int4_zp":
             return (
diff --git a/fastdeploy/model_executor/layers/lm_head.py b/fastdeploy/model_executor/layers/lm_head.py
index a62e46d61d1..3af96bbe55c 100644
--- a/fastdeploy/model_executor/layers/lm_head.py
+++ b/fastdeploy/model_executor/layers/lm_head.py
@@ -56,7 +56,7 @@ def __init__(
             embedding_dim (int): size of hidden state.
             prefix (str): The name of current layer. Defaults to "".
             with_bias (bool): whether to have bias. Default: False.
-            dtype (str): The dtype of weight. Defalut: None.
+            dtype (str): The dtype of weight. Default: None.
         """
         super(ParallelLMHead, self).__init__()
         self.weight_key: str = prefix + ".weight"
diff --git a/fastdeploy/model_executor/layers/sample/sampler.py b/fastdeploy/model_executor/layers/sample/sampler.py
index f8fd1755ab9..51ae0aec4a7 100644
--- a/fastdeploy/model_executor/layers/sample/sampler.py
+++ b/fastdeploy/model_executor/layers/sample/sampler.py
@@ -364,7 +364,7 @@ def forward_cuda(
         )
         if sampling_metadata.enable_early_stop:
             # will set the stop batch in stop_flags
-            assert sampling_metadata.stop_flags is not None, "need stop_flags for eary stop"
+            assert sampling_metadata.stop_flags is not None, "need stop_flags for early stop"
             self.early_stopper.process(probs, next_tokens, sampling_metadata.stop_flags)
 
         sampler_output = SamplerOutput(
diff --git a/fastdeploy/model_executor/ops/triton_ops/triton_utils.py b/fastdeploy/model_executor/ops/triton_ops/triton_utils.py
index c6ebd27422e..2a2a00d0d09 100644
--- a/fastdeploy/model_executor/ops/triton_ops/triton_utils.py
+++ b/fastdeploy/model_executor/ops/triton_ops/triton_utils.py
@@ -683,7 +683,7 @@ def decorator(*args, **kwargs):
             op_dict = {"op_name": op_name, "reset_zero_when_tune": ""}
             op_dict["triton_kernel_args"] = ",".join(modified_arg_exclude_constexpr)
             op_dict["key"] = ",".join(self.key_args)
-            # when tunning, we need to reset the out to zero.
+            # when tuning, we need to reset the out to zero.
             if "reset_zero_when_tune" in other_config.keys():
                 op_dict["reset_zero_when_tune"] = other_config["reset_zero_when_tune"]
 
diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py
index 8245e56571e..8915b62ab7c 100644
--- a/fastdeploy/output/token_processor.py
+++ b/fastdeploy/output/token_processor.py
@@ -178,7 +178,7 @@ def process_sampling_results(self):
                     )
 
                 except Exception as e:
-                    print(f"Recieve message error: {e}")
+                    print(f"Receive message error: {e}")
                    continue
            else:
                is_blocking = True
diff --git a/fastdeploy/rl/dynamic_weight_manager.py b/fastdeploy/rl/dynamic_weight_manager.py
index e687c707ec2..f47928e6bd6 100644
--- a/fastdeploy/rl/dynamic_weight_manager.py
+++ b/fastdeploy/rl/dynamic_weight_manager.py
@@ -105,7 +105,7 @@ def _update_ipc(self):
 
     def clear_parameters(self, pid: int = 0) -> None:
         """Clear all model parameters and free memory."""
-        logger.info("start clear paramaters")
+        logger.info("start clear parameters")
         paddle.device.cuda.empty_cache()
         for param in self.model.state_dict().values():
             param._clear_data()
diff --git a/fastdeploy/rl/rollout_model.py b/fastdeploy/rl/rollout_model.py
index f6c390120e8..3282e4548f3 100644
--- a/fastdeploy/rl/rollout_model.py
+++ b/fastdeploy/rl/rollout_model.py
@@ -146,7 +146,7 @@ def name(self) -> str:
         return "Ernie4_5_MoeForCausalLMRL"
 
     def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]:
-        """Generate mapping between inference and training parameter for RL(donot delete!)."""
+        """Generate mapping between inference and training parameter for RL(do not delete!)."""
         if self._mappings_built:
             return self.infer_to_train_mapping
 
@@ -225,7 +225,7 @@ def name(self) -> str:
         return "Ernie4_5_VLMoeForConditionalGenerationRL"
 
     def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]:
-        """Generate mapping between inference and training parameter for RL(donot delete!)."""
+        """Generate mapping between inference and training parameter for RL(do not delete!)."""
         if self._mappings_built:
             return self.infer_to_train_mapping
 
@@ -331,7 +331,7 @@ def name(self) -> str:
         return "Qwen2ForCausalLMRL"
 
     def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]:
-        """Generate mapping between inference and training parameter for RL(donot delete!)."""
+        """Generate mapping between inference and training parameter for RL(do not delete!)."""
         if self._mappings_built:
             return self.infer_to_train_mapping
 
@@ -380,7 +380,7 @@ def name(self) -> str:
         return "Qwen3MoeForCausalLMRL"
 
     def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]:
-        """Generate mapping between inference and training parameter for RL(donot delete!)."""
+        """Generate mapping between inference and training parameter for RL(do not delete!)."""
         if self._mappings_built:
             return self.infer_to_train_mapping
 
diff --git a/fastdeploy/scheduler/global_scheduler.py b/fastdeploy/scheduler/global_scheduler.py
index f3962992cc2..db1f85c83e8 100644
--- a/fastdeploy/scheduler/global_scheduler.py
+++ b/fastdeploy/scheduler/global_scheduler.py
@@ -648,7 +648,7 @@ def _put_results_worker(self, tasks: List[Task]):
                     stolen_responses[response_queue_name].append(response.serialize())
                     continue
 
-                scheduler_logger.error(f"Scheduler has recieved a non-existent response from engine: {[response]}")
+                scheduler_logger.error(f"Scheduler has received a non-existent response from engine: {[response]}")
 
         with self.mutex:
             for request_id, responses in local_responses.items():
diff --git a/fastdeploy/worker/dcu_worker.py b/fastdeploy/worker/dcu_worker.py
index 0945f512fb7..c87a27c29cb 100644
--- a/fastdeploy/worker/dcu_worker.py
+++ b/fastdeploy/worker/dcu_worker.py
@@ -49,7 +49,7 @@ def init_device(self):
         """
         self.max_chips_per_node = 8
         if self.device_config.device_type == "cuda" and paddle.device.is_compiled_with_cuda():
-            # Set evironment variable
+            # Set environment variable
             self.device_ids = self.parallel_config.device_ids.split(",")
             self.device = f"gpu:{self.local_rank % self.max_chips_per_node}"
             paddle.device.set_device(self.device)