@@ -190,7 +190,7 @@ __device__ void speculate_update_repeat_times_optimized(
buffer_ptr_pre_ids.toggle();
}
}
- // each core loads all the needed pre_ids into lm without mfence inbetween
+ // each core loads all the needed pre_ids into lm without mfence in between
// according to the index recorded by previous iteration
else {
int cnt = -1;
8 changes: 4 additions & 4 deletions fastdeploy/engine/common_engine.py
@@ -508,7 +508,7 @@ def _insert_task_to_worker(self):
main_process_metrics.num_requests_waiting.dec(len(tasks))
main_process_metrics.num_requests_running.inc(len(tasks))
except Exception as e:
err_msg = f"Error happend while insert task to engine: {e}, {traceback.format_exc()!s}."
err_msg = f"Error happened while insert task to engine: {e}, {traceback.format_exc()!s}."
llm_logger.error(err_msg)

def _scheduler_task_to_worker_v1(self):
@@ -560,7 +560,7 @@ def _fetch_request():
time.sleep(0.005)

except Exception as e:
err_msg = "Error happend while insert task to engine: {}, {}.".format(e, str(traceback.format_exc()))
err_msg = "Error happened while insert task to engine: {}, {}.".format(e, str(traceback.format_exc()))
llm_logger.error(err_msg)

def start_zmq_service(self, api_server_pid=None):
@@ -642,7 +642,7 @@ def _insert_zmq_task_to_scheduler(self):
self.zmq_server.send_multipart(request_id, [error_result])
except Exception as e:
llm_logger.error(
f"Error happend while receiving new request from zmq, details={e}, "
f"Error happened while receiving new request from zmq, details={e}, "
f"traceback={traceback.format_exc()}"
)

@@ -660,7 +660,7 @@ def _zmq_send_generated_tokens(self):
self.zmq_server.send_multipart(request_id, contents)

except Exception as e:
llm_logger.error(f"Unexcepted error happend: {e}, {traceback.format_exc()!s}")
llm_logger.error(f"Unexcepted error happened: {e}, {traceback.format_exc()!s}")

def split_mode_get_tasks(self):
"""
2 changes: 1 addition & 1 deletion fastdeploy/engine/engine.py
@@ -563,7 +563,7 @@ def generate(self, prompts, stream):
try:
req_id = self._format_and_add_data(prompts)
except Exception as e:
llm_logger.error(f"Error happend while adding request, details={e}, {str(traceback.format_exc())}")
llm_logger.error(f"Error happened while adding request, details={e}, {str(traceback.format_exc())}")
raise EngineError(str(e), error_code=400)

# Get the result of the current request
6 changes: 3 additions & 3 deletions fastdeploy/entrypoints/engine_client.py
@@ -204,8 +204,8 @@ async def add_requests(self, task):
f"preprocess time cost {preprocess_cost_time}"
)

- self.vaild_parameters(task)
- api_server_logger.debug(f"Recieve task: {task}")
+ self.valid_parameters(task)
+ api_server_logger.debug(f"Receive task: {task}")
try:
if not self.enable_mm:
self.zmq_client.send_json(task)
@@ -215,7 +215,7 @@ async def add_requests(self, task):
api_server_logger.error(f"zmq_client send task error: {e}, {str(traceback.format_exc())}")
raise EngineError(str(e), error_code=400)

- def vaild_parameters(self, data):
+ def valid_parameters(self, data):
"""
Validate stream options
"""
2 changes: 1 addition & 1 deletion fastdeploy/entrypoints/llm.py
@@ -125,7 +125,7 @@ def _receive_output(self):
continue
self.req_output[request_id].add(result)
except Exception as e:
llm_logger.error(f"Unexcepted error happend: {e}, {traceback.format_exc()!s}")
llm_logger.error(f"Unexcepted error happened: {e}, {traceback.format_exc()!s}")

def generate(
self,
@@ -124,7 +124,7 @@ def get_kv_cache_shape(
kv_cache_quant_type: str = None,
):
"""
- Caculate kv cache shape
+ Calculate kv cache shape
"""
if kv_cache_quant_type is not None and kv_cache_quant_type == "int4_zp":
return (
2 changes: 1 addition & 1 deletion fastdeploy/model_executor/layers/lm_head.py
@@ -56,7 +56,7 @@ def __init__(
embedding_dim (int): size of hidden state.
prefix (str): The name of current layer. Defaults to "".
with_bias (bool): whether to have bias. Default: False.
- dtype (str): The dtype of weight. Defalut: None.
+ dtype (str): The dtype of weight. Default: None.
"""
super(ParallelLMHead, self).__init__()
self.weight_key: str = prefix + ".weight"
2 changes: 1 addition & 1 deletion fastdeploy/model_executor/layers/sample/sampler.py
@@ -364,7 +364,7 @@ def forward_cuda(
)
if sampling_metadata.enable_early_stop:
# will set the stop batch in stop_flags
- assert sampling_metadata.stop_flags is not None, "need stop_flags for eary stop"
+ assert sampling_metadata.stop_flags is not None, "need stop_flags for early stop"
self.early_stopper.process(probs, next_tokens, sampling_metadata.stop_flags)

sampler_output = SamplerOutput(
2 changes: 1 addition & 1 deletion fastdeploy/model_executor/ops/triton_ops/triton_utils.py
@@ -683,7 +683,7 @@ def decorator(*args, **kwargs):
op_dict = {"op_name": op_name, "reset_zero_when_tune": ""}
op_dict["triton_kernel_args"] = ",".join(modified_arg_exclude_constexpr)
op_dict["key"] = ",".join(self.key_args)
- # when tunning, we need to reset the out to zero.
+ # when tuning, we need to reset the out to zero.
if "reset_zero_when_tune" in other_config.keys():
op_dict["reset_zero_when_tune"] = other_config["reset_zero_when_tune"]

2 changes: 1 addition & 1 deletion fastdeploy/output/token_processor.py
@@ -178,7 +178,7 @@ def process_sampling_results(self):
)

except Exception as e:
print(f"Recieve message error: {e}")
print(f"Receive message error: {e}")
continue
else:
is_blocking = True
2 changes: 1 addition & 1 deletion fastdeploy/rl/dynamic_weight_manager.py
@@ -105,7 +105,7 @@ def _update_ipc(self):

def clear_parameters(self, pid: int = 0) -> None:
"""Clear all model parameters and free memory."""
logger.info("start clear paramaters")
logger.info("start clear parameters")
paddle.device.cuda.empty_cache()
for param in self.model.state_dict().values():
param._clear_data()
8 changes: 4 additions & 4 deletions fastdeploy/rl/rollout_model.py
@@ -146,7 +146,7 @@ def name(self) -> str:
return "Ernie4_5_MoeForCausalLMRL"

def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]:
"""Generate mapping between inference and training parameter for RL(donot delete!)."""
"""Generate mapping between inference and training parameter for RL(do not delete!)."""
if self._mappings_built:
return self.infer_to_train_mapping

@@ -225,7 +225,7 @@ def name(self) -> str:
return "Ernie4_5_VLMoeForConditionalGenerationRL"

def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]:
"""Generate mapping between inference and training parameter for RL(donot delete!)."""
"""Generate mapping between inference and training parameter for RL(do not delete!)."""
if self._mappings_built:
return self.infer_to_train_mapping

@@ -331,7 +331,7 @@ def name(self) -> str:
return "Qwen2ForCausalLMRL"

def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]:
"""Generate mapping between inference and training parameter for RL(donot delete!)."""
"""Generate mapping between inference and training parameter for RL(do not delete!)."""
if self._mappings_built:
return self.infer_to_train_mapping

@@ -380,7 +380,7 @@ def name(self) -> str:
return "Qwen3MoeForCausalLMRL"

def get_name_mappings_to_training(self, trainer_degree=None) -> Dict[str, str]:
"""Generate mapping between inference and training parameter for RL(donot delete!)."""
"""Generate mapping between inference and training parameter for RL(do not delete!)."""
if self._mappings_built:
return self.infer_to_train_mapping

2 changes: 1 addition & 1 deletion fastdeploy/scheduler/global_scheduler.py
@@ -648,7 +648,7 @@ def _put_results_worker(self, tasks: List[Task]):
stolen_responses[response_queue_name].append(response.serialize())
continue

scheduler_logger.error(f"Scheduler has recieved a non-existent response from engine: {[response]}")
scheduler_logger.error(f"Scheduler has received a non-existent response from engine: {[response]}")

with self.mutex:
for request_id, responses in local_responses.items():
2 changes: 1 addition & 1 deletion fastdeploy/worker/dcu_worker.py
@@ -49,7 +49,7 @@ def init_device(self):
"""
self.max_chips_per_node = 8
if self.device_config.device_type == "cuda" and paddle.device.is_compiled_with_cuda():
- # Set evironment variable
+ # Set environment variable
self.device_ids = self.parallel_config.device_ids.split(",")
self.device = f"gpu:{self.local_rank % self.max_chips_per_node}"
paddle.device.set_device(self.device)