diff --git a/fastdeploy/demo/tokenzier_client_demo.py b/fastdeploy/demo/tokenizer_client_demo.py similarity index 98% rename from fastdeploy/demo/tokenzier_client_demo.py rename to fastdeploy/demo/tokenizer_client_demo.py index 0f4ba36c81d..dbd2eca75bf 100644 --- a/fastdeploy/demo/tokenzier_client_demo.py +++ b/fastdeploy/demo/tokenizer_client_demo.py @@ -16,7 +16,7 @@ import asyncio -from fastdeploy.input.tokenzier_client import ( +from fastdeploy.input.tokenizer_client import ( AsyncTokenizerClient, ImageDecodeRequest, ImageEncodeRequest, diff --git a/fastdeploy/entrypoints/openai/response_processors.py b/fastdeploy/entrypoints/openai/response_processors.py index 41761963be8..0954568111e 100644 --- a/fastdeploy/entrypoints/openai/response_processors.py +++ b/fastdeploy/entrypoints/openai/response_processors.py @@ -18,7 +18,7 @@ from typing import Any, Dict, List, Optional from fastdeploy.entrypoints.openai.usage_calculator import count_tokens -from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest +from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest class ChatResponseProcessor: diff --git a/fastdeploy/entrypoints/openai/v1/serving_chat.py b/fastdeploy/entrypoints/openai/v1/serving_chat.py index ed622c2d4dd..4337feea6ab 100644 --- a/fastdeploy/entrypoints/openai/v1/serving_chat.py +++ b/fastdeploy/entrypoints/openai/v1/serving_chat.py @@ -44,7 +44,7 @@ OpenAiServingBase, ServingResponseContext, ) -from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest +from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest from fastdeploy.metrics.metrics import main_process_metrics from fastdeploy.utils import api_server_logger from fastdeploy.worker.output import LogprobsLists diff --git a/fastdeploy/input/tokenzier_client.py b/fastdeploy/input/tokenizer_client.py similarity index 100% rename from fastdeploy/input/tokenzier_client.py rename to fastdeploy/input/tokenizer_client.py diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py index c0e689735d4..1641104eddc 100644 --- a/fastdeploy/worker/gpu_model_runner.py +++ b/fastdeploy/worker/gpu_model_runner.py @@ -640,12 +640,12 @@ def _process_mm_features(self, request_list: List[Request]): image_features_output is not None ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}" grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx] - mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) - mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] + mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw) + mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length] # add feature to encoder cache self.encoder_cache[mm_hash] = mm_feature.detach().cpu() - feature_idx += mm_token_lenght + feature_idx += mm_token_length thw_idx += 1 feature_start = feature_position.offset @@ -665,13 +665,13 @@ def _process_mm_features(self, request_list: List[Request]): merge_image_features, thw_idx = [], 0 for feature_position in feature_position_item: grid_thw = grid_thw_lst[thw_idx] - mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) - mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] + mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw) + mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length] feature_start = feature_position.offset feature_end = feature_position.offset + feature_position.length merge_image_features.append(mm_feature[feature_start:feature_end]) - feature_idx += mm_token_lenght + feature_idx += mm_token_length thw_idx += 1 image_features_list.append(paddle.concat(merge_image_features, axis=0)) for idx, index in req_idx_img_index_map.items(): diff --git a/fastdeploy/worker/metax_model_runner.py b/fastdeploy/worker/metax_model_runner.py index 93f5cec6a57..5bce889be69 100644 --- a/fastdeploy/worker/metax_model_runner.py +++ b/fastdeploy/worker/metax_model_runner.py @@ -571,12 +571,12 @@ def _process_mm_features(self, request_list: List[Request]): image_features_output is not None ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}" grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx] - mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) - mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] + mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw) + mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length] # add feature to encoder cache self.encoder_cache[mm_hash] = mm_feature.detach().cpu() - feature_idx += mm_token_lenght + feature_idx += mm_token_length thw_idx += 1 feature_start = feature_position.offset @@ -596,13 +596,13 @@ def _process_mm_features(self, request_list: List[Request]): merge_image_features, thw_idx = [], 0 for feature_position in feature_position_item: grid_thw = grid_thw_lst[thw_idx] - mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) - mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] + mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw) + mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length] feature_start = feature_position.offset feature_end = feature_position.offset + feature_position.length merge_image_features.append(mm_feature[feature_start:feature_end]) - feature_idx += mm_token_lenght + feature_idx += mm_token_length thw_idx += 1 image_features_list.append(paddle.concat(merge_image_features, axis=0)) for idx, index in req_idx_img_index_map.items(): diff --git a/fastdeploy/worker/xpu_model_runner.py b/fastdeploy/worker/xpu_model_runner.py index 1446257d3ae..5e981ebc816 100644 --- a/fastdeploy/worker/xpu_model_runner.py +++ b/fastdeploy/worker/xpu_model_runner.py @@ -485,12 +485,12 @@ def _process_mm_features(self, request_list: List[Request]): image_features_output is not None ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}" grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx] - mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) - mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] + mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw) + mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length] # add feature to encoder cache self.encoder_cache[mm_hash] = mm_feature.detach().cpu() - feature_idx += mm_token_lenght + feature_idx += mm_token_length thw_idx += 1 feature_start = feature_position.offset @@ -510,13 +510,13 @@ def _process_mm_features(self, request_list: List[Request]): image_features_output = self.extract_vision_features(multi_vision_inputs) for feature_position in multi_vision_inputs["feature_position_list"]: grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx] - mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw) - mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght] + mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw) + mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length] feature_start = feature_position.offset feature_end = feature_position.offset + feature_position.length merge_image_features.append(mm_feature[feature_start:feature_end]) - feature_idx += mm_token_lenght + feature_idx += mm_token_length thw_idx += 1 self.share_inputs["image_features"] = paddle.concat(merge_image_features, axis=0) diff --git a/tests/input/test_tokenizer_client.py b/tests/input/test_tokenizer_client.py index 42e3e8ee594..5721f54b9f5 100644 --- a/tests/input/test_tokenizer_client.py +++ b/tests/input/test_tokenizer_client.py @@ -18,7 +18,7 @@ import pytest import respx -from fastdeploy.input.tokenzier_client import ( +from fastdeploy.input.tokenizer_client import ( AsyncTokenizerClient, ImageEncodeRequest, VideoEncodeRequest,