PaddlePaddle · luotao1 · Apr 7, 2026 · Apr 2, 2026
diff --git a/fastdeploy/demo/tokenzier_client_demo.py → fastdeploy/demo/tokenizer_client_demo.py b/fastdeploy/demo/tokenzier_client_demo.py → fastdeploy/demo/tokenizer_client_demo.py
@@ -16,7 +16,7 @@
 
 import asyncio
 
-from fastdeploy.input.tokenzier_client import (
+from fastdeploy.input.tokenizer_client import (
     AsyncTokenizerClient,
     ImageDecodeRequest,
     ImageEncodeRequest,

diff --git a/fastdeploy/entrypoints/openai/response_processors.py b/fastdeploy/entrypoints/openai/response_processors.py
@@ -18,7 +18,7 @@
 from typing import Any, Dict, List, Optional
 
 from fastdeploy.entrypoints.openai.usage_calculator import count_tokens
-from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest
+from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest
 
 
 class ChatResponseProcessor:

diff --git a/fastdeploy/entrypoints/openai/v1/serving_chat.py b/fastdeploy/entrypoints/openai/v1/serving_chat.py
@@ -44,7 +44,7 @@
     OpenAiServingBase,
     ServingResponseContext,
 )
-from fastdeploy.input.tokenzier_client import AsyncTokenizerClient, ImageDecodeRequest
+from fastdeploy.input.tokenizer_client import AsyncTokenizerClient, ImageDecodeRequest
 from fastdeploy.metrics.metrics import main_process_metrics
 from fastdeploy.utils import api_server_logger
 from fastdeploy.worker.output import LogprobsLists

diff --git a/fastdeploy/input/tokenzier_client.py → fastdeploy/input/tokenizer_client.py b/fastdeploy/input/tokenzier_client.py → fastdeploy/input/tokenizer_client.py
diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
@@ -640,12 +640,12 @@ def _process_mm_features(self, request_list: List[Request]):
                                 image_features_output is not None
                             ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
                             grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx]
-                            mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-                            mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+                            mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+                            mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
 
                             # add feature to encoder cache
                             self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
-                            feature_idx += mm_token_lenght
+                            feature_idx += mm_token_length
                             thw_idx += 1
 
                         feature_start = feature_position.offset
@@ -665,13 +665,13 @@ def _process_mm_features(self, request_list: List[Request]):
                 merge_image_features, thw_idx = [], 0
                 for feature_position in feature_position_item:
                     grid_thw = grid_thw_lst[thw_idx]
-                    mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-                    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+                    mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+                    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
 
                     feature_start = feature_position.offset
                     feature_end = feature_position.offset + feature_position.length
                     merge_image_features.append(mm_feature[feature_start:feature_end])
-                    feature_idx += mm_token_lenght
+                    feature_idx += mm_token_length
                     thw_idx += 1
                 image_features_list.append(paddle.concat(merge_image_features, axis=0))
             for idx, index in req_idx_img_index_map.items():

diff --git a/fastdeploy/worker/metax_model_runner.py b/fastdeploy/worker/metax_model_runner.py
@@ -571,12 +571,12 @@ def _process_mm_features(self, request_list: List[Request]):
                                 image_features_output is not None
                             ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
                             grid_thw = multi_vision_inputs["grid_thw_lst_batches"][index][thw_idx]
-                            mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-                            mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+                            mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+                            mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
 
                             # add feature to encoder cache
                             self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
-                            feature_idx += mm_token_lenght
+                            feature_idx += mm_token_length
                             thw_idx += 1
 
                         feature_start = feature_position.offset
@@ -596,13 +596,13 @@ def _process_mm_features(self, request_list: List[Request]):
                 merge_image_features, thw_idx = [], 0
                 for feature_position in feature_position_item:
                     grid_thw = grid_thw_lst[thw_idx]
-                    mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-                    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+                    mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+                    mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
 
                     feature_start = feature_position.offset
                     feature_end = feature_position.offset + feature_position.length
                     merge_image_features.append(mm_feature[feature_start:feature_end])
-                    feature_idx += mm_token_lenght
+                    feature_idx += mm_token_length
                     thw_idx += 1
                 image_features_list.append(paddle.concat(merge_image_features, axis=0))
             for idx, index in req_idx_img_index_map.items():

diff --git a/fastdeploy/worker/xpu_model_runner.py b/fastdeploy/worker/xpu_model_runner.py
@@ -485,12 +485,12 @@ def _process_mm_features(self, request_list: List[Request]):
                             image_features_output is not None
                         ), f"image_features_output is None, images_lst length: {len(multi_vision_inputs['images_lst'])}"
                         grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx]
-                        mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-                        mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+                        mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+                        mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
 
                         # add feature to encoder cache
                         self.encoder_cache[mm_hash] = mm_feature.detach().cpu()
-                        feature_idx += mm_token_lenght
+                        feature_idx += mm_token_length
                         thw_idx += 1
 
                     feature_start = feature_position.offset
@@ -510,13 +510,13 @@ def _process_mm_features(self, request_list: List[Request]):
             image_features_output = self.extract_vision_features(multi_vision_inputs)
             for feature_position in multi_vision_inputs["feature_position_list"]:
                 grid_thw = multi_vision_inputs["grid_thw_lst"][thw_idx]
-                mm_token_lenght = inputs["mm_num_token_func"](grid_thw=grid_thw)
-                mm_feature = image_features_output[feature_idx : feature_idx + mm_token_lenght]
+                mm_token_length = inputs["mm_num_token_func"](grid_thw=grid_thw)
+                mm_feature = image_features_output[feature_idx : feature_idx + mm_token_length]
 
                 feature_start = feature_position.offset
                 feature_end = feature_position.offset + feature_position.length
                 merge_image_features.append(mm_feature[feature_start:feature_end])
-                feature_idx += mm_token_lenght
+                feature_idx += mm_token_length
                 thw_idx += 1
             self.share_inputs["image_features"] = paddle.concat(merge_image_features, axis=0)
 

diff --git a/tests/input/test_tokenizer_client.py b/tests/input/test_tokenizer_client.py
@@ -18,7 +18,7 @@
 import pytest
 import respx
 
-from fastdeploy.input.tokenzier_client import (
+from fastdeploy.input.tokenizer_client import (
     AsyncTokenizerClient,
     ImageEncodeRequest,
     VideoEncodeRequest,