diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py
index cf11ba8fffd..b0b407e05ad 100644
--- a/fastdeploy/entrypoints/openai/serving_chat.py
+++ b/fastdeploy/entrypoints/openai/serving_chat.py
@@ -200,9 +200,7 @@ async def chat_completion_stream_generator(
         max_streaming_response_tokens = max(1, max_streaming_response_tokens)
 
-        enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
-        if enable_thinking is None:
-            enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
+        enable_thinking = self._get_thinking_status(request)
 
         include_stop_str_in_output = request.include_stop_str_in_output
@@ -461,9 +459,7 @@ async def chat_completion_full_generator(
         """
         created_time = int(time.time())
         num_choices = 1 if request.n is None else request.n
-        enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
-        if enable_thinking is None:
-            enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
+        enable_thinking = self._get_thinking_status(request)
         include_stop_str_in_output = request.include_stop_str_in_output
 
         try:
@@ -750,3 +746,20 @@ def _build_logprobs_response(
             error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}"
             api_server_logger.error(error_msg)
             return None
+
+    def _get_thinking_status(self, request: ChatCompletionRequest) -> bool:
+        """
+        Get the thinking status from the request.
+        """
+        enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
+        if enable_thinking is None:
+            enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
+        options = request.chat_template_kwargs.get("options") if request.chat_template_kwargs else None
+        if options:
+            thinking_mode = options.get("thinking_mode")
+            if thinking_mode:
+                if thinking_mode == "close" or thinking_mode == "false":
+                    enable_thinking = False
+                else:
+                    enable_thinking = True
+        return enable_thinking
diff --git a/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py b/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py
index 1211eccf532..317980038dc 100644
--- a/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py
+++ b/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py
@@ -237,6 +237,14 @@ def process_request_dict(self, request, max_model_len=None):
                             request[k] = v
                 else:
                     raise ValueError("Invalid input: chat_template_kwargs must be a dict")
+                options = chat_template_kwargs.get("options")
+                if options:
+                    thinking_mode = options.get("thinking_mode")
+                    if thinking_mode:
+                        if thinking_mode == "close" or thinking_mode == "false":
+                            request["enable_thinking"] = False
+                        else:
+                            request["enable_thinking"] = True
             request.setdefault("enable_thinking", True)
             outputs = self.ernie4_5_processor.request2ids(request)
         else:
diff --git a/fastdeploy/input/ernie4_5_vl_processor/process.py b/fastdeploy/input/ernie4_5_vl_processor/process.py
index de81a551dc0..4ccdf287f20 100644
--- a/fastdeploy/input/ernie4_5_vl_processor/process.py
+++ b/fastdeploy/input/ernie4_5_vl_processor/process.py
@@ -147,6 +147,7 @@ def __init__(
             "user": "User: ",
             "bot": "Assistant: ",
             "assistant": "Assistant: ",
+            "tool": "Tool: ",
         }
 
     def _build_token_type_mapping(self) -> Dict[Any, int]:
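Taken together, the source changes above route every thinking toggle through one precedence chain: an explicit enable_thinking in chat_template_kwargs, then the legacy metadata field, with a non-empty options.thinking_mode overriding both. A minimal standalone mirror of that chain, for illustration only (the function name resolve_thinking is hypothetical and not part of the patch):

# Illustration only: mirrors the precedence implemented by _get_thinking_status
# and the ernie4_5_vl_processor change; resolve_thinking is a hypothetical name.
def resolve_thinking(chat_template_kwargs=None, metadata=None):
    # 1. Explicit enable_thinking in chat_template_kwargs wins first ...
    enable = (chat_template_kwargs or {}).get("enable_thinking")
    # 2. ... then the legacy metadata field is consulted ...
    if enable is None:
        enable = (metadata or {}).get("enable_thinking")
    # 3. ... and a non-empty options.thinking_mode overrides both:
    #    "close"/"false" disable thinking, any other value enables it.
    mode = ((chat_template_kwargs or {}).get("options") or {}).get("thinking_mode")
    if mode:
        enable = mode not in ("close", "false")
    return enable

assert resolve_thinking({"enable_thinking": True}) is True
assert resolve_thinking({"options": {"thinking_mode": "close"}}) is False
assert resolve_thinking({"options": {"thinking_mode": "123"}}) is True  # unrecognized mode -> enabled
assert resolve_thinking({}) is None

Note that an unrecognized thinking_mode string enables thinking rather than raising, which the tests below exercise with the "123" case.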
diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
new file mode 100644
index 00000000000..394a23f0f4e
--- /dev/null
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -0,0 +1,71 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import unittest
+from unittest.mock import MagicMock
+
+from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest
+from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
+
+
+class TestOpenAIServingChat(unittest.TestCase):
+
+    def setUp(self):
+        """
+        Set up the test environment by creating an OpenAIServingChat instance with mocked dependencies.
+        """
+        self.mock_engine = MagicMock()
+        self.chat_completion_handler = OpenAIServingChat(
+            self.mock_engine,
+            models=None,
+            pid=123,
+            ips=None,
+            max_waiting_time=10,
+            chat_template=None,
+        )
+
+    def test_enable_thinking(self):
+        request = ChatCompletionRequest(messages=[], chat_template_kwargs={})
+        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
+        self.assertEqual(enable_thinking, None)
+
+        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"enable_thinking": True})
+        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
+        self.assertEqual(enable_thinking, True)
+
+        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"enable_thinking": False})
+        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
+        self.assertEqual(enable_thinking, False)
+
+        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "close"}})
+        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
+        self.assertEqual(enable_thinking, False)
+
+        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "false"}})
+        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
+        self.assertEqual(enable_thinking, False)
+
+        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "open"}})
+        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
+        self.assertEqual(enable_thinking, True)
+
+        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "123"}})
+        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
+        self.assertEqual(enable_thinking, True)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/input/test_ernie_vl_processor.py b/tests/input/test_ernie_vl_processor.py
new file mode 100644
index 00000000000..92d24d5b96f
--- /dev/null
+++ b/tests/input/test_ernie_vl_processor.py
@@ -0,0 +1,116 @@
+import unittest
+from unittest.mock import MagicMock, patch
+
+from fastdeploy.input.ernie4_5_vl_processor import Ernie4_5_VLProcessor
+
+
+class TestErnie4_5_vl_ProcessorProcessResponseDictStreaming(unittest.TestCase):
+    def setUp(self):
+        # Create a mocked Ernie4_5_VLProcessor instance (skip the real __init__)
+        with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None) as mock_init:
+            self.processor = Ernie4_5_VLProcessor("model_path")
+            mock_init.side_effect = lambda *args, **kwargs: print(f"__init__ called with {args}, {kwargs}")
+
+        # Set the required attributes
+        self.processor.tokenizer = MagicMock()
+        self.processor.tokenizer.eos_token_id = 1
+        self.processor.decode_status = {}
+        self.processor.reasoning_end_dict = {}
+        self.processor.tool_parser_dict = {}
+        self.processor.generation_config = MagicMock()
+        self.processor.eos_token_ids = [1]
+        self.processor.reasoning_parser = MagicMock()
+        self.processor._check_mm_limits = MagicMock()
+        self.processor.ernie4_5_processor = MagicMock()
+        self.processor.pack_outputs = MagicMock()
+
+        # Mock the ids2tokens method
+        def mock_ids2tokens(token_ids, task_id):
+            self.processor.decode_status[task_id] = "mock_decode_status"
+            return "delta_text", [2, 3], "previous_texts"
+
+        self.processor.ids2tokens = mock_ids2tokens
+
+        def mock_messages2ids(request, **kwargs):
+            if "chat_template" in kwargs:
+                return [1]
+            else:
+                return [0]
+
+        def mock_apply_default_parameters(request):
+            return request
+
+        self.processor._apply_default_parameters = mock_apply_default_parameters
+
+        # Mock the reasoning parser
+        self.mock_reasoning_parser = MagicMock()
+        self.mock_reasoning_parser.__class__.__name__ = "ErnieX1ReasoningParser"
+        # self.mock_reasoning_parser.extract_reasoning_content_streaming.return_value = ("reasoning", "text")
+        self.processor.reasoning_parser = self.mock_reasoning_parser
+
+        # Mock the tool parser
+        self.mock_tool_parser = MagicMock()
+        self.mock_tool_parser.extract_tool_calls_streaming.return_value = None
+        self.mock_tool_parser_obj = MagicMock()
+        self.mock_tool_parser_obj.return_value = self.mock_tool_parser
+        self.processor.tool_parser_obj = self.mock_tool_parser_obj
+
+    def test_process_request_dict_with_options(self):
+        request_dict = {
+            "messages": [{"role": "user", "content": "Hello"}],
+            "prompt_token_ids": [1, 1, 1],
+        }
+        self.processor.process_request_dict(request_dict, 100)
+        self.assertEqual(request_dict["enable_thinking"], True)
+
+        request_dict = {
+            "messages": [{"role": "user", "content": "Hello"}],
+            "chat_template_kwargs": {"enable_thinking": True},
+            "prompt_token_ids": [1, 1, 1],
+        }
+        self.processor.process_request_dict(request_dict, 100)
+        self.assertEqual(request_dict["enable_thinking"], True)
+
+        request_dict = {
+            "messages": [{"role": "user", "content": "Hello"}],
+            "chat_template_kwargs": {"enable_thinking": False},
+            "prompt_token_ids": [1, 1, 1],
+        }
+        self.processor.process_request_dict(request_dict, 100)
+        self.assertEqual(request_dict["enable_thinking"], False)
+
+        request_dict = {
+            "messages": [{"role": "user", "content": "Hello"}],
+            "chat_template_kwargs": {"options": {"thinking_mode": "open"}},
+            "prompt_token_ids": [1, 1, 1],
+        }
+        self.processor.process_request_dict(request_dict, 100)
+        self.assertEqual(request_dict["enable_thinking"], True)
+
+        request_dict = {
+            "messages": [{"role": "user", "content": "Hello"}],
+            "chat_template_kwargs": {"options": {"thinking_mode": "close"}},
+            "prompt_token_ids": [1, 1, 1],
+        }
+        self.processor.process_request_dict(request_dict, 100)
+        self.assertEqual(request_dict["enable_thinking"], False)
+
+        request_dict = {
+            "messages": [{"role": "user", "content": "Hello"}],
+            "chat_template_kwargs": {"options": {"thinking_mode": "false"}},
+            "prompt_token_ids": [1, 1, 1],
+        }
+        self.processor.process_request_dict(request_dict, 100)
+        self.assertEqual(request_dict["enable_thinking"], False)
+
+        request_dict = {
+            "messages": [{"role": "user", "content": "Hello"}],
+            "chat_template_kwargs": {"options": {"thinking_mode": "123"}},
{"options": {"thinking_mode": "123"}}, + "prompt_token_ids": [1, 1, 1], + } + self.processor.process_request_dict(request_dict, 100) + self.assertEqual(request_dict["enable_thinking"], True) + + +if __name__ == "__main__": + unittest.main()