Merged
25 changes: 19 additions & 6 deletions fastdeploy/entrypoints/openai/serving_chat.py
@@ -200,9 +200,7 @@ async def chat_completion_stream_generator(

max_streaming_response_tokens = max(1, max_streaming_response_tokens)

-        enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
-        if enable_thinking is None:
-            enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
+        enable_thinking = self._get_thinking_status(request)

include_stop_str_in_output = request.include_stop_str_in_output

@@ -461,9 +459,7 @@ async def chat_completion_full_generator(
"""
created_time = int(time.time())
num_choices = 1 if request.n is None else request.n
-        enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
-        if enable_thinking is None:
-            enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
+        enable_thinking = self._get_thinking_status(request)

include_stop_str_in_output = request.include_stop_str_in_output
try:
@@ -750,3 +746,20 @@ def _build_logprobs_response(
error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}"
api_server_logger.error(error_msg)
return None

def _get_thinking_status(self, request: ChatCompletionRequest) -> Optional[bool]:
    """
    Resolve whether thinking is enabled for a request.

    chat_template_kwargs["options"]["thinking_mode"] takes precedence over
    chat_template_kwargs["enable_thinking"], which in turn takes precedence
    over metadata["enable_thinking"]. Returns None when no signal is present.
    """
    enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
    if enable_thinking is None:
        enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
    options = request.chat_template_kwargs.get("options") if request.chat_template_kwargs else None
    if options:
        thinking_mode = options.get("thinking_mode")
        if thinking_mode:
            # Any mode other than "close"/"false" enables thinking.
            enable_thinking = thinking_mode not in ("close", "false")
    return enable_thinking
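
For reference, a minimal sketch of the resolution order in use (not part of the PR; it wires the handler with a mock engine exactly as the new test below does):

from unittest.mock import MagicMock

from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest
from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat

# Handler built with a mock engine, mirroring the setUp in the new test file.
handler = OpenAIServingChat(MagicMock(), models=None, pid=123, ips=None, max_waiting_time=10, chat_template=None)

# options.thinking_mode overrides enable_thinking when both are present.
req = ChatCompletionRequest(
    messages=[],
    chat_template_kwargs={"enable_thinking": True, "options": {"thinking_mode": "close"}},
)
assert handler._get_thinking_status(req) is False

# With no signal at all, the method returns None and callers fall back to their defaults.
assert handler._get_thinking_status(ChatCompletionRequest(messages=[])) is None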
@@ -237,6 +237,14 @@ def process_request_dict(self, request, max_model_len=None):
request[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
options = chat_template_kwargs.get("options")
if options:
    thinking_mode = options.get("thinking_mode")
    if thinking_mode:
        # Any mode other than "close"/"false" enables thinking.
        request["enable_thinking"] = thinking_mode not in ("close", "false")
request.setdefault("enable_thinking", True)
outputs = self.ernie4_5_processor.request2ids(request)
else:
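The same rule in isolation, restated as a hypothetical standalone helper (illustrative only, not part of the PR) to make the precedence explicit:

def resolve_enable_thinking(chat_template_kwargs: dict) -> bool:
    """Restates the precedence implemented in process_request_dict above."""
    enable = chat_template_kwargs.get("enable_thinking")
    mode = (chat_template_kwargs.get("options") or {}).get("thinking_mode")
    if mode:
        enable = mode not in ("close", "false")
    return True if enable is None else enable  # mirrors request.setdefault("enable_thinking", True)

assert resolve_enable_thinking({}) is True
assert resolve_enable_thinking({"options": {"thinking_mode": "close"}}) is False
assert resolve_enable_thinking({"enable_thinking": False}) is False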
1 change: 1 addition & 0 deletions fastdeploy/input/ernie4_5_vl_processor/process.py
@@ -147,6 +147,7 @@ def __init__(
"user": "User: ",
"bot": "Assistant: ",
"assistant": "Assistant: ",
"tool": "Tool: ",
}

def _build_token_type_mapping(self) -> Dict[Any, int]:
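With the new "tool" entry, tool messages now carry their own prefix when prompts are assembled. A rough sketch of the mapping's effect (an assumption for illustration; the real ERNIE prompt construction is more involved):

# Illustration only: shows how the role-prefix mapping above is applied.
role_prefixes = {
    "user": "User: ",
    "bot": "Assistant: ",
    "assistant": "Assistant: ",
    "tool": "Tool: ",  # new in this PR
}
messages = [
    {"role": "user", "content": "What's the weather in Shanghai?"},
    {"role": "tool", "content": '{"temp_c": 21}'},
]
print("\n".join(role_prefixes[m["role"]] + m["content"] for m in messages))
# User: What's the weather in Shanghai?
# Tool: {"temp_c": 21}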
71 changes: 71 additions & 0 deletions tests/entrypoints/openai/test_serving_chat.py
@@ -0,0 +1,71 @@
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import unittest
from unittest.mock import MagicMock

from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest
from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat


class TestOpenAIServingChat(unittest.TestCase):

    def setUp(self):
        """
        Set up the test environment by creating a mocked OpenAIServingChat instance.
        """
        self.mock_engine = MagicMock()
        self.chat_completion_handler = OpenAIServingChat(
            self.mock_engine,
            models=None,
            pid=123,
            ips=None,
            max_waiting_time=10,
            chat_template=None,
        )

    def test_enable_thinking(self):
        request = ChatCompletionRequest(messages=[], chat_template_kwargs={})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, None)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"enable_thinking": True})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, True)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"enable_thinking": False})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, False)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "close"}})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, False)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "false"}})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, False)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "open"}})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, True)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "123"}})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, True)


if __name__ == "__main__":
    unittest.main()
116 changes: 116 additions & 0 deletions tests/input/test_ernie_vl_processor.py
@@ -0,0 +1,116 @@
import unittest
from unittest.mock import MagicMock, patch

from fastdeploy.input.ernie4_5_vl_processor import Ernie4_5_VLProcessor


class TestErnie4_5_VLProcessorProcessRequestDict(unittest.TestCase):
    def setUp(self):
        # Create a mocked Ernie4_5_VLProcessor instance without running the real __init__
        with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None) as mock_init:
            self.processor = Ernie4_5_VLProcessor("model_path")
        mock_init.side_effect = lambda *args, **kwargs: print(f"__init__ called with {args}, {kwargs}")

        # Set the attributes the processor relies on
        self.processor.tokenizer = MagicMock()
        self.processor.tokenizer.eos_token_id = 1
        self.processor.decode_status = {}
        self.processor.reasoning_end_dict = {}
        self.processor.tool_parser_dict = {}
        self.processor.generation_config = MagicMock()
        self.processor.eos_token_ids = [1]
        self.processor.reasoning_parser = MagicMock()
        self.processor._check_mm_limits = MagicMock()
        self.processor.ernie4_5_processor = MagicMock()
        self.processor.pack_outputs = MagicMock()

        # Mock the ids2tokens method
        def mock_ids2tokens(token_ids, task_id):
            self.processor.decode_status[task_id] = "mock_decode_status"
            return "delta_text", [2, 3], "previous_texts"

        self.processor.ids2tokens = mock_ids2tokens

        def mock_messages2ids(request, **kwargs):
            if "chat_template" in kwargs:
                return [1]
            else:
                return [0]

        def mock_apply_default_parameters(request):
            return request

        self.processor._apply_default_parameters = mock_apply_default_parameters

        # Mock the reasoning parser
        self.mock_reasoning_parser = MagicMock()
        self.mock_reasoning_parser.__class__.__name__ = "ErnieX1ReasoningParser"
        self.processor.reasoning_parser = self.mock_reasoning_parser

        # Mock the tool parser
        self.mock_tool_parser = MagicMock()
        self.mock_tool_parser.extract_tool_calls_streaming.return_value = None
        self.mock_tool_parser_obj = MagicMock()
        self.mock_tool_parser_obj.return_value = self.mock_tool_parser
        self.processor.tool_parser_obj = self.mock_tool_parser_obj

    def test_process_request_dict_with_options(self):
        # No chat_template_kwargs: thinking defaults to enabled.
        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], True)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"enable_thinking": True},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], True)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"enable_thinking": False},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], False)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"options": {"thinking_mode": "open"}},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], True)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"options": {"thinking_mode": "close"}},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], False)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"options": {"thinking_mode": "false"}},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], False)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"options": {"thinking_mode": "123"}},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], True)


if __name__ == "__main__":
    unittest.main()