Merged
25 changes: 19 additions & 6 deletions fastdeploy/entrypoints/openai/serving_chat.py
@@ -200,9 +200,7 @@ async def chat_completion_stream_generator(

max_streaming_response_tokens = max(1, max_streaming_response_tokens)

-        enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
-        if enable_thinking is None:
-            enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
+        enable_thinking = self._get_thinking_status(request)

include_stop_str_in_output = request.include_stop_str_in_output

@@ -461,9 +459,7 @@ async def chat_completion_full_generator(
"""
created_time = int(time.time())
num_choices = 1 if request.n is None else request.n
-        enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
-        if enable_thinking is None:
-            enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
+        enable_thinking = self._get_thinking_status(request)

include_stop_str_in_output = request.include_stop_str_in_output
try:
@@ -750,3 +746,20 @@ def _build_logprobs_response(
error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}"
api_server_logger.error(error_msg)
return None

def _get_thinking_status(self, request: ChatCompletionRequest) -> Optional[bool]:
    """
    Resolve whether thinking is enabled for a request.

    chat_template_kwargs["options"]["thinking_mode"] takes precedence over
    chat_template_kwargs["enable_thinking"], which in turn takes precedence
    over metadata["enable_thinking"]. Returns None when no signal is present.
    """
    enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
    if enable_thinking is None:
        enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
    options = request.chat_template_kwargs.get("options") if request.chat_template_kwargs else None
    if options:
        thinking_mode = options.get("thinking_mode")
        if thinking_mode:
            # Any mode other than "close"/"false" enables thinking.
            enable_thinking = thinking_mode not in ("close", "false")
    return enable_thinking
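
For reference, a minimal sketch of the resolution order in use (not part of the PR; it wires the handler with a mock engine exactly as the new test below does):

from unittest.mock import MagicMock

from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest
from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat

# Handler built with a mock engine, mirroring the setUp in the new test file.
handler = OpenAIServingChat(MagicMock(), models=None, pid=123, ips=None, max_waiting_time=10, chat_template=None)

# options.thinking_mode overrides enable_thinking when both are present.
req = ChatCompletionRequest(
    messages=[],
    chat_template_kwargs={"enable_thinking": True, "options": {"thinking_mode": "close"}},
)
assert handler._get_thinking_status(req) is False

# With no signal at all, the method returns None and callers fall back to their defaults.
assert handler._get_thinking_status(ChatCompletionRequest(messages=[])) is None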
@@ -237,6 +237,14 @@ def process_request_dict(self, request, max_model_len=None):
request[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
options = chat_template_kwargs.get("options")
if options:
    thinking_mode = options.get("thinking_mode")
    if thinking_mode:
        # Any mode other than "close"/"false" enables thinking.
        request["enable_thinking"] = thinking_mode not in ("close", "false")
request.setdefault("enable_thinking", True)
outputs = self.ernie4_5_processor.request2ids(request)
else:
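The same rule in isolation, restated as a hypothetical standalone helper (illustrative only, not part of the PR) to make the precedence explicit:

def resolve_enable_thinking(chat_template_kwargs: dict) -> bool:
    """Restates the precedence implemented in process_request_dict above."""
    enable = chat_template_kwargs.get("enable_thinking")
    mode = (chat_template_kwargs.get("options") or {}).get("thinking_mode")
    if mode:
        enable = mode not in ("close", "false")
    return True if enable is None else enable  # mirrors request.setdefault("enable_thinking", True)

assert resolve_enable_thinking({}) is True
assert resolve_enable_thinking({"options": {"thinking_mode": "close"}}) is False
assert resolve_enable_thinking({"enable_thinking": False}) is False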
1 change: 1 addition & 0 deletions fastdeploy/input/ernie4_5_vl_processor/process.py
@@ -147,6 +147,7 @@ def __init__(
"user": "User: ",
"bot": "Assistant: ",
"assistant": "Assistant: ",
"tool": "Tool: ",
}

def _build_token_type_mapping(self) -> Dict[Any, int]:
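With the new "tool" entry, tool messages now carry their own prefix when prompts are assembled. A rough sketch of the mapping's effect (an assumption for illustration; the real ERNIE prompt construction is more involved):

# Illustration only: shows how the role-prefix mapping above is applied.
role_prefixes = {
    "user": "User: ",
    "bot": "Assistant: ",
    "assistant": "Assistant: ",
    "tool": "Tool: ",  # new in this PR
}
messages = [
    {"role": "user", "content": "What's the weather in Shanghai?"},
    {"role": "tool", "content": '{"temp_c": 21}'},
]
print("\n".join(role_prefixes[m["role"]] + m["content"] for m in messages))
# User: What's the weather in Shanghai?
# Tool: {"temp_c": 21}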
71 changes: 71 additions & 0 deletions tests/entrypoints/openai/test_serving_chat.py
@@ -0,0 +1,71 @@
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import unittest
from unittest.mock import MagicMock

from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest
from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat


class TestOpenAIServingChat(unittest.TestCase):

    def setUp(self):
        """
        Set up the test environment by creating a mocked OpenAIServingChat instance.
        """
        self.mock_engine = MagicMock()
        self.chat_completion_handler = OpenAIServingChat(
            self.mock_engine,
            models=None,
            pid=123,
            ips=None,
            max_waiting_time=10,
            chat_template=None,
        )

    def test_enable_thinking(self):
        request = ChatCompletionRequest(messages=[], chat_template_kwargs={})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, None)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"enable_thinking": True})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, True)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"enable_thinking": False})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, False)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "close"}})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, False)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "false"}})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, False)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "open"}})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, True)

        request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "123"}})
        enable_thinking = self.chat_completion_handler._get_thinking_status(request)
        self.assertEqual(enable_thinking, True)


if __name__ == "__main__":
    unittest.main()
116 changes: 116 additions & 0 deletions tests/input/test_ernie_vl_processor.py
@@ -0,0 +1,116 @@
import unittest
from unittest.mock import MagicMock, patch

from fastdeploy.input.ernie4_5_vl_processor import Ernie4_5_VLProcessor


class TestErnie4_5_VLProcessorProcessRequestDict(unittest.TestCase):
    def setUp(self):
        # Create a mocked Ernie4_5_VLProcessor instance without running the real __init__
        with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None) as mock_init:
            self.processor = Ernie4_5_VLProcessor("model_path")
        mock_init.side_effect = lambda *args, **kwargs: print(f"__init__ called with {args}, {kwargs}")

        # Set the attributes the processor relies on
        self.processor.tokenizer = MagicMock()
        self.processor.tokenizer.eos_token_id = 1
        self.processor.decode_status = {}
        self.processor.reasoning_end_dict = {}
        self.processor.tool_parser_dict = {}
        self.processor.generation_config = MagicMock()
        self.processor.eos_token_ids = [1]
        self.processor.reasoning_parser = MagicMock()
        self.processor._check_mm_limits = MagicMock()
        self.processor.ernie4_5_processor = MagicMock()
        self.processor.pack_outputs = MagicMock()

        # Mock the ids2tokens method
        def mock_ids2tokens(token_ids, task_id):
            self.processor.decode_status[task_id] = "mock_decode_status"
            return "delta_text", [2, 3], "previous_texts"

        self.processor.ids2tokens = mock_ids2tokens

        def mock_messages2ids(request, **kwargs):
            if "chat_template" in kwargs:
                return [1]
            else:
                return [0]

        def mock_apply_default_parameters(request):
            return request

        self.processor._apply_default_parameters = mock_apply_default_parameters

        # Mock the reasoning parser
        self.mock_reasoning_parser = MagicMock()
        self.mock_reasoning_parser.__class__.__name__ = "ErnieX1ReasoningParser"
        self.processor.reasoning_parser = self.mock_reasoning_parser

        # Mock the tool parser
        self.mock_tool_parser = MagicMock()
        self.mock_tool_parser.extract_tool_calls_streaming.return_value = None
        self.mock_tool_parser_obj = MagicMock()
        self.mock_tool_parser_obj.return_value = self.mock_tool_parser
        self.processor.tool_parser_obj = self.mock_tool_parser_obj

    def test_process_request_dict_with_options(self):
        # No chat_template_kwargs: thinking defaults to enabled.
        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], True)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"enable_thinking": True},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], True)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"enable_thinking": False},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], False)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"options": {"thinking_mode": "open"}},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], True)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"options": {"thinking_mode": "close"}},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], False)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"options": {"thinking_mode": "false"}},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], False)

        request_dict = {
            "messages": [{"role": "user", "content": "Hello"}],
            "chat_template_kwargs": {"options": {"thinking_mode": "123"}},
            "prompt_token_ids": [1, 1, 1],
        }
        self.processor.process_request_dict(request_dict, 100)
        self.assertEqual(request_dict["enable_thinking"], True)


if __name__ == "__main__":
    unittest.main()