diff --git a/fastdeploy/entrypoints/openai/response_processors.py b/fastdeploy/entrypoints/openai/response_processors.py index 95a5e3ec404..469f31dda4c 100644 --- a/fastdeploy/entrypoints/openai/response_processors.py +++ b/fastdeploy/entrypoints/openai/response_processors.py @@ -68,13 +68,12 @@ def accumulate_token_ids(self, request_output): else: self._multipart_buffer.append({"decode_type": decode_type, "request_output": request_output}) - async def process_response_chat(self, request_outputs, stream, enable_thinking, include_stop_str_in_output): + async def process_response_chat(self, request_outputs, stream, include_stop_str_in_output): """ Process a list of responses into a generator that yields each processed response as it's generated. Args: request_outputs: The list of outputs to be processed. stream: Whether or not to stream the output. - enable_thinking: Whether or not to show thinking messages. include_stop_str_in_output: Whether or not to include stop strings in the output. """ for request_output in request_outputs: @@ -83,7 +82,6 @@ async def process_response_chat(self, request_outputs, stream, enable_thinking, yield self.data_processor.process_response_dict( response_dict=request_output, stream=stream, - enable_thinking=enable_thinking, include_stop_str_in_output=include_stop_str_in_output, ) elif stream: @@ -111,7 +109,6 @@ async def process_response_chat(self, request_outputs, stream, enable_thinking, self.data_processor.process_response_dict( response_dict=request_output, stream=stream, - enable_thinking=enable_thinking, include_stop_str_in_output=include_stop_str_in_output, ) text = {"type": "text", "text": request_output["outputs"]["text"]} @@ -132,7 +129,6 @@ async def process_response_chat(self, request_outputs, stream, enable_thinking, self.data_processor.process_response_dict( response_dict=part["request_output"], stream=False, - enable_thinking=enable_thinking, include_stop_str_in_output=include_stop_str_in_output, ) text = {"type": "text", "text": part["request_output"]["outputs"]["text"]} diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py index 20846131233..aa761130c24 100644 --- a/fastdeploy/entrypoints/openai/serving_chat.py +++ b/fastdeploy/entrypoints/openai/serving_chat.py @@ -210,8 +210,6 @@ async def chat_completion_stream_generator( max_streaming_response_tokens = max(1, max_streaming_response_tokens) - enable_thinking = self._get_thinking_status(request) - include_stop_str_in_output = request.include_stop_str_in_output stream_options = request.stream_options @@ -267,7 +265,6 @@ async def chat_completion_stream_generator( generator = response_processor.process_response_chat( response, stream=True, - enable_thinking=enable_thinking, include_stop_str_in_output=include_stop_str_in_output, ) @@ -469,7 +466,6 @@ async def chat_completion_full_generator( """ created_time = int(time.time()) num_choices = 1 if request.n is None else request.n - enable_thinking = self._get_thinking_status(request) include_stop_str_in_output = request.include_stop_str_in_output try: @@ -523,7 +519,6 @@ async def chat_completion_full_generator( generator = response_processor.process_response_chat( response, stream=False, - enable_thinking=enable_thinking, include_stop_str_in_output=include_stop_str_in_output, ) async for data in generator: @@ -757,20 +752,3 @@ def _build_logprobs_response( error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}" api_server_logger.error(error_msg) return None - - def 
_get_thinking_status(self, request: ChatCompletionRequest) -> bool: - """ - Get the thinking status from the request. - """ - enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None - if enable_thinking is None: - enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None - options = request.chat_template_kwargs.get("options") if request.chat_template_kwargs else None - if options: - thinking_mode = options.get("thinking_mode") - if thinking_mode: - if thinking_mode == "close" or thinking_mode == "false": - enable_thinking = False - else: - enable_thinking = True - return enable_thinking diff --git a/fastdeploy/input/ernie4_5_processor.py b/fastdeploy/input/ernie4_5_processor.py index a151dbfdd6d..edd21796bc2 100644 --- a/fastdeploy/input/ernie4_5_processor.py +++ b/fastdeploy/input/ernie4_5_processor.py @@ -60,6 +60,7 @@ def __init__(self, model_name_or_path, reasoning_parser_obj=None, tool_parser_ob self.decode_status = dict() self.tool_parser_dict = dict() self.thinking_parser_dict = dict() + self.model_status_dict = dict() self._load_tokenizer() data_processor_logger.info( f"tokenizer information: bos_token is {self.tokenizer.bos_token} \ @@ -151,8 +152,13 @@ def process_request(self, request, max_model_len=None, **kwargs): request.set("temperature", 1) if request.get("top_p") < _SAMPLING_EPS: request.set("top_p", _SAMPLING_EPS) - if self.reasoning_parser and self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser": - request.enable_thinking = True + if self.reasoning_parser: + real_req_id = request.request_id.split("_")[0] + n = request.get("n", 1) + model_status = self.reasoning_parser.get_model_status(request.prompt_token_ids) + for idx in range(n): + self.model_status_dict[f"{real_req_id}_{idx}"] = model_status + request.enable_thinking = model_status == "think_start" data_processor_logger.info(f"Processed request: {request}") return request @@ -229,9 +235,13 @@ def process_request_dict(self, request, max_model_len=None): request["temperature"] = 1 if request.get("top_p") < _SAMPLING_EPS: request["top_p"] = _SAMPLING_EPS - if self.reasoning_parser and self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser": - request["enable_thinking"] = True - + if self.reasoning_parser: + real_req_id = request["request_id"].split("_")[0] + model_status = self.reasoning_parser.get_model_status(request["prompt_token_ids"]) + n = request.get("n", 1) + for idx in range(n): + self.model_status_dict[f"{real_req_id}_{idx}"] = model_status + request["enable_thinking"] = model_status == "think_start" data_processor_logger.info(f"Processed request dict: {request}") return request @@ -253,7 +263,11 @@ def process_response(self, response_dict, **kwargs): token_ids = token_ids[:-1] full_text = self.tokenizer.decode(token_ids) if self.reasoning_parser: - reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) + reasoning_content, text = self.reasoning_parser.extract_reasoning_content( + full_text, + response_dict, + self.model_status_dict[req_id], + ) response_dict.outputs.text = text response_dict.outputs.reasoning_content = reasoning_content else: @@ -264,6 +278,8 @@ def process_response(self, response_dict, **kwargs): if tool_call_info.tools_called: response_dict.outputs.tool_calls = tool_call_info.tool_calls response_dict.outputs.text = tool_call_info.content + if req_id in self.model_status_dict: + del self.model_status_dict[req_id] 
data_processor_logger.info(f"req_id:{req_id}, token_ids: {token_ids}") if response_dict.outputs.text == "" and response_dict.outputs.reasoning_content == "": return None @@ -294,7 +310,6 @@ def process_response_dict_normal(self, response_dict, **kwargs): Returns: Dict: response contain text fields """ - enable_thinking = kwargs.get("enable_thinking") token_ids = response_dict["outputs"]["token_ids"] is_end = response_dict["finished"] req_id = response_dict["request_id"] @@ -304,16 +319,17 @@ def process_response_dict_normal(self, response_dict, **kwargs): delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id) if is_end: full_text = previous_texts + delta_text - if self.reasoning_parser and ( - enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser" - ): - reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) + response_dict["outputs"]["text"] = full_text + if self.reasoning_parser: + reasoning_content, text = self.reasoning_parser.extract_reasoning_content( + full_text, + response_dict, + self.model_status_dict[req_id], + ) response_dict["outputs"]["text"] = text response_dict["outputs"]["reasoning_content"] = reasoning_content reasoning_tokens = self.tokenizer.tokenize(reasoning_content) response_dict["outputs"]["reasoning_token_num"] = len(reasoning_tokens) - else: - response_dict["outputs"]["text"] = full_text if self.tool_parser_obj: tool_parser = self.tool_parser_obj(self.tokenizer) tool_call_info = tool_parser.extract_tool_calls(full_text, response_dict) @@ -323,6 +339,8 @@ def process_response_dict_normal(self, response_dict, **kwargs): response_dict["outputs"]["completion_tokens"] = full_text data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}") del self.decode_status[req_id] + if req_id in self.model_status_dict: + del self.model_status_dict[req_id] return response_dict def process_response_dict_streaming(self, response_dict, **kwargs): @@ -335,7 +353,6 @@ def process_response_dict_streaming(self, response_dict, **kwargs): Returns: Dict: response contain text fields """ - enable_thinking = kwargs.get("enable_thinking") is_end = response_dict["finished"] req_id = response_dict["request_id"] token_ids = response_dict["outputs"]["token_ids"] @@ -345,9 +362,7 @@ def process_response_dict_streaming(self, response_dict, **kwargs): token_ids = token_ids[:-1] delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id) response_dict["outputs"]["completion_tokens"] = delta_text - if self.reasoning_parser and ( - enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser" - ): + if self.reasoning_parser: reasoning_delta_message = self.reasoning_parser.extract_reasoning_content_streaming( previous_texts, previous_texts + delta_text, @@ -355,6 +370,7 @@ def process_response_dict_streaming(self, response_dict, **kwargs): previous_token_ids, previous_token_ids + token_ids, token_ids, + self.model_status_dict[req_id], ) response_dict["outputs"]["delta_message"] = reasoning_delta_message reasoning_content = reasoning_delta_message.reasoning_content if reasoning_delta_message else None @@ -381,6 +397,8 @@ def process_response_dict_streaming(self, response_dict, **kwargs): del self.decode_status[req_id] if req_id in self.tool_parser_dict: del self.tool_parser_dict[req_id] + if req_id in self.model_status_dict: + del self.model_status_dict[req_id] return response_dict def messages2ids(self, request_or_messages, **kwargs): 
diff --git a/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py b/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py index 0fe724af53f..f164d095fcd 100644 --- a/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py +++ b/fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py @@ -58,6 +58,7 @@ def __init__( self.tool_parser_dict = dict() self.decode_status = dict() + self.model_status_dict = dict() self._load_tokenizer() # Generation config @@ -272,6 +273,13 @@ def process_request_dict(self, request, max_model_len=None): request["reasoning_max_tokens"] = max(int(request["max_tokens"] * 0.8), 1) data_processor_logger.info(f"Processed request {request}") + if self.reasoning_parser: + real_req_id = request["request_id"].split("_")[0] + model_status = self.reasoning_parser.get_model_status(request["prompt_token_ids"]) + n = request.get("n", 1) + for idx in range(n): + self.model_status_dict[f"{real_req_id}_{idx}"] = model_status + request["enable_thinking"] = model_status == "think_start" if request.get("top_p") is not None and request.get("top_p") < _SAMPLING_EPS: request["top_p"] = _SAMPLING_EPS @@ -307,21 +315,3 @@ def pack_outputs(self, outs): outs["position_ids"] = np.array(outs["position_ids"], dtype=np.int64) return outs - - def process_response_dict(self, response_dict, stream, **kwargs): - """ - Preprocess the response - - Args: - response_dict (Dict): response for engine, contain ids fields - - Returns: - Dict: response contain text fields - """ - enable_thinking = kwargs.pop("enable_thinking", True) - if enable_thinking is None: - enable_thinking = True - if stream: - return self.process_response_dict_streaming(response_dict, enable_thinking=enable_thinking, **kwargs) - else: - return self.process_response_dict_normal(response_dict, enable_thinking=enable_thinking, **kwargs) diff --git a/fastdeploy/input/qwen_vl_processor/qwen_vl_processor.py b/fastdeploy/input/qwen_vl_processor/qwen_vl_processor.py index 06f43f335ae..cda49092c86 100644 --- a/fastdeploy/input/qwen_vl_processor/qwen_vl_processor.py +++ b/fastdeploy/input/qwen_vl_processor/qwen_vl_processor.py @@ -269,6 +269,13 @@ def process_request_dict(self, request, max_model_len=None): # Set default max_tokens if not specified if request.get("max_tokens") is None: request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"])) # Ensure at least 1 token + if self.reasoning_parser: + real_req_id = request["request_id"].split("_")[0] + model_status = self.reasoning_parser.get_model_status(request["prompt_token_ids"]) + n = request.get("n", 1) + for idx in range(n): + self.model_status_dict[f"{real_req_id}_{idx}"] = model_status + request["enable_thinking"] = model_status == "think_start" data_processor_logger.info(f"Processed request {request}") return request diff --git a/fastdeploy/input/text_processor.py b/fastdeploy/input/text_processor.py index 066909f361e..138eb59f171 100644 --- a/fastdeploy/input/text_processor.py +++ b/fastdeploy/input/text_processor.py @@ -175,6 +175,7 @@ def __init__(self, model_name_or_path, reasoning_parser_obj=None, tool_parser_ob self.generation_config = None self.decode_status = dict() + self.model_status_dict = dict() self.tool_parser_dict = dict() self.tokenizer = self._load_tokenizer() data_processor_logger.info( @@ -267,6 +268,13 @@ def process_request(self, request, max_model_len=None, **kwargs): request.set("temperature", 1) if request.get("top_p") < _SAMPLING_EPS: request.set("top_p", _SAMPLING_EPS) + if self.reasoning_parser: + 
real_req_id = request.request_id.split("_")[0] + n = request.get("n", 1) + model_status = self.reasoning_parser.get_model_status(request.prompt_token_ids) + for idx in range(n): + self.model_status_dict[f"{real_req_id}_{idx}"] = model_status + request.enable_thinking = model_status == "think_start" data_processor_logger.info(f"Processed request: {request}") return request @@ -341,6 +349,13 @@ def process_request_dict(self, request, max_model_len=None, **kwargs): request["temperature"] = 1 if request.get("top_p") < _SAMPLING_EPS: request["top_p"] = _SAMPLING_EPS + if self.reasoning_parser: + real_req_id = request["request_id"].split("_")[0] + model_status = self.reasoning_parser.get_model_status(request["prompt_token_ids"]) + n = request.get("n", 1) + for idx in range(n): + self.model_status_dict[f"{real_req_id}_{idx}"] = model_status + request["enable_thinking"] = model_status == "think_start" data_processor_logger.info(f"Processed request dict: {request}") return request @@ -364,21 +379,21 @@ def process_response(self, response_dict, **kwargs): if token_ids[-1] == self.tokenizer.eos_token_id: token_ids = token_ids[:-1] full_text = self.tokenizer.decode(token_ids) - - # 模型支持思考,并且支持思考 + response_dict.outputs.text = full_text if self.reasoning_parser: - reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) + reasoning_content, text = self.reasoning_parser.extract_reasoning_content( + full_text, response_dict, self.model_status_dict[req_id] + ) response_dict.outputs.text = text response_dict.outputs.reasoning_content = reasoning_content - else: - # 模型不支持思考,并且没单独设置enable_thinking为false - response_dict.outputs.text = full_text if self.tool_parser_obj: tool_parser = self.tool_parser_obj(self.tokenizer) tool_call_info = tool_parser.extract_tool_calls(full_text, response_dict) if tool_call_info.tools_called: response_dict.outputs.tool_calls = tool_call_info.tool_calls response_dict.outputs.text = tool_call_info.content + if req_id in self.model_status_dict: + del self.model_status_dict[req_id] data_processor_logger.info(f"req_id:{req_id}, token_ids: {token_ids}") return response_dict @@ -393,7 +408,6 @@ def process_response_dict_normal(self, response_dict, **kwargs): Returns: Dict: response contain text fields """ - enable_thinking = kwargs.get("enable_thinking") token_ids = response_dict["outputs"]["token_ids"] is_end = response_dict["finished"] req_id = response_dict["request_id"] @@ -404,14 +418,17 @@ def process_response_dict_normal(self, response_dict, **kwargs): if is_end: full_text = previous_texts + delta_text response_dict["outputs"]["completion_tokens"] = full_text - if enable_thinking and self.reasoning_parser: - reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict) + response_dict["outputs"]["text"] = full_text + if self.reasoning_parser: + reasoning_content, text = self.reasoning_parser.extract_reasoning_content( + full_text, + response_dict, + self.model_status_dict[req_id], + ) response_dict["outputs"]["text"] = text response_dict["outputs"]["reasoning_content"] = reasoning_content reasoning_tokens = self.tokenizer.tokenize(reasoning_content) response_dict["outputs"]["reasoning_token_num"] = len(reasoning_tokens) - else: - response_dict["outputs"]["text"] = full_text if self.tool_parser_obj: tool_parser = self.tool_parser_obj(self.tokenizer) tool_call_info = tool_parser.extract_tool_calls(full_text, response_dict) @@ -420,6 +437,8 @@ def process_response_dict_normal(self, response_dict, 
**kwargs):
                     response_dict["outputs"]["text"] = tool_call_info.content
             data_processor_logger.info(f"req_id:{req_id}, decode_status: {self.decode_status[req_id]}")
             del self.decode_status[req_id]
+            if req_id in self.model_status_dict:
+                del self.model_status_dict[req_id]
         return response_dict
 
     def process_response_dict_streaming(self, response_dict, **kwargs):
@@ -432,7 +451,6 @@ def process_response_dict_streaming(self, response_dict, **kwargs):
         Returns:
             Dict: response contain text fields
         """
-        enable_thinking = kwargs.get("enable_thinking")
         is_end = response_dict["finished"]
         req_id = response_dict["request_id"]
         token_ids = response_dict["outputs"]["token_ids"]
@@ -442,9 +460,7 @@ def process_response_dict_streaming(self, response_dict, **kwargs):
             token_ids = token_ids[:-1]
         delta_text, previous_token_ids, previous_texts = self.ids2tokens(token_ids, req_id)
         response_dict["outputs"]["completion_tokens"] = delta_text
-        if self.reasoning_parser and (
-            enable_thinking or self.reasoning_parser.__class__.__name__ == "ErnieX1ReasoningParser"
-        ):
+        if self.reasoning_parser:
             reasoning_delta_message = self.reasoning_parser.extract_reasoning_content_streaming(
                 previous_texts,
                 previous_texts + delta_text,
@@ -452,6 +468,7 @@ def process_response_dict_streaming(self, response_dict, **kwargs):
                 previous_token_ids,
                 previous_token_ids + token_ids,
                 token_ids,
+                self.model_status_dict[req_id],
             )
             response_dict["outputs"]["delta_message"] = reasoning_delta_message
             reasoning_content = reasoning_delta_message.reasoning_content if reasoning_delta_message else None
@@ -478,6 +495,8 @@ def process_response_dict_streaming(self, response_dict, **kwargs):
             del self.decode_status[req_id]
             if req_id in self.tool_parser_dict:
                 del self.tool_parser_dict[req_id]
+            if req_id in self.model_status_dict:
+                del self.model_status_dict[req_id]
         return response_dict
 
     def process_response_dict(self, response_dict, **kwargs):
@@ -490,16 +509,12 @@ def process_response_dict(self, response_dict, **kwargs):
         Returns:
             Dict: response contain text fields
         """
-        enable_thinking = kwargs.pop("enable_thinking", True)
-        if enable_thinking is None:
-            enable_thinking = True
         stream = kwargs.get("stream", True)
         if stream:
-            return self.process_response_dict_streaming(response_dict, enable_thinking=enable_thinking, **kwargs)
+            return self.process_response_dict_streaming(response_dict, **kwargs)
         else:
             return self.process_response_dict_normal(
                 response_dict=response_dict,
-                enable_thinking=enable_thinking,
                 **kwargs,
             )
diff --git a/fastdeploy/reasoning/ernie_45_vl_thinking_reasoning_parser.py b/fastdeploy/reasoning/ernie_45_vl_thinking_reasoning_parser.py
index 939a0a4348b..fa394545802 100644
--- a/fastdeploy/reasoning/ernie_45_vl_thinking_reasoning_parser.py
+++ b/fastdeploy/reasoning/ernie_45_vl_thinking_reasoning_parser.py
@@ -35,25 +35,53 @@ class Ernie45VLThinkingReasoningParser(ReasoningParser):
 
     def __init__(self, tokenizer):
         super().__init__(tokenizer)
-        self.think_end_token = "</think>"
-        self.tool_begin_token = "<tool_call>"
+        token_definitions = {
+            "think_start_token": "<think>",
+            "think_end_token": "</think>",
+            "tool_call_start_token": "<tool_call>",
+            "tool_call_end_token": "</tool_call>",
+        }
 
         if not self.model_tokenizer:
             raise ValueError(
                 "The model tokenizer must be passed to the ReasoningParser " "constructor during construction."
) - - self.think_end_token_id = self.vocab.get(self.think_end_token) - self.tool_begin_token_id = self.vocab.get(self.tool_begin_token) - if self.tool_begin_token_id is None: - self.tool_begin_token_id = -1 - - if self.think_end_token_id is None: - raise RuntimeError("Test reasoning parser could not locate think end tokens in the tokenizer!") + missing_tokens = [] + for name, token_value in token_definitions.items(): + setattr(self, name, token_value) + token_id = self.vocab.get(token_value) + setattr(self, f"{name}_id", token_id) + if token_id is None: + missing_tokens.append(f"{name.replace('_', ' ')} token") + + if missing_tokens: + raise RuntimeError( + f"ernie vl reasoning parser could not find the following token ids in tokenizer vocabulary: {', '.join(missing_tokens)}" + ) + self.token_status_mapping = { + self.think_start_token_id: "think_start", + self.think_end_token_id: "think_end", + self.tool_call_start_token_id: "tool_call_start", + self.tool_call_end_token_id: "tool_call_end", + } def is_reasoning_end(self, input_ids: list[int]) -> bool: return self.think_end_token_id in input_ids + def find_last_special_token(self, prompt_token_ids: list[int]) -> int: + for i in range(len(prompt_token_ids) - 1, -1, -1): + if prompt_token_ids[i] in self.token_status_mapping: + return prompt_token_ids[i] + return -1 + + def get_model_status(self, prompt_token_ids: list[int]): + special_token_id = self.find_last_special_token(prompt_token_ids) + + if special_token_id == -1: + return "think_start" + + return self.token_status_mapping[special_token_id] + def extract_reasoning_content_streaming( self, previous_text: str, @@ -62,6 +90,7 @@ def extract_reasoning_content_streaming( previous_token_ids: Sequence[int], current_token_ids: Sequence[int], delta_token_ids: Sequence[int], + model_status: str, ) -> Union[DeltaMessage, None]: """ Extract reasoning content from a delta message. 
@@ -71,36 +100,46 @@ def extract_reasoning_content_streaming( - 'abc' goes to reasoning_content - 'xyz' goes to content """ - if self.think_end_token not in current_text: - return DeltaMessage(reasoning_content=delta_text) - # Skip single special tokens - if len(delta_token_ids) == 1 and delta_token_ids[0] == self.think_end_token_id: - return None - if self._is_with_tool(current_text=current_text, current_token_ids=current_token_ids): + if model_status == "think_start": + if self.think_end_token not in current_text: + return DeltaMessage(reasoning_content=delta_text) + # Skip single special tokens + if len(delta_token_ids) == 1 and delta_token_ids[0] == self.think_end_token_id: + return None + if self._is_with_tool(current_text=current_text, current_token_ids=current_token_ids): + if self.think_end_token in delta_text: + think_begin = delta_text.find(self.think_end_token) + reasoning_content = delta_text[:think_begin] + return DeltaMessage(reasoning_content=reasoning_content) + return None if self.think_end_token in delta_text: - think_begin = delta_text.find(self.think_end_token) - reasoning_content = delta_text[:think_begin] - return DeltaMessage(reasoning_content=reasoning_content) + reasoning_content, _, content = delta_text.partition(self.think_end_token) + striped_content = content.strip("\n") + if len(striped_content) == 0: + return DeltaMessage(reasoning_content=reasoning_content) if reasoning_content else None + return ( + DeltaMessage(reasoning_content=reasoning_content, content=content) + if reasoning_content + else DeltaMessage(content=content) + ) + think_end = current_text.find(self.think_end_token) + len(self.think_end_token) + suffix = current_text[think_end:] + striped_suffix = suffix.strip("\n") + if len(striped_suffix) == 0: + return None + return DeltaMessage(content=delta_text) + elif model_status == "think_end": + if current_text.lstrip("\n").startswith(self.tool_call_start_token): + return None + return DeltaMessage(content=delta_text) + else: return None - if self.think_end_token in delta_text: - reasoning_content, _, content = delta_text.partition(self.think_end_token) - striped_content = content.strip("\n") - if len(striped_content) == 0: - return DeltaMessage(reasoning_content=reasoning_content) if reasoning_content else None - return ( - DeltaMessage(reasoning_content=reasoning_content, content=content) - if reasoning_content - else DeltaMessage(content=content) - ) - think_end = current_text.find(self.think_end_token) + len(self.think_end_token) - suffix = current_text[think_end:] - striped_suffix = suffix.strip("\n") - if len(striped_suffix) == 0: - return None - return DeltaMessage(content=delta_text) def extract_reasoning_content( - self, model_output: str, request: ChatCompletionRequest + self, + model_output: str, + request: ChatCompletionRequest, + model_status: str, ) -> tuple[Optional[str], Optional[str]]: """ Extract reasoning content from the model output. @@ -114,23 +153,30 @@ def extract_reasoning_content( """ # Check if the model output contains the tokens. 
-        if self.think_end_token not in model_output:
-            return model_output, ""
-        reasoning_content, _, content = model_output.partition(self.think_end_token)
-        if self.tool_begin_token in content:
-            prefix, _, _ = content.partition(self.tool_begin_token)
-            prefix_strip = prefix.lstrip("\n")
-            if len(prefix_strip) > 0:
-                return reasoning_content, content
-            return reasoning_content, ""
-        return reasoning_content, content
+        if model_status == "think_start":
+            if self.think_end_token not in model_output:
+                return model_output, ""
+            reasoning_content, _, content = model_output.partition(self.think_end_token)
+            if self.tool_call_start_token in content:
+                prefix, _, _ = content.partition(self.tool_call_start_token)
+                prefix_strip = prefix.lstrip("\n")
+                if len(prefix_strip) > 0:
+                    return reasoning_content, content
+                return reasoning_content, ""
+            return reasoning_content, content
+        elif model_status == "think_end":
+            if model_output.lstrip("\n").startswith(self.tool_call_start_token):
+                return "", ""
+            return "", model_output
+        else:
+            return "", ""
 
     def _is_with_tool(self, current_text: str, current_token_ids: Sequence[int]) -> bool:
         think_end_index = current_text.find(self.think_end_token)
         think_end = think_end_index + len(self.think_end_token)
         middle_str = current_text[think_end:]
-        if self.tool_begin_token_id in current_token_ids:
-            prefix, _, _ = middle_str.partition(self.tool_begin_token)
+        if self.tool_call_start_token_id in current_token_ids:
+            prefix, _, _ = middle_str.partition(self.tool_call_start_token)
             striped_prefix = prefix.strip("\n")
             if len(striped_prefix) > 0:
                 return False
diff --git a/fastdeploy/reasoning/ernie_vl_reasoning_parsers.py b/fastdeploy/reasoning/ernie_vl_reasoning_parsers.py
index 18e15f18aa3..cafffbb8b08 100644
--- a/fastdeploy/reasoning/ernie_vl_reasoning_parsers.py
+++ b/fastdeploy/reasoning/ernie_vl_reasoning_parsers.py
@@ -35,20 +35,48 @@ class ErnieVLReasoningParser(ReasoningParser):
 
     def __init__(self, tokenizer):
         super().__init__(tokenizer)
-        self.think_end_token = "</think>"
+        token_definitions = {
+            "think_start_token": "<think>",
+            "think_end_token": "</think>",
+        }
 
         if not self.model_tokenizer:
-            raise ValueError(
-                "The model tokenizer must be passed to the ReasoningParser " "constructor during construction."
- ) + raise ValueError("The model tokenizer must be passed to the ReasoningParser constructor.") + + missing_tokens = [] + for name, token_value in token_definitions.items(): + setattr(self, name, token_value) + token_id = self.vocab.get(token_value) + setattr(self, f"{name}_id", token_id) + if token_id is None: + missing_tokens.append(f"{name.replace('_', ' ')} token") - self.think_end_token_id = self.vocab.get(self.think_end_token) - if self.think_end_token_id is None: - raise RuntimeError("Ernie VL reasoning parser could not locate think end " "tokens in the tokenizer!") + if missing_tokens: + raise RuntimeError( + f"ernie vl reasoning parser could not find the following token ids in tokenizer vocabulary: {', '.join(missing_tokens)}" + ) + self.token_status_mapping = { + self.think_start_token_id: "think_start", + self.think_end_token_id: "think_end", + } def is_reasoning_end(self, input_ids: list[int]) -> bool: return self.think_end_token_id in input_ids + def find_last_special_token(self, prompt_token_ids: list[int]) -> int: + for i in range(len(prompt_token_ids) - 1, -1, -1): + if prompt_token_ids[i] in self.token_status_mapping: + return prompt_token_ids[i] + return -1 + + def get_model_status(self, prompt_token_ids: list[int]): + special_token_id = self.find_last_special_token(prompt_token_ids) + + if special_token_id == -1: + return "think_start" + + return self.token_status_mapping[special_token_id] + def extract_reasoning_content_streaming( self, previous_text: str, @@ -57,6 +85,7 @@ def extract_reasoning_content_streaming( previous_token_ids: Sequence[int], current_token_ids: Sequence[int], delta_token_ids: Sequence[int], + model_status: str, ) -> Union[DeltaMessage, None]: """ Extract reasoning content from a delta message. @@ -69,18 +98,23 @@ def extract_reasoning_content_streaming( # Skip single special tokens if len(delta_token_ids) == 1 and delta_token_ids[0] == self.think_end_token_id: return None - if self.think_end_token_id in delta_token_ids: - end_index = delta_text.find(self.think_end_token) - reasoning_content = delta_text[:end_index] - content = delta_text[end_index + len(self.think_end_token) :] - return DeltaMessage(reasoning_content=reasoning_content, content=content) - elif self.think_end_token_id in previous_token_ids: - return DeltaMessage(content=delta_text) - else: + if model_status == "think_start": + if self.think_end_token_id in delta_token_ids: + end_index = delta_text.find(self.think_end_token) + reasoning_content = delta_text[:end_index] + content = delta_text[end_index + len(self.think_end_token) :] + return DeltaMessage(reasoning_content=reasoning_content, content=content) + if self.think_end_token_id in previous_token_ids: + return DeltaMessage(content=delta_text) return DeltaMessage(reasoning_content=delta_text) + else: + return DeltaMessage(content=delta_text) def extract_reasoning_content( - self, model_output: str, request: ChatCompletionRequest + self, + model_output: str, + request: ChatCompletionRequest, + model_status: str, ) -> tuple[Optional[str], Optional[str]]: """ Extract reasoning content from the model output. @@ -92,11 +126,12 @@ def extract_reasoning_content( Returns: tuple[Optional[str], Optional[str]]: reasoning content and content """ - # Check if the model output contains the tokens. 
-        if self.think_end_token not in model_output:
+        if model_status == "think_start":
+            if self.think_end_token not in model_output:
+                return "", model_output
+            reasoning_content, _, content = model_output.partition(self.think_end_token)
+            final_content = content or ""
+            return reasoning_content, final_content
+        else:
             return "", model_output
-        reasoning_content, _, content = model_output.partition(self.think_end_token)
-
-        final_content = content or ""
-        return reasoning_content, final_content
diff --git a/fastdeploy/reasoning/ernie_x1_reasoning_parsers.py b/fastdeploy/reasoning/ernie_x1_reasoning_parsers.py
index 77fc1d5ada8..3bb0cd4fb46 100644
--- a/fastdeploy/reasoning/ernie_x1_reasoning_parsers.py
+++ b/fastdeploy/reasoning/ernie_x1_reasoning_parsers.py
@@ -18,19 +18,55 @@ class ErnieX1ReasoningParser(ReasoningParser):
 
     def __init__(self, tokenizer):
         super().__init__(tokenizer)
-        self.think_end_token = "</think>"
-        self.response_start_token = "<response>"
-        self.response_end_token = "</response>"
-        self.tool_call_start_token = "<tool_call>"
-        self.tool_call_end_token = "</tool_call>"
+
+        # Define all tokens that need to be checked
+        token_definitions = {
+            "think_start_token": "<think>",
+            "think_end_token": "</think>",
+            "response_start_token": "<response>",
+            "response_end_token": "</response>",
+            "tool_call_start_token": "<tool_call>",
+            "tool_call_end_token": "</tool_call>",
+        }
 
         if not self.model_tokenizer:
             raise ValueError("The model tokenizer must be passed to the ReasoningParser constructor.")
 
-        self.think_end_token_id = self.vocab.get("</think>")
-        if self.think_end_token_id is None:
-            raise RuntimeError("Could not find think end token id in tokenizer vocabulary")
-        self.tool_call_start_token_id = self.vocab.get("<tool_call>")
+        missing_tokens = []
+        for name, token_value in token_definitions.items():
+            setattr(self, name, token_value)
+            token_id = self.vocab.get(token_value)
+            setattr(self, f"{name}_id", token_id)
+            if token_id is None:
+                missing_tokens.append(token_value)
+
+        if missing_tokens:
+            raise RuntimeError(
+                f"ernie x1 reasoning parser could not find the following token ids in tokenizer vocabulary: {', '.join(missing_tokens)}"
+            )
+
+        self.token_status_mapping = {
+            self.think_start_token_id: "think_start",
+            self.think_end_token_id: "think_end",
+            self.response_start_token_id: "response_start",
+            self.response_end_token_id: "response_end",
+            self.tool_call_start_token_id: "tool_call_start",
+            self.tool_call_end_token_id: "tool_call_end",
+        }
+
+    def find_last_special_token(self, prompt_token_ids: list[int]) -> int:
+        for i in range(len(prompt_token_ids) - 1, -1, -1):
+            if prompt_token_ids[i] in self.token_status_mapping:
+                return prompt_token_ids[i]
+        return -1
+
+    def get_model_status(self, prompt_token_ids: list[int]):
+        special_token_id = self.find_last_special_token(prompt_token_ids)
+
+        if special_token_id == -1:
+            return "think_start"
+
+        return self.token_status_mapping[special_token_id]
 
     def extract_reasoning_content_streaming(
         self,
@@ -40,64 +76,82 @@ def extract_reasoning_content_streaming(
         previous_token_ids: Sequence[int],
         current_token_ids: Sequence[int],
         delta_token_ids: Sequence[int],
+        model_status: str,
     ) -> Union[DeltaMessage, None]:
-        # Ignore the single </think> token
-        if len(delta_token_ids) == 1 and delta_token_ids[0] == self.think_end_token_id:
-            return None
-        # --- Thinking stage handling ---
-        if not previous_text.endswith(self.think_end_token) and self.think_end_token not in previous_text:
-            # If delta is </think>, stop thinking, do not return
-            if delta_text.startswith(self.think_end_token):
-                return None
-            # Otherwise, return thinking content (keep \n as-is)
-            return DeltaMessage(reasoning_content=delta_text)
-
-        # --- After thinking ends, check tool_call or response ---
-        remaining_text = previous_text + delta_text
-        after_think = remaining_text[remaining_text.find(self.think_end_token) + len(self.think_end_token) :]
-        after_think = after_think.lstrip("\n")
-
-        # Handle tool_call case: skip it
-        if after_think.startswith(self.tool_call_start_token):
+        if len(delta_token_ids) == 1 and delta_token_ids[0] in [
+            self.think_end_token_id,
+            self.response_start_token_id,
+            self.response_end_token_id,
+            self.tool_call_start_token_id,
+            self.tool_call_end_token_id,
+        ]:
             return None
 
-        # Handle response case
-        if after_think.startswith(self.response_start_token) and self.response_end_token not in after_think:
-            # Do not return when the <response> or </response> tag itself appears
-            if delta_text == self.response_start_token or delta_text == self.response_end_token:
-                return None
-            return DeltaMessage(content=delta_text)
+        if model_status == "think_start":
+            if self.think_end_token in delta_text:
+                response_content = ""
+                end_index = delta_text.find(self.think_end_token)
+                reasoning_content = delta_text[:end_index]
+                response_start_pos = delta_text.find(self.response_start_token)
+                if response_start_pos != -1:
+                    response_content = self._extract_response_content(
+                        delta_text[response_start_pos + len(self.response_start_token) :]
+                    )
+                return DeltaMessage(reasoning_content=reasoning_content, content=response_content)
+            elif self.think_end_token in previous_text:
+                if self.response_start_token in previous_text and self.response_end_token not in previous_text:
+                    return DeltaMessage(content=delta_text)
+            else:
+                return DeltaMessage(reasoning_content=delta_text)
+        elif model_status == "think_end":
+            if self.response_start_token in previous_text and self.response_end_token not in previous_text:
+                return DeltaMessage(content=delta_text)
+        elif model_status == "response_start":
+            if self.response_end_token not in previous_text:
+                return DeltaMessage(content=delta_text)
 
-        # Default case: return nothing
         return None
 
-    def extract_reasoning_content(self, model_output: str, request: ChatCompletionRequest) -> Tuple[str, str]:
+    def extract_reasoning_content(
+        self, model_output: str, request: ChatCompletionRequest, model_status: str
+    ) -> Tuple[str, str]:
+        """
+        Optimized parser: newlines inside reasoning and response content are preserved,
+        and only the single newline before a closing tag is removed.
+        """
         reasoning_content = ""
         response_content = ""
 
-        think_end_pos = model_output.find(self.think_end_token)
-        if think_end_pos != -1:
-            reasoning_content = model_output[:think_end_pos]
-
-            remaining = model_output[think_end_pos + len(self.think_end_token) :]
+        if model_status in ["think_start", "think_end"]:
+            if model_status == "think_start":
+                think_end_pos = model_output.find(self.think_end_token)
+                if think_end_pos != -1:
+                    reasoning_content = model_output[:think_end_pos]
+                    remaining = model_output[think_end_pos + len(self.think_end_token) :].lstrip("\n")
+                else:
+                    reasoning_content = model_output
+                    remaining = ""
+            else:
+                remaining = model_output.lstrip("\n")
 
-            # find <response> or <tool_call>
-            response_pos = remaining.find(self.response_start_token)
-            tool_pos = remaining.find(self.tool_call_start_token)
+            response_start_pos = remaining.find(self.response_start_token)
+            if response_start_pos != -1:
+                response_content = self._extract_response_content(
+                    remaining[response_start_pos + len(self.response_start_token) :]
+                )
 
-            # <response> first
-            if response_pos != -1 and (tool_pos == -1 or response_pos < tool_pos):
-                # The content after the response_start position
-                remaining_response = remaining[response_pos + len(self.response_start_token) :]
-                response_end_pos = remaining_response.find(self.response_end_token)
-                if response_end_pos != -1:
-                    response_content = remaining_response[:response_end_pos]
-                else:
-                    response_content = remaining_response
-            # The content after the response_start position is tool_call
-        else:
-            reasoning_content = model_output
-            response_content = ""
+        elif model_status == "response_start":
+            response_content = self._extract_response_content(model_output)
 
         return reasoning_content, response_content
+
+    def _extract_response_content(self, remaining: str) -> str:
+        """
+        Extracts response content, ensuring that the last newline before
+        the </response> tag is removed.
+        """
+        response_end_pos = remaining.find(self.response_end_token)
+        if response_end_pos != -1:
+            return remaining[:response_end_pos]
+        return remaining
diff --git a/fastdeploy/reasoning/qwen3_reasoning_parsers.py b/fastdeploy/reasoning/qwen3_reasoning_parsers.py
index 463cab83df3..b01cdf0d692 100644
--- a/fastdeploy/reasoning/qwen3_reasoning_parsers.py
+++ b/fastdeploy/reasoning/qwen3_reasoning_parsers.py
@@ -35,22 +35,50 @@ class Qwen3ReasoningParser(ReasoningParser):
 
     def __init__(self, tokenizer):
         super().__init__(tokenizer)
-        self.think_start_token = "<think>"
-        self.think_end_token = "</think>"
+
+        # Define all tokens that need to be checked
+        token_definitions = {
+            "think_start_token": "<think>",
+            "think_end_token": "</think>",
+        }
 
         if not self.model_tokenizer:
-            raise ValueError(
-                "The model tokenizer must be passed to the ReasoningParser " "constructor during construction."
+            raise ValueError("The model tokenizer must be passed to the ReasoningParser constructor.")
+
+        missing_tokens = []
+        for name, token_value in token_definitions.items():
+            setattr(self, name, token_value)
+            token_id = self.vocab.get(token_value)
+            setattr(self, f"{name}_id", token_id)
+            if token_id is None:
+                missing_tokens.append(token_value)
+
+        if missing_tokens:
+            raise RuntimeError(
+                f"Qwen3 reasoning parser could not find the following token ids in tokenizer vocabulary: {', '.join(missing_tokens)}"
             )
-
-        self.think_start_token_id = self.vocab.get(self.think_start_token)
-        self.think_end_token_id = self.vocab.get(self.think_end_token)
-        if self.think_end_token_id is None:
-            raise RuntimeError("Qwen3 reasoning parser could not locate think end " "tokens in the tokenizer!")
+        self.token_status_mapping = {
+            self.think_start_token_id: "think_start",
+            self.think_end_token_id: "think_end",
+        }
 
     def is_reasoning_end(self, input_ids: list[int]) -> bool:
         return self.think_end_token_id in input_ids
 
+    def find_last_special_token(self, prompt_token_ids: list[int]) -> int:
+        for i in range(len(prompt_token_ids) - 1, -1, -1):
+            if prompt_token_ids[i] in self.token_status_mapping:
+                return prompt_token_ids[i]
+        return -1
+
+    def get_model_status(self, prompt_token_ids: list[int]):
+        special_token_id = self.find_last_special_token(prompt_token_ids)
+
+        if special_token_id == -1:
+            return "think_start"
+
+        return self.token_status_mapping[special_token_id]
+
     def extract_reasoning_content_streaming(
         self,
         previous_text: str,
@@ -59,6 +87,7 @@ def extract_reasoning_content_streaming(
         previous_token_ids: Sequence[int],
         current_token_ids: Sequence[int],
         delta_token_ids: Sequence[int],
+        model_status: str,
     ) -> Union[DeltaMessage, None]:
         """
         Extract reasoning content from a delta message.
@@ -71,39 +100,42 @@ def extract_reasoning_content_streaming(
         if len(delta_token_ids) == 1 and (delta_token_ids[0] in [self.think_start_token_id, self.think_end_token_id]):
             return None
 
-        # </think> in delta
-        if self.think_end_token_id in delta_token_ids:
-            # <think> in delta, </think> in delta, extract reasoning content
-            if self.think_start_token_id in delta_token_ids:
-                start_index = delta_text.find(self.think_start_token)
-                end_index = delta_token_ids.find(self.think_end_token)
-                reasoning_content = delta_text[start_index + len(self.think_start_token) : end_index]
-                content = delta_text[end_index + len(self.think_end_token) :]
-                return DeltaMessage(reasoning_content=reasoning_content, content=content)
-            # <think> in previous, </think> in delta,
-            else:
-                end_index = delta_text.find(self.think_end_token)
-                reasoning_content = delta_text[:end_index]
-                content = delta_text[end_index + len(self.think_end_token) :]
-                content = content if content else None
-                return DeltaMessage(reasoning_content=reasoning_content, content=content)
-        # </think> in previous reasoning content continues
-        elif self.think_end_token_id in previous_token_ids:
-            return DeltaMessage(content=delta_text)
-        # <think> in previous
-        elif self.think_start_token_id in previous_token_ids:
-            return DeltaMessage(reasoning_content=delta_text)
-        # <think> in delta
-        elif self.think_start_token_id in delta_token_ids:
-            start_index = delta_text.find(self.think_start_token)
-            reasoning_content = delta_text[start_index + len(self.think_start_token) :]
-            content = ""
-            return DeltaMessage(reasoning_content=reasoning_content, content=content)
-        else:
-            return DeltaMessage(reasoning_content=delta_text)
+        if model_status == "think_start":
+            # </think> in delta
+            if self.think_end_token_id in delta_token_ids:
+                # <think> in delta, </think> in delta, extract reasoning content
+                if self.think_start_token_id in delta_token_ids:
+                    start_index = delta_text.find(self.think_start_token)
+                    end_index = delta_token_ids.find(self.think_end_token)
+                    reasoning_content = delta_text[start_index + len(self.think_start_token) : end_index]
+                    content = delta_text[end_index + len(self.think_end_token) :]
+                    return DeltaMessage(reasoning_content=reasoning_content, content=content)
+                # <think> in previous, </think> in delta,
+                else:
+                    end_index = delta_text.find(self.think_end_token)
+                    reasoning_content = delta_text[:end_index]
+                    content = delta_text[end_index + len(self.think_end_token) :]
+                    content = content if content else None
+                    return DeltaMessage(reasoning_content=reasoning_content, content=content)
+            # </think> in previous reasoning content continues
+            elif self.think_end_token_id in previous_token_ids:
+                return DeltaMessage(content=delta_text)
+            # <think> in previous
+            elif self.think_start_token_id in previous_token_ids:
+                return DeltaMessage(reasoning_content=delta_text)
+            # <think> in delta
+            elif self.think_start_token_id in delta_token_ids:
+                start_index = delta_text.find(self.think_start_token)
+                reasoning_content = delta_text[start_index + len(self.think_start_token) :]
+                content = ""
+                return DeltaMessage(reasoning_content=reasoning_content, content=content)
+            else:
+                return DeltaMessage(reasoning_content=delta_text)
+        else:
+            return DeltaMessage(content=delta_text)
 
     def extract_reasoning_content(
-        self, model_output: str, request: ChatCompletionRequest
+        self, model_output: str, request: ChatCompletionRequest, model_status: str
     ) -> tuple[Optional[str], Optional[str]]:
         """
         Extract reasoning content from the model output.
@@ -116,36 +148,39 @@ def extract_reasoning_content(
             tuple[Optional[str], Optional[str]]: reasoning content and content
         """
-        # 检查是否包含结束标签
-        if self.think_end_token not in model_output:
-            return None, model_output
-
-        # 检查是否有起始标签
-        if self.think_start_token in model_output:
-            # 标准格式:<think>content</think>answer
-            if self.think_start_token not in model_output or self.think_end_token not in model_output:
-                return None, model_output
-            # Check if the <think> is present in the model output, remove it
-            # if it is present.
-            model_output_parts = model_output.partition(self.think_start_token)
-            model_output = model_output_parts[2] if model_output_parts[1] else model_output_parts[0]
-            # Check if the model output contains the </think> tokens.
-            # If the end token is not found, return the model output as is.
-            if self.think_end_token not in model_output:
-                return None, model_output
-
-            # Extract reasoning content from the model output.
-            reasoning_content, _, content = model_output.partition(self.think_end_token)
-
-            final_content = content or None
-            return reasoning_content, final_content
-        else:
-            # 缺少起始标签的格式:content</think>answer
-            parts = model_output.split(self.think_end_token, 1)
-
-            if len(parts) == 2:
-                reasoning_content = parts[0].strip()
-                final_content = parts[1].strip() if parts[1].strip() else None
-                return reasoning_content, final_content
-
-            return None, model_output
+        if model_status == "think_start":
+            # 检查是否包含结束标签
+            if self.think_end_token not in model_output:
+                return None, model_output
+
+            # 检查是否有起始标签
+            if self.think_start_token in model_output:
+                # 标准格式:<think>content</think>answer
+                if self.think_start_token not in model_output or self.think_end_token not in model_output:
+                    return None, model_output
+                # Check if the <think> is present in the model output, remove it
+                # if it is present.
+                model_output_parts = model_output.partition(self.think_start_token)
+                model_output = model_output_parts[2] if model_output_parts[1] else model_output_parts[0]
+                # Check if the model output contains the </think> tokens.
+                # If the end token is not found, return the model output as is.
+                if self.think_end_token not in model_output:
+                    return None, model_output
+
+                # Extract reasoning content from the model output.
+                reasoning_content, _, content = model_output.partition(self.think_end_token)
+
+                final_content = content or None
+                return reasoning_content, final_content
+            else:
+                # 缺少起始标签的格式:content</think>answer
+                parts = model_output.split(self.think_end_token, 1)
+
+                if len(parts) == 2:
+                    reasoning_content = parts[0].strip()
+                    final_content = parts[1].strip() if parts[1].strip() else None
+                    return reasoning_content, final_content
+
+                return None, model_output
+        else:
+            return None, model_output
diff --git a/tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py b/tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
index e51018f201e..52c845071d0 100644
--- a/tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
+++ b/tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
@@ -482,7 +482,7 @@ def test_chat_with_thinking(openai_client, capsys):
         max_tokens=10,
         extra_body={"chat_template_kwargs": {"enable_thinking": False}},
     )
-    assert response.choices[0].message.reasoning_content is None
+    assert response.choices[0].message.reasoning_content == ""
     assert "</think>" not in response.choices[0].message.content
 
     # test logic
@@ -957,4 +957,4 @@ def test_thinking_logic_flag(openai_client, capsys):
             "chat_template_kwargs": {"enable_thinking": False},
         },
     )
-    assert response_case_3.choices[0].message.reasoning_content is None
+    assert response_case_3.choices[0].message.reasoning_content == ""
diff --git a/tests/e2e/test_EB_VL_Lite_serving.py b/tests/e2e/test_EB_VL_Lite_serving.py
index f93f355a754..e2b4e4e60f8 100644
--- a/tests/e2e/test_EB_VL_Lite_serving.py
+++ b/tests/e2e/test_EB_VL_Lite_serving.py
@@ -545,7 +545,7 @@ def test_chat_with_thinking(openai_client, capsys):
         max_tokens=10,
         extra_body={"chat_template_kwargs": {"enable_thinking": False}},
     )
-    assert response.choices[0].message.reasoning_content is None
+    assert response.choices[0].message.reasoning_content == ""
     assert "</think>" not in response.choices[0].message.content
 
     # test logic
@@ -716,4 +716,4 @@ def test_thinking_logic_flag(openai_client, capsys):
             "chat_template_kwargs": {"enable_thinking": False},
         },
    )
-    assert response_case_3.choices[0].message.reasoning_content is None
+    assert response_case_3.choices[0].message.reasoning_content == ""
diff --git a/tests/e2e/test_EB_VL_Lite_sot_serving.py b/tests/e2e/test_EB_VL_Lite_sot_serving.py
index b2d8add1b0e..b21c99329a5 100644
--- a/tests/e2e/test_EB_VL_Lite_sot_serving.py
+++ b/tests/e2e/test_EB_VL_Lite_sot_serving.py
@@ -312,7 +312,7 @@ def test_chat_with_thinking(openai_client, capsys):
         max_tokens=10,
         extra_body={"chat_template_kwargs": {"enable_thinking": False}},
     )
-    assert response.choices[0].message.reasoning_content is None
+    assert response.choices[0].message.reasoning_content == ""
     assert "</think>" not in response.choices[0].message.content
 
     # test logic
@@ -404,4 +404,4 @@ def test_thinking_logic_flag(openai_client, capsys):
             "chat_template_kwargs": {"enable_thinking": False},
         },
     )
-    assert response_case_3.choices[0].message.reasoning_content is None
+    assert response_case_3.choices[0].message.reasoning_content == ""
diff --git a/tests/entrypoints/openai/test_finish_reason.py b/tests/entrypoints/openai/test_finish_reason.py
index 4bdb3feefc8..d39cf917208 100644
--- a/tests/entrypoints/openai/test_finish_reason.py
+++ b/tests/entrypoints/openai/test_finish_reason.py
@@ -43,6 +43,8 @@ async def asyncSetUp(self):
         self.multi_modal_processor._check_mm_limits = Mock()
         self.multi_modal_processor.append_completion_tokens = Mock()
         self.multi_modal_processor.pack_outputs = lambda x: x
+        
self.multi_modal_processor.reasoning_parser = None + self.multi_modal_processor.model_status_dict = {} self.engine_client = Mock() self.engine_client.connection_initialized = False @@ -242,7 +244,7 @@ async def test_chat_full_max_tokens(self, mock_data_logger, mock_processor_class mock_processor_instance = Mock() mock_processor_instance.enable_multimodal_content.return_value = True - async def mock_process_response_chat_async(response, stream, enable_thinking, include_stop_str_in_output): + async def mock_process_response_chat_async(response, stream, include_stop_str_in_output): yield response mock_processor_instance.process_response_chat = mock_process_response_chat_async @@ -423,7 +425,7 @@ async def test_chat_stream_max_tokens(self, mock_api_logger, mock_processor_clas mock_processor_instance = Mock() mock_processor_instance.enable_multimodal_content.return_value = False - async def mock_process_response_chat_async(response, stream, enable_thinking, include_stop_str_in_output): + async def mock_process_response_chat_async(response, stream, include_stop_str_in_output): if isinstance(response, list): for res in response: yield res diff --git a/tests/entrypoints/openai/test_max_streaming_tokens.py b/tests/entrypoints/openai/test_max_streaming_tokens.py index 1e728aa3b8d..ab950e2b5ae 100644 --- a/tests/entrypoints/openai/test_max_streaming_tokens.py +++ b/tests/entrypoints/openai/test_max_streaming_tokens.py @@ -149,7 +149,7 @@ async def test_integration_with_chat_stream_generator(self, mock_processor_class mock_processor_instance = Mock() - async def mock_process_response_chat_single(response, stream, enable_thinking, include_stop_str_in_output): + async def mock_process_response_chat_single(response, stream, include_stop_str_in_output): yield response mock_processor_instance.process_response_chat = mock_process_response_chat_single @@ -515,7 +515,7 @@ async def test_chat_stream_usage_fields(self, mock_response_processor, api_serve mock_processor_instance = Mock() - async def mock_process_response_chat(response, stream, enable_thinking, include_stop_str_in_output): + async def mock_process_response_chat(response, stream, include_stop_str_in_output): delta_msg_mock = Mock() delta_msg_mock.content = response["outputs"]["text"] if response["outputs"]["text"] == "a": diff --git a/tests/entrypoints/openai/test_response_processors.py b/tests/entrypoints/openai/test_response_processors.py index afab163b97e..34cade7cd82 100644 --- a/tests/entrypoints/openai/test_response_processors.py +++ b/tests/entrypoints/openai/test_response_processors.py @@ -48,7 +48,7 @@ async def test_text_only_mode(self): results = [ r async for r in processor.process_response_chat( - request_outputs, stream=False, enable_thinking=False, include_stop_str_in_output=False + request_outputs, stream=False, include_stop_str_in_output=False ) ] @@ -67,7 +67,7 @@ async def test_streaming_text_and_image(self): results = [ r async for r in self.processor_mm.process_response_chat( - request_outputs, stream=True, enable_thinking=False, include_stop_str_in_output=False + request_outputs, stream=True, include_stop_str_in_output=False ) ] @@ -94,7 +94,7 @@ async def test_streaming_buffer_accumulation(self): results = [ r async for r in self.processor_mm.process_response_chat( - request_outputs, stream=True, enable_thinking=False, include_stop_str_in_output=False + request_outputs, stream=True, include_stop_str_in_output=False ) ] @@ -112,7 +112,7 @@ async def test_non_streaming_accumulate_and_emit(self): results = [ r async for r in 
self.processor_mm.process_response_chat( - request_outputs, stream=False, enable_thinking=False, include_stop_str_in_output=False + request_outputs, stream=False, include_stop_str_in_output=False ) ] diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py deleted file mode 100644 index 394a23f0f4e..00000000000 --- a/tests/entrypoints/openai/test_serving_chat.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -import unittest -from unittest.mock import MagicMock - -from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest -from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat - - -class TestOpenAIServingCompletion(unittest.TestCase): - - def setUp(self): - """ - Set up the test environment by creating an instance of the OpenAIServingChat class using Mock. - """ - self.mock_engine = MagicMock() - self.chat_completion_handler = OpenAIServingChat( - self.mock_engine, - models=None, - pid=123, - ips=None, - max_waiting_time=10, - chat_template=None, - ) - - def test_enable_thinking(self): - request = ChatCompletionRequest(messages=[], chat_template_kwargs={}) - enable_thinking = self.chat_completion_handler._get_thinking_status(request) - self.assertEqual(enable_thinking, None) - - request = ChatCompletionRequest(messages=[], chat_template_kwargs={"enable_thinking": True}) - enable_thinking = self.chat_completion_handler._get_thinking_status(request) - self.assertEqual(enable_thinking, True) - - request = ChatCompletionRequest(messages=[], chat_template_kwargs={"enable_thinking": False}) - enable_thinking = self.chat_completion_handler._get_thinking_status(request) - self.assertEqual(enable_thinking, False) - - request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "close"}}) - enable_thinking = self.chat_completion_handler._get_thinking_status(request) - self.assertEqual(enable_thinking, False) - - request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "false"}}) - enable_thinking = self.chat_completion_handler._get_thinking_status(request) - self.assertEqual(enable_thinking, False) - - request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "open"}}) - enable_thinking = self.chat_completion_handler._get_thinking_status(request) - self.assertEqual(enable_thinking, True) - - request = ChatCompletionRequest(messages=[], chat_template_kwargs={"options": {"thinking_mode": "123"}}) - enable_thinking = self.chat_completion_handler._get_thinking_status(request) - self.assertEqual(enable_thinking, True) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/entrypoints/openai/tool_parsers/test_ernie_x1_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_ernie_x1_tool_parser.py index e818801d935..1b8b58d1e95 100644 --- 
a/tests/entrypoints/openai/tool_parsers/test_ernie_x1_tool_parser.py +++ b/tests/entrypoints/openai/tool_parsers/test_ernie_x1_tool_parser.py @@ -52,33 +52,12 @@ def test_extract_tool_calls_complete(self): self.assertTrue(result.tools_called) self.assertEqual(result.tool_calls[0].function.name, "get_weather") - def test_extract_tool_calls_partial_arguments(self): - """Test partial extraction when arguments incomplete""" - output = '{"name": "get_weather", "arguments": {"location": "北"' - result = self.parser.extract_tool_calls(output, self.dummy_request) - self.assertFalse(result.tools_called) - self.assertEqual(result.tool_calls[0].function.name, "get_weather") - - def test_extract_tool_calls_invalid_response_before_toolcall(self): - """Test case where before is invalid""" - output = 'hello{"name": "get_weather", "arguments": {}}' - result = self.parser.extract_tool_calls(output, self.dummy_request) - self.assertFalse(result.tools_called) - self.assertIn("", result.content) - def test_extract_tool_calls_no_toolcall(self): """Test when no tool_call tags are present""" output = "no tool call here" result = self.parser.extract_tool_calls(output, self.dummy_request) self.assertFalse(result.tools_called) - def test_extract_tool_calls_invalid_json(self): - """Test tool_call with badly formatted JSON triggers fallback parser""" - output = '"name": "get_weather", "arguments": {' - result = self.parser.extract_tool_calls(output, self.dummy_request) - self.assertFalse(result.tools_called) - self.assertEqual(result.tool_calls[0].function.name, "get_weather") - def test_extract_tool_calls_exception(self): """Force exception to cover error branch""" with patch( diff --git a/tests/input/test_ernie4_5_processor.py b/tests/input/test_ernie4_5_processor.py index 8c7386fef85..8ccb4e60f9c 100644 --- a/tests/input/test_ernie4_5_processor.py +++ b/tests/input/test_ernie4_5_processor.py @@ -73,6 +73,7 @@ def extract_reasoning_content_streaming( previous_token_ids, all_token_ids, delta_token_ids, + model_status, ): """Return a simple object with reasoning_content to cover reasoning branch.""" @@ -145,6 +146,7 @@ def _make_processor(self, reasoning=False, tool=False): tool_cls = MockToolParser if tool else None proc = Ernie4_5Processor("dummy-model", reasoning_parser_obj=reasoning_cls, tool_parser_obj=tool_cls) proc._apply_default_parameters = lambda req: req + proc.model_status_dict = {"req-1": "think_start"} return proc def test_update_bad_words(self): diff --git a/tests/input/test_ernie_processor.py b/tests/input/test_ernie_processor.py index 2a9dcb23cf8..6f5fad89403 100644 --- a/tests/input/test_ernie_processor.py +++ b/tests/input/test_ernie_processor.py @@ -4,6 +4,11 @@ from fastdeploy.input.ernie4_5_processor import Ernie4_5Processor +class MockReasoningParser: + def get_model_status(self, prompt_token_ids): + return "think_start" + + class TestErnie4_5ProcessorProcessResponseDictStreaming(unittest.TestCase): def setUp(self): # 创建 Ernie4_5Processor 实例的模拟对象 @@ -14,12 +19,13 @@ def setUp(self): # 设置必要的属性 self.processor.tokenizer = MagicMock() self.processor.tokenizer.eos_token_id = 1 - self.processor.decode_status = {} + self.processor.decode_status = {"test": []} self.processor.reasoning_end_dict = {} self.processor.tool_parser_dict = {} self.processor.generation_config = MagicMock() self.processor.eos_token_ids = [1] - self.processor.reasoning_parser = MagicMock() + self.processor.reasoning_parser = MockReasoningParser() + self.processor.model_status_dict = {"request-id_0": "think_start", "test": 
"think_start"} # 模拟 ids2tokens 方法 def mock_ids2tokens(token_ids, task_id): @@ -56,7 +62,7 @@ def mock_apply_default_parameters(request): def test_process_response_dict_streaming_normal_case(self): """测试正常情况下的流式响应处理""" # 准备输入 - response_dict = {"finished": False, "request_id": "req1", "outputs": {"token_ids": [4, 5]}} + response_dict = {"finished": False, "request_id": "test", "outputs": {"token_ids": [4, 5]}} kwargs = {"enable_thinking": True} # 调用方法 @@ -67,6 +73,7 @@ def test_process_response_dict_streaming_normal_case(self): def test_process_request_dict(self): request_dict = { + "request_id": "123", "messages": [{"role": "user", "content": "Hello!"}], "chat_template_kwargs": {"chat_template": "Hello!"}, "eos_token_ids": [1], diff --git a/tests/input/test_ernie_vl_processor.py b/tests/input/test_ernie_vl_processor.py index 92d24d5b96f..facc8c30cfa 100644 --- a/tests/input/test_ernie_vl_processor.py +++ b/tests/input/test_ernie_vl_processor.py @@ -1,12 +1,19 @@ import unittest from unittest.mock import MagicMock, patch +import numpy as np + from fastdeploy.input.ernie4_5_vl_processor import Ernie4_5_VLProcessor -class TestErnie4_5_vl_ProcessorProcessResponseDictStreaming(unittest.TestCase): +class MockReasoningParser: + def get_model_status(self, prompt_token_ids): + return "think_start" + + +class TestErnie4_5VLProcessorProcessResponseDictStreaming(unittest.TestCase): def setUp(self): - # 创建 Ernie4_5Processor 实例的模拟对象 + # 创建 Ernie4_5_VLProcessor 实例的模拟对象 with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None) as mock_init: self.processor = Ernie4_5_VLProcessor("model_path") mock_init.side_effect = lambda *args, **kwargs: print(f"__init__ called with {args}, {kwargs}") @@ -14,38 +21,41 @@ def setUp(self): # 设置必要的属性 self.processor.tokenizer = MagicMock() self.processor.tokenizer.eos_token_id = 1 - self.processor.decode_status = {} + self.processor.decode_status = {"test": []} self.processor.reasoning_end_dict = {} self.processor.tool_parser_dict = {} self.processor.generation_config = MagicMock() self.processor.eos_token_ids = [1] - self.processor.reasoning_parser = MagicMock() - self.processor._check_mm_limits = MagicMock() + self.processor.reasoning_parser = MockReasoningParser() + self.processor.model_status_dict = {"test": "think_start"} self.processor.ernie4_5_processor = MagicMock() - self.processor.pack_outputs = MagicMock() # 模拟 ids2tokens 方法 def mock_ids2tokens(token_ids, task_id): - self.processor.decode_status[task_id] = "mock_decode_status" return "delta_text", [2, 3], "previous_texts" self.processor.ids2tokens = mock_ids2tokens - def mock_messages2ids(request, **kwargs): - if "chat_template" in kwargs: - return [1] - else: - return [0] + def mock_request2ids(request, **kwargs): + return {"input_ids": np.array([1, 2, 3]), "prompt_token_ids": [0]} + + def mock_check_mm_limits(item): + pass def mock_apply_default_parameters(request): return request + def mock_pack_outputs(outputs): + return outputs + self.processor._apply_default_parameters = mock_apply_default_parameters + self.processor._check_mm_limits = mock_check_mm_limits + self.processor.ernie4_5_processor.request2ids = mock_request2ids + self.processor.pack_outputs = mock_pack_outputs # 模拟推理解析器 self.mock_reasoning_parser = MagicMock() - self.mock_reasoning_parser.__class__.__name__ = "ErnieX1ReasoningParser" - # self.mock_reasoning_parser.extract_reasoning_content_streaming.return_value = ("reasoning", "text") + self.mock_reasoning_parser.extract_reasoning_content_streaming.return_value = None 
self.processor.reasoning_parser = self.mock_reasoning_parser # 模拟工具解析器 @@ -55,61 +65,29 @@ def mock_apply_default_parameters(request): self.mock_tool_parser_obj.return_value = self.mock_tool_parser self.processor.tool_parser_obj = self.mock_tool_parser_obj - def test_process_request_dict_with_options(self): - request_dict = { - "messages": [{"role": "user", "content": "Hello"}], - "prompt_token_ids": [1, 1, 1], - } - self.processor.process_request_dict(request_dict, 100) - self.assertEqual(request_dict["enable_thinking"], True) - - request_dict = { - "messages": [{"role": "user", "content": "Hello"}], - "chat_template_kwargs": {"enable_thinking": True}, - "prompt_token_ids": [1, 1, 1], - } - self.processor.process_request_dict(request_dict, 100) - self.assertEqual(request_dict["enable_thinking"], True) - - request_dict = { - "messages": [{"role": "user", "content": "Hello"}], - "chat_template_kwargs": {"enable_thinking": False}, - "prompt_token_ids": [1, 1, 1], - } - self.processor.process_request_dict(request_dict, 100) - self.assertEqual(request_dict["enable_thinking"], False) - - request_dict = { - "messages": [{"role": "user", "content": "Hello"}], - "chat_template_kwargs": {"options": {"thinking_mode": "open"}}, - "prompt_token_ids": [1, 1, 1], - } - self.processor.process_request_dict(request_dict, 100) - self.assertEqual(request_dict["enable_thinking"], True) + def test_process_response_dict_streaming_normal_case(self): + """测试正常情况下的流式响应处理""" + # 准备输入 + response_dict = {"finished": False, "request_id": "test", "outputs": {"token_ids": [4, 5]}} + kwargs = {"enable_thinking": True} - request_dict = { - "messages": [{"role": "user", "content": "Hello"}], - "chat_template_kwargs": {"options": {"thinking_mode": "close"}}, - "prompt_token_ids": [1, 1, 1], - } - self.processor.process_request_dict(request_dict, 100) - self.assertEqual(request_dict["enable_thinking"], False) + # 调用方法 + result = self.processor.process_response_dict_streaming(response_dict, **kwargs) - request_dict = { - "messages": [{"role": "user", "content": "Hello"}], - "chat_template_kwargs": {"options": {"thinking_mode": "false"}}, - "prompt_token_ids": [1, 1, 1], - } - self.processor.process_request_dict(request_dict, 100) - self.assertEqual(request_dict["enable_thinking"], False) + # 验证结果 + self.assertEqual(result["outputs"]["completion_tokens"], "delta_text") + def test_process_request_dict(self): request_dict = { - "messages": [{"role": "user", "content": "Hello"}], - "chat_template_kwargs": {"options": {"thinking_mode": "123"}}, - "prompt_token_ids": [1, 1, 1], + "request_id": "123", + "messages": [{"role": "user", "content": "Hello!"}], + "chat_template_kwargs": {"chat_template": "Hello!"}, + "eos_token_ids": [1], + "temperature": 1, + "top_p": 1, } - self.processor.process_request_dict(request_dict, 100) - self.assertEqual(request_dict["enable_thinking"], True) + result = self.processor.process_request_dict(request_dict, 100) + self.assertEqual(result["prompt_token_ids"], [1, 2, 3]) if __name__ == "__main__": diff --git a/tests/input/test_text_processor.py b/tests/input/test_text_processor.py index 794d81895d7..b22b2d5a0ad 100644 --- a/tests/input/test_text_processor.py +++ b/tests/input/test_text_processor.py @@ -20,6 +20,7 @@ def setUp(self): self.processor.tool_parser_dict = {} self.processor.generation_config = MagicMock() self.processor.eos_token_ids = [1] + self.processor.model_status_dict = {"request-id_0": "think_start"} self.processor.reasoning_parser = MagicMock() def mock_messages2ids(request, 
**kwargs): @@ -50,6 +51,7 @@ def test_process_request(self): def test_process_request_dict(self): request_dict = { + "request_id": "123", "messages": [{"role": "user", "content": "Hello!"}], "chat_template_kwargs": {"chat_template": "Hello!"}, "eos_token_ids": [1], diff --git a/tests/reasoning/test_qwen3_reasoning_parser.py b/tests/reasoning/test_qwen3_reasoning_parser.py new file mode 100644 index 00000000000..cde56601608 --- /dev/null +++ b/tests/reasoning/test_qwen3_reasoning_parser.py @@ -0,0 +1,197 @@ +import unittest + +from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest +from fastdeploy.reasoning.qwen3_reasoning_parsers import Qwen3ReasoningParser + + +class MockTokenizer: + """Minimal tokenizer with vocab for testing.""" + + def __init__(self): + self.vocab = { + "": 100, + "": 101, + } + + def get_vocab(self): + """Return vocab dict for testing.""" + return self.vocab + + +class MissingTokenTokenizer: + def __init__(self): + self.vocab = { + "": 100, + } + + def get_vocab(self): + """Return vocab dict for testing.""" + return self.vocab + + +class TestQwen3ReasoningParser(unittest.TestCase): + def setUp(self): + self.parser = Qwen3ReasoningParser(MockTokenizer()) + self.request = ChatCompletionRequest(model="test", messages=[{"role": "user", "content": "test message"}]) + self.tokenizer = MockTokenizer() + + def test_missing_token(self): + with self.assertRaises(RuntimeError) as context: + Qwen3ReasoningParser(MissingTokenTokenizer()) + exception_message = str(context.exception) + expected_message_part = "Qwen3 reasoning parser could not find the following token ids" + self.assertIn(expected_message_part, exception_message) + + def test_get_model_status(self): + status = self.parser.get_model_status([1, 2, 100]) + self.assertEqual(status, "think_start") + status = self.parser.get_model_status([1, 2, 101]) + self.assertEqual(status, "think_end") + status = self.parser.get_model_status([1]) + self.assertEqual(status, "think_start") + + def test_streaming_thinking_content(self): + msg = self.parser.extract_reasoning_content_streaming( + previous_text="", + current_text="a", + delta_text="a", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[200], + model_status="think_start", + ) + self.assertEqual(msg.reasoning_content, "a") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="", + current_text="ab", + delta_text="ab", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[99, 101, 102], + model_status="think_start", + ) + self.assertEqual(msg.reasoning_content, "a") + self.assertEqual(msg.content, "b") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="a", + current_text="ab", + delta_text="b", + previous_token_ids=[1, 101], + current_token_ids=[], + delta_token_ids=[102], + model_status="think_start", + ) + self.assertEqual(msg.content, "b") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="", + current_text="a", + delta_text="a", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[], + model_status="think_start", + ) + self.assertEqual(msg.reasoning_content, "a") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="", + current_text="a", + delta_text="a", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[200], + model_status="think_end", + ) + self.assertEqual(msg.content, "a") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="hello", + current_text="hellohi", + 
delta_text="hi", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[101, 200], + model_status="think_start", + ) + self.assertEqual(msg.content, "hi") + self.assertEqual(msg.reasoning_content, "") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="hello", + current_text="hellohi", + delta_text="hi", + previous_token_ids=[100], + current_token_ids=[], + delta_token_ids=[], + model_status="think_start", + ) + self.assertEqual(msg.content, None) + self.assertEqual(msg.reasoning_content, "hi") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="hello", + current_text="hellohi", + delta_text="hi", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[100, 200], + model_status="think_start", + ) + self.assertEqual(msg.content, "") + self.assertEqual(msg.reasoning_content, "hi") + + def test_none_streaming_thinking_content(self): + reasoning_content, content = self.parser.extract_reasoning_content( + model_output="a", + request={}, + model_status="think_start", + ) + self.assertEqual(reasoning_content, None) + self.assertEqual(content, "a") + + reasoning_content, content = self.parser.extract_reasoning_content( + model_output="ab", + request={}, + model_status="think_start", + ) + self.assertEqual(reasoning_content, "a") + self.assertEqual(content, "b") + + reasoning_content, content = self.parser.extract_reasoning_content( + model_output="a", + request={}, + model_status="think_end", + ) + self.assertEqual(reasoning_content, None) + self.assertEqual(content, "a") + + reasoning_content, content = self.parser.extract_reasoning_content( + model_output="a", + request={}, + model_status="think_start", + ) + self.assertEqual(reasoning_content, None) + self.assertEqual(content, "a") + + reasoning_content, content = self.parser.extract_reasoning_content( + model_output="ab", + request={}, + model_status="think_start", + ) + self.assertEqual(reasoning_content, "a") + self.assertEqual(content, "b") + + reasoning_content, content = self.parser.extract_reasoning_content( + model_output="b", + request={}, + model_status="think_start", + ) + self.assertEqual(reasoning_content, "") + self.assertEqual(content, "b") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/reasoning/test_reasoning_parser.py b/tests/reasoning/test_reasoning_parser.py index e6deded445d..b4899e46bf6 100644 --- a/tests/reasoning/test_reasoning_parser.py +++ b/tests/reasoning/test_reasoning_parser.py @@ -31,10 +31,25 @@ class DummyTokenizer: def __init__(self): self.vocab = { "": 100, - "": 101, - "": 102, - "": 103, - "": 104, + "": 101, + "": 102, + "": 103, + "": 104, + "": 105, + } + + def get_vocab(self): + """Return vocab dict for testing.""" + return self.vocab + + +class MissingTokenTokenizer: + def __init__(self): + self.vocab = { + "": 100, + "": 101, + "": 102, + "": 103, } def get_vocab(self): @@ -132,6 +147,17 @@ def setUp(self): self.request = ChatCompletionRequest(model="test", messages=[{"role": "user", "content": "test message"}]) self.tokenizer = DummyTokenizer() + def test_missing_token(self): + with self.assertRaises(RuntimeError) as context: + ErnieX1ReasoningParser(MissingTokenTokenizer()) + exception_message = str(context.exception) + expected_message_part = "ernie x1 reasoning parser could not find the following token ids" + self.assertIn(expected_message_part, exception_message) + + def test_get_model_status(self): + model_status = self.parser.get_model_status([88, 99, 104]) + self.assertEqual(model_status, 
"response_start") + # ---- Streaming parsing ---- def test_streaming_thinking_content(self): msg = self.parser.extract_reasoning_content_streaming( @@ -141,6 +167,7 @@ def test_streaming_thinking_content(self): previous_token_ids=[], current_token_ids=[], delta_token_ids=[200], + model_status="think_start", ) self.assertEqual(msg.reasoning_content, "a") @@ -152,6 +179,7 @@ def test_streaming_thinking_newline_preserved(self): previous_token_ids=[], current_token_ids=[], delta_token_ids=[201], + model_status="think_start", ) self.assertEqual(msg.reasoning_content, "\n") @@ -163,6 +191,7 @@ def test_streaming_thinking_end_tag(self): previous_token_ids=[], current_token_ids=[], delta_token_ids=[self.parser.think_end_token_id], + model_status="think_start", ) self.assertIsNone(msg) @@ -174,6 +203,7 @@ def test_streaming_response_content(self): previous_token_ids=[], current_token_ids=[], delta_token_ids=[202], + model_status="think_start", ) self.assertEqual(msg.content, "h") @@ -185,6 +215,7 @@ def test_streaming_response_newline_preserved(self): previous_token_ids=[], current_token_ids=[], delta_token_ids=[203], + model_status="think_start", ) self.assertEqual(msg.content, "\n") @@ -197,6 +228,7 @@ def test_streaming_response_ignore_tags(self): previous_token_ids=[], current_token_ids=[], delta_token_ids=[self.parser.vocab[""]], + model_status="think_start", ) ) @@ -207,6 +239,7 @@ def test_streaming_response_ignore_tags(self): previous_token_ids=[], current_token_ids=[], delta_token_ids=[204], + model_status="think_start", ) self.assertIsInstance(msg, DeltaMessage) self.assertEqual(msg.content, "\n") @@ -219,9 +252,82 @@ def test_streaming_response_ignore_tags(self): previous_token_ids=[], current_token_ids=[], delta_token_ids=[self.parser.vocab[""]], + model_status="think_start", ) ) + def test_extract_reasoning_content_streaming(self): + msg = self.parser.extract_reasoning_content_streaming( + previous_text="hello", + current_text="hello", + delta_text="", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[100, 200], + model_status="think_start", + ) + self.assertEqual(msg.content, "") + self.assertEqual(msg.reasoning_content, "") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="hello", + current_text="hellohi", + delta_text="hi", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[100, 200], + model_status="think_start", + ) + self.assertEqual(msg.content, "hi") + self.assertEqual(msg.reasoning_content, "") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="", + current_text="hellohi", + delta_text="hellohi", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[100, 200], + model_status="think_start", + ) + self.assertEqual(msg.content, "hi") + self.assertEqual(msg.reasoning_content, "hello") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="hello", + current_text="hellohi", + delta_text="hi", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[100, 200], + model_status="think_end", + ) + self.assertEqual(msg.content, "hi") + self.assertEqual(msg.reasoning_content, None) + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="hello", + current_text="hellohi", + delta_text="hi", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[100, 200], + model_status="response_start", + ) + self.assertEqual(msg.content, "hi") + self.assertEqual(msg.reasoning_content, None) + + msg = 
self.parser.extract_reasoning_content_streaming( + previous_text="hellohi", + current_text="hellohiend", + delta_text="end", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[100, 200], + model_status="response_start", + ) + self.assertEqual(msg, None) + def test_streaming_tool_call(self): msg = self.parser.extract_reasoning_content_streaming( previous_text="", @@ -230,40 +336,48 @@ def test_streaming_tool_call(self): previous_token_ids=[], current_token_ids=[], delta_token_ids=[self.parser.vocab[""]], + model_status="think_start", ) self.assertIsNone(msg) # ---- Batch parsing ---- def test_batch_reasoning_and_response(self): text = "abc\n\nhello\nworld" - reasoning, response = self.parser.extract_reasoning_content(text, self.request) + reasoning, response = self.parser.extract_reasoning_content(text, self.request, "think_start") self.assertEqual(reasoning, "abc\n") self.assertEqual(response, "hello\nworld") def test_batch_reasoning_and_tool_call(self): text = "abccall_here" - reasoning, response = self.parser.extract_reasoning_content(text, self.request) + reasoning, response = self.parser.extract_reasoning_content(text, self.request, "think_start") self.assertEqual(reasoning, "abc") self.assertEqual(response, "") def test_batch_no_thinking_tag(self): text = "no_thinking_here" - reasoning, response = self.parser.extract_reasoning_content(text, self.request) + reasoning, response = self.parser.extract_reasoning_content(text, self.request, "think_start") self.assertEqual(reasoning, "no_thinking_here") self.assertEqual(response, "") def test_batch_response_without_end_tag(self): text = "abcpartial response" - reasoning, response = self.parser.extract_reasoning_content(text, self.request) + reasoning, response = self.parser.extract_reasoning_content(text, self.request, "think_start") self.assertEqual(reasoning, "abc") self.assertEqual(response, "partial response") def test_batch_preserve_all_newlines(self): text = "abc\n\nline1\nline2\n" - reasoning, response = self.parser.extract_reasoning_content(text, self.request) + reasoning, response = self.parser.extract_reasoning_content(text, self.request, "think_start") self.assertEqual(reasoning, "abc\n") self.assertEqual(response, "line1\nline2\n") + def test_extract_reasoning_content(self): + reasoning_content, response_content = self.parser.extract_reasoning_content( + model_output="hello", request=self.request, model_status="response_start" + ) + self.assertEqual(reasoning_content, "") + self.assertEqual(response_content, "hello") + class TestErnie45VLThinkingReasoningParser(unittest.TestCase): def setUp(self): @@ -281,6 +395,7 @@ def test_streaming_non_reasoning(self): previous_token_ids=[], current_token_ids=[200], delta_token_ids=[200], + model_status="think_start", ) self.assertIsInstance(result, DeltaMessage) self.assertEqual(result.reasoning_content, "a") @@ -294,6 +409,7 @@ def test_streaming_with_reasoning(self): previous_token_ids=[200, 201], current_token_ids=[200, 201, 100], delta_token_ids=[100], + model_status="think_start", ) self.assertIsNone(result) @@ -305,6 +421,7 @@ def test_streaming_with_reasoning_and_content(self): previous_token_ids=[200, 201], current_token_ids=[200, 201, 100, 300, 400], delta_token_ids=[100, 300, 400], + model_status="think_start", ) self.assertIsInstance(result, DeltaMessage) self.assertIsNone(result.reasoning_content) @@ -318,6 +435,7 @@ def test_streaming_with_reasoning_new_line(self): previous_token_ids=[200, 201, 202], current_token_ids=[200, 201, 202, 100], delta_token_ids=[100], 
+ model_status="think_start", ) self.assertIsNone(result) @@ -329,9 +447,10 @@ def test_streaming_with_reasoning_and_tool(self): previous_token_ids=[200, 201, 202], current_token_ids=[200, 201, 202, 100, 200, 101], delta_token_ids=[100, 200, 101], + model_status="think_start", ) self.assertIsInstance(result, DeltaMessage) - self.assertEqual(result.reasoning_content, "") + self.assertEqual(result.reasoning_content, None) def test_streaming_with_reasoning_and_illegal_tool(self): result = self.parser.extract_reasoning_content_streaming( @@ -341,6 +460,7 @@ def test_streaming_with_reasoning_and_illegal_tool(self): previous_token_ids=[200, 201, 202], current_token_ids=[200, 201, 202, 100, 200, 101], delta_token_ids=[109, 200, 101], + model_status="think_start", ) self.assertIsInstance(result, DeltaMessage) self.assertEqual(result.content, "\n\nhello") @@ -353,6 +473,7 @@ def test_streaming_with_reasoning_no_tool(self): previous_token_ids=[200, 201, 202], current_token_ids=[200, 201, 202, 100, 200, 110], delta_token_ids=[100, 200, 110], + model_status="think_start", ) self.assertIsInstance(result, DeltaMessage) self.assertEqual(result.reasoning_content, "hello") @@ -366,6 +487,7 @@ def test_streaming_reasoning_previous_no_tool(self): previous_token_ids=[100], current_token_ids=[100, 110, 111], delta_token_ids=[110, 111], + model_status="think_start", ) self.assertIsInstance(result, DeltaMessage) self.assertIsNone(result.reasoning_content) @@ -379,52 +501,127 @@ def test_streaming_no_reasoning_previous_tool(self): previous_token_ids=[101], current_token_ids=[101, 110], delta_token_ids=[110], + model_status="think_start", ) self.assertIsInstance(result, DeltaMessage) self.assertEqual(result.reasoning_content, "hello") + def test_think_end_status_streaming(self): + result = self.parser.extract_reasoning_content_streaming( + previous_text="", + current_text="hello", + delta_text="hello", + previous_token_ids=[101], + current_token_ids=[101, 110], + delta_token_ids=[110], + model_status="think_end", + ) + self.assertIs(result, None) + + result = self.parser.extract_reasoning_content_streaming( + previous_text="hello, ", + current_text="hello, hi", + delta_text="hi", + previous_token_ids=[101], + current_token_ids=[101, 110], + delta_token_ids=[110], + model_status="think_end", + ) + self.assertIsInstance(result, DeltaMessage) + self.assertEqual(result.content, "hi") + + def test_other_status_streaming(self): + result = self.parser.extract_reasoning_content_streaming( + previous_text="hello, ", + current_text="hello, hi", + delta_text="hi", + previous_token_ids=[101], + current_token_ids=[101, 110], + delta_token_ids=[110], + model_status="tool_call_start", + ) + self.assertIs(result, None) + def test_batch_no_think_end(self): reasoning, content = self.parser.extract_reasoning_content( - model_output="direct response", request=self.test_request + model_output="direct response", request=self.test_request, model_status="think_start" ) self.assertEqual(reasoning, "direct response") self.assertEqual(content, "") def test_batch_no_think_end_with_tool(self): reasoning, content = self.parser.extract_reasoning_content( - model_output="direct responseabc", request=self.test_request + model_output="direct responseabc", request=self.test_request, model_status="think_start" ) self.assertEqual(reasoning, "direct responseabc") self.assertEqual(content, "") def test_batch_think_end_normal_content(self): reasoning, content = self.parser.extract_reasoning_content( - model_output="reasoning\nresponse", 
request=self.test_request + model_output="reasoning\nresponse", request=self.test_request, model_status="think_start" ) self.assertEqual(reasoning, "reasoning") self.assertEqual(content, "\nresponse") def test_batch_think_end_with_tool(self): reasoning, content = self.parser.extract_reasoning_content( - model_output="reasoning\ntool params", request=self.test_request + model_output="reasoning\ntool params", + request=self.test_request, + model_status="think_start", ) self.assertEqual(reasoning, "reasoning") self.assertEqual(content, "") def test_batch_think_end_with_illegal_tool(self): reasoning, content = self.parser.extract_reasoning_content( - model_output="reasoning\nABC\ntool params", request=self.test_request + model_output="reasoning\nABC\ntool params", + request=self.test_request, + model_status="think_start", ) self.assertEqual(reasoning, "reasoning") self.assertEqual(content, "\nABC\ntool params") def test_batch_think_end_content_with_newline(self): reasoning, content = self.parser.extract_reasoning_content( - model_output="reasoning\n\n actual response", request=self.test_request + model_output="reasoning\n\n actual response", + request=self.test_request, + model_status="think_start", ) self.assertEqual(reasoning, "reasoning") self.assertEqual(content, "\n\n actual response") + def test_think_end_status_non_streaming(self): + reasoning, content = self.parser.extract_reasoning_content( + model_output="response", request=self.test_request, model_status="think_end" + ) + self.assertEqual(reasoning, "") + self.assertEqual(content, "response") + + reasoning, content = self.parser.extract_reasoning_content( + model_output="response", request=self.test_request, model_status="think_end" + ) + self.assertEqual(reasoning, "") + self.assertEqual(content, "") + + reasoning, content = self.parser.extract_reasoning_content( + model_output="\n 1response", request=self.test_request, model_status="think_end" + ) + self.assertEqual(reasoning, "") + self.assertEqual(content, "\n 1response") + + def test_other_status_non_streaming(self): + reasoning, content = self.parser.extract_reasoning_content( + model_output="response", request=self.test_request, model_status="tool_call_start" + ) + self.assertEqual(reasoning, "") + self.assertEqual(content, "") + + reasoning, content = self.parser.extract_reasoning_content( + model_output="response", request=self.test_request, model_status="tool_call_end" + ) + self.assertEqual(reasoning, "") + self.assertEqual(content, "") + class TestErnieVLReasoningParser(unittest.TestCase): def setUp(self): @@ -442,6 +639,7 @@ def test_extract_reasoning_content_stream(self): previous_token_ids=[200, 201, 202], current_token_ids=[200, 201, 202, 100, 110, 120, 130], delta_token_ids=[100, 110, 120, 130], + model_status="think_start", ) self.assertIsInstance(result, DeltaMessage) self.assertEqual(result.reasoning_content, "") @@ -455,6 +653,7 @@ def test_extract_reasoning_content_stream_think_in_previous(self): previous_token_ids=[200, 201, 202, 100], current_token_ids=[200, 201, 202, 100, 110, 120, 130], delta_token_ids=[110, 120, 130], + model_status="think_start", ) self.assertIsInstance(result, DeltaMessage) self.assertIsNone(result.reasoning_content) @@ -468,6 +667,7 @@ def test_extract_reasoning_content_stream_no_think_token(self): previous_token_ids=[200, 201, 202], current_token_ids=[200, 201, 202, 110, 120, 130], delta_token_ids=[110, 120, 130], + model_status="think_start", ) self.assertIsInstance(result, DeltaMessage) self.assertIsNone(result.content) @@ -475,7 
+675,7 @@ def test_extract_reasoning_content_stream_no_think_token(self): def test_extract_reasoning_content(self): reasoning, content = self.parser.extract_reasoning_content( - model_output="reasoning\nactual response", request=self.test_request + model_output="reasoning\nactual response", request=self.test_request, model_status="think_start" ) self.assertEqual(reasoning, "reasoning") self.assertEqual(content, "\nactual response") diff --git a/tests/reasoning/test_vl_reasoning_parser.py b/tests/reasoning/test_vl_reasoning_parser.py new file mode 100644 index 00000000000..f9a36dd952e --- /dev/null +++ b/tests/reasoning/test_vl_reasoning_parser.py @@ -0,0 +1,135 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +import unittest + +from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest +from fastdeploy.reasoning.ernie_vl_reasoning_parsers import ErnieVLReasoningParser + + +class MockTokenizer: + """Minimal tokenizer with vocab for testing.""" + + def __init__(self): + self.vocab = { + "": 100, + "": 101, + } + + def get_vocab(self): + """Return vocab dict for testing.""" + return self.vocab + + +class TestErnieVLReasoningParser(unittest.TestCase): + def setUp(self): + self.parser = ErnieVLReasoningParser(MockTokenizer()) + self.request = ChatCompletionRequest(model="test", messages=[{"role": "user", "content": "test message"}]) + self.tokenizer = MockTokenizer() + + def test_get_model_status(self): + status = self.parser.get_model_status([1, 2, 100]) + self.assertEqual(status, "think_start") + status = self.parser.get_model_status([1, 2, 101]) + self.assertEqual(status, "think_end") + status = self.parser.get_model_status([1]) + self.assertEqual(status, "think_start") + + def test_streaming_thinking_content(self): + msg = self.parser.extract_reasoning_content_streaming( + previous_text="", + current_text="a", + delta_text="a", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[200], + model_status="think_start", + ) + self.assertEqual(msg.reasoning_content, "a") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="", + current_text="ab", + delta_text="ab", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[100, 101, 102], + model_status="think_start", + ) + self.assertEqual(msg.reasoning_content, "a") + self.assertEqual(msg.content, "b") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="a", + current_text="ab", + delta_text="b", + previous_token_ids=[1, 101], + current_token_ids=[], + delta_token_ids=[102], + model_status="think_start", + ) + self.assertEqual(msg.content, "b") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="", + current_text="a", + delta_text="a", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[], + model_status="think_start", + ) + self.assertEqual(msg.reasoning_content, "a") + + msg = self.parser.extract_reasoning_content_streaming( + previous_text="", + 
current_text="a", + delta_text="a", + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[200], + model_status="think_end", + ) + self.assertEqual(msg.content, "a") + + def test_none_streaming_thinking_content(self): + reasoning_content, content = self.parser.extract_reasoning_content( + model_output="a", + request={}, + model_status="think_start", + ) + self.assertEqual(reasoning_content, "") + self.assertEqual(content, "a") + + reasoning_content, content = self.parser.extract_reasoning_content( + model_output="ab", + request={}, + model_status="think_start", + ) + self.assertEqual(reasoning_content, "a") + self.assertEqual(content, "b") + + reasoning_content, content = self.parser.extract_reasoning_content( + model_output="a", + request={}, + model_status="think_end", + ) + self.assertEqual(reasoning_content, "") + self.assertEqual(content, "a") + + +if __name__ == "__main__": + unittest.main()