From 859314859bfdf7741526437a509a15cab134d25f Mon Sep 17 00:00:00 2001
From: wufeisheng
Date: Thu, 23 Oct 2025 16:20:12 +0800
Subject: [PATCH] fix req and token client

---
 fastdeploy/engine/request.py                         | 1 +
 fastdeploy/entrypoints/openai/response_processors.py | 8 ++++++--
 fastdeploy/input/tokenzier_client.py                 | 3 +--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py
index 940c1ed7fee..793de6f539e 100644
--- a/fastdeploy/engine/request.py
+++ b/fastdeploy/engine/request.py
@@ -323,6 +323,7 @@ def to_dict(self):
             "index": self.index,
             "send_idx": self.send_idx,
             "token_ids": self.token_ids,
+            "decode_type": self.decode_type,
             "logprob": self.logprob,
             "top_logprobs": self.top_logprobs,
             "draft_top_logprobs": self.draft_top_logprobs,
diff --git a/fastdeploy/entrypoints/openai/response_processors.py b/fastdeploy/entrypoints/openai/response_processors.py
index e51147899e5..b340133d6b7 100644
--- a/fastdeploy/entrypoints/openai/response_processors.py
+++ b/fastdeploy/entrypoints/openai/response_processors.py
@@ -99,7 +99,8 @@ async def process_response_chat(self, request_outputs, stream, enable_thinking,
                     image_ret = await self.decoder_client.decode_image(
                         request=ImageDecodeRequest(req_id=req_id, data=all_tokens)
                     )
-                    image["url"] = image_ret["http_url"]
+                    if image_ret is not None:
+                        image["url"] = image_ret["http_url"]
                     image_output = self._end_image_code_request_output
                     image_output["outputs"]["multipart"] = [image]
                     image_output["outputs"]["token_ids"] = all_tokens
@@ -138,10 +139,13 @@ async def process_response_chat(self, request_outputs, stream, enable_thinking,
                     if self.decoder_client:
                         req_id = part["request_output"]["request_id"]
                         all_tokens = part["request_output"]["outputs"]["token_ids"]
+
                         image_ret = await self.decoder_client.decode_image(
                             request=ImageDecodeRequest(req_id=req_id, data=all_tokens)
                         )
-                        image["url"] = image_ret["http_url"]
+
+                        if image_ret is not None:
+                            image["url"] = image_ret["http_url"]
                     multipart.append(image)

             lasrt_request_output = self._multipart_buffer[-1]["request_output"]
diff --git a/fastdeploy/input/tokenzier_client.py b/fastdeploy/input/tokenzier_client.py
index 686f5f5b905..ff013cf3c9a 100644
--- a/fastdeploy/input/tokenzier_client.py
+++ b/fastdeploy/input/tokenzier_client.py
@@ -163,7 +163,7 @@ async def _async_decode_request(self, type: str, request: dict):
                 resp.raise_for_status()
                 if resp.json().get("code") != 0:
                     raise RuntimeError(f"Tokenize task creation failed, {resp.json().get('message')}")
-                break
+                return resp.json().get("result")
             except Exception as e:
                 data_processor_logger.error(f"Attempt to decode_request {attempt + 1} failed: {e}")
                 if attempt == max_retries - 1:
@@ -171,6 +171,5 @@ async def _async_decode_request(self, type: str, request: dict):
                         f"Max retries of decode_request reached. Giving up. request is {request}"
                     )
                 time.sleep(10)
-                return resp.json().get("result")
         except httpx.RequestError as e:
             raise RuntimeError(f"Failed to decode: {e}") from e