diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py index e4c0b717ad8..21fec92ab50 100644 --- a/fastdeploy/engine/engine.py +++ b/fastdeploy/engine/engine.py @@ -251,6 +251,8 @@ def add_requests(self, task, sampling_params=None, **kwargs): request = Request.from_dict(task) llm_logger.info(f"Receive request {request}") if sampling_params is not None: + if sampling_params.temperature is not None and abs(sampling_params.temperature) < 1e-06: + sampling_params.temperature = 1e-06 request.sampling_params = sampling_params request.preprocess_start_time = time.time() chat_template_kwargs = kwargs.get("chat_template_kwargs") or {} diff --git a/fastdeploy/entrypoints/engine_client.py b/fastdeploy/entrypoints/engine_client.py index e9664ddb6b9..38334a90e6c 100644 --- a/fastdeploy/entrypoints/engine_client.py +++ b/fastdeploy/entrypoints/engine_client.py @@ -304,7 +304,8 @@ def valid_parameters(self, data): api_server_logger.warning( f"req_id: {data['request_id']}, reasoning_max_tokens exceeds max_tokens, the value of reasoning_max_tokens will be adjusted to match that of max_tokens" ) - + if data.get("temperature") is not None and abs(data["temperature"]) < 1e-6: + data["temperature"] = 1e-6 # logprobs logprobs = data.get("logprobs") top_logprobs = None diff --git a/tests/entrypoints/test_engine_client.py b/tests/entrypoints/test_engine_client.py index 5aa720bbfcc..2493bb44d91 100644 --- a/tests/entrypoints/test_engine_client.py +++ b/tests/entrypoints/test_engine_client.py @@ -33,6 +33,19 @@ async def test_add_request(self): assert request["tools"] == [1] # assert request["chat_template_kwargs"]["tools"] == [1] + def test_valid_parameters(self): + request = { + "request_id": "test-request-id", + "chat_template_kwargs": {"enable_thinking": True}, + "prompt_token_ids": [1], + "chat_template": "Hello", + "max_tokens": 20, + "tools": [1], + "temperature": 0, + } + self.engine_client.valid_parameters(request) + assert request["temperature"] == 1e-6 + if __name__ == "__main__": unittest.main()