-
-
Notifications
You must be signed in to change notification settings - Fork 9.5k
Description
Your current environment
"version": "0.8.5.post1"
🐛 Describe the bug
The vLLM Completions API does not support `"max_tokens": null`, although the OpenAI API does: https://platform.openai.com/docs/api-reference/completions/create
Request:
http://vllm.host/v1/completions
{
"model":"Qwen/Qwen2.5-Coder-7B-Instruct-AWQ",
"prompt":"test",
"temperature": 0,
"max_tokens": null
}
Response:
Internal Server Error
<details><summary>logs</summary>
```
2025-06-19 16:10:24.613
AssertionError
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2025-06-19 16:10:24.613
assert request.max_tokens is not None
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/serving_completion.py", line 419, in request_output_to_completion_response
2025-06-19 16:10:24.613
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2025-06-19 16:10:24.613
response = self.request_output_to_completion_response(
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/serving_completion.py", line 219, in create_completion
2025-06-19 16:10:24.613
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2025-06-19 16:10:24.613
generator = await handler.create_completion(request, raw_request)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/openai/api_server.py", line 498, in create_completion
2025-06-19 16:10:24.613
^^^^^^^^^^^^^^^^^^^^^^^^^^^
2025-06-19 16:10:24.613
return await func(*args, **kwargs)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/utils.py", line 85, in wrapper
2025-06-19 16:10:24.613
^^^^^^^^^^^^^^^^^^^^^
2025-06-19 16:10:24.613
return handler_task.result()
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/vllm/entrypoints/utils.py", line 63, in wrapper
2025-06-19 16:10:24.613
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2025-06-19 16:10:24.613
return await dependant.call(**values)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 212, in run_endpoint_function
2025-06-19 16:10:24.613
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2025-06-19 16:10:24.613
raw_response = await run_endpoint_function(
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/fastapi/routing.py", line 301, in app
2025-06-19 16:10:24.613
^^^^^^^^^^^^^^^^
2025-06-19 16:10:24.613
response = await f(request)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 73, in app
2025-06-19 16:10:24.613
await app(scope, receive, sender)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 42, in wrapped_app
2025-06-19 16:10:24.613
raise exc
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
2025-06-19 16:10:24.613
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 76, in app
2025-06-19 16:10:24.613
await self.app(scope, receive, send)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 288, in handle
2025-06-19 16:10:24.613
await route.handle(scope, receive, send)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 734, in app
2025-06-19 16:10:24.613
await self.middleware_stack(scope, receive, send)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/starlette/routing.py", line 714, in call
2025-06-19 16:10:24.613
await app(scope, receive, sender)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 42, in wrapped_app
2025-06-19 16:10:24.613
raise exc
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
2025-06-19 16:10:24.613
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/exceptions.py", line 62, in call
2025-06-19 16:10:24.613
await self.app(scope, receive, send_wrapper)
2025-06-19 16:10:24.613
File "/usr/local/lib/python3.12/dist-packages/prometheus_fastapi_instrumentator/middleware.py", line 175, in call
2025-06-19 16:10:24.613
raise exc
2025-06-19 16:10:24.612
File "/usr/local/lib/python3.12/dist-packages/prometheus_fastapi_instrumentator/middleware.py", line 177, in call
2025-06-19 16:10:24.612
await self.app(scope, receive, send)
2025-06-19 16:10:24.612
File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/cors.py", line 85, in call
2025-06-19 16:10:24.612
await self.app(scope, receive, _send)
2025-06-19 16:10:24.612
File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 165, in call
2025-06-19 16:10:24.612
raise exc
2025-06-19 16:10:24.612
File "/usr/local/lib/python3.12/dist-packages/starlette/middleware/errors.py", line 187, in call
2025-06-19 16:10:24.612
await self.middleware_stack(scope, receive, send)
2025-06-19 16:10:24.612
File "/usr/local/lib/python3.12/dist-packages/starlette/applications.py", line 112, in call
2025-06-19 16:10:24.612
await super().call(scope, receive, send)
2025-06-19 16:10:24.612
File "/usr/local/lib/python3.12/dist-packages/fastapi/applications.py", line 1054, in call
2025-06-19 16:10:24.612
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2025-06-19 16:10:24.612
return await self.app(scope, receive, send)
2025-06-19 16:10:24.612
File "/usr/local/lib/python3.12/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in call
2025-06-19 16:10:24.612
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2025-06-19 16:10:24.612
result = await app( # type: ignore[func-returns-value]
2025-06-19 16:10:24.612
File "/usr/local/lib/python3.12/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 409, in run_asgi
2025-06-19 16:10:24.612
Traceback (most recent call last):
```
</details>
### Before submitting a new issue...
- [x] Make sure you already searched for relevant issues, and asked the chatbot living at the bottom right corner of the [documentation page](https://docs.vllm.ai/en/latest/), which can answer lots of frequently asked questions.