Closed as not planned
Labels: bug (Something isn't working), stale (Over 90 days of inactivity)
Description
Your current environment
The output of `python collect_env.py`
`collect_env.py` cannot be run inside the docker image `vllm/vllm-openai:latest`.
Model Input Dumps
No response
🐛 Describe the bug
The same request works perfectly fine in 0.6.4.post1 but fails in 0.6.5. It looks like requests fail whenever the provided schema contains a nullable string.
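For what it's worth, the schema can also be sent to the endpoint directly, without the Vercel AI SDK, to isolate the failure. Below is a minimal sketch assuming a vLLM server on http://localhost:8000 and vLLM's guided_json extra body parameter (Node 18+ for the built-in fetch); the key part is the type array ["string", "null"], which is how zod serializes z.string().nullable():

// Minimal direct repro sketch (no AI SDK), assuming a vLLM server on
// http://localhost:8000 that accepts vLLM's guided_json extra parameter.
const schema = {
  type: "object",
  properties: {
    // z.string().nullable() serializes to a type array like this,
    // which appears to be what 0.6.5 chokes on:
    summary: { type: ["string", "null"] },
  },
  required: ["summary"],
};

fetch("http://localhost:8000/v1/chat/completions", {
  method: "POST",
  headers: { "content-type": "application/json" },
  body: JSON.stringify({
    model: "casperhansen/llama-3.3-70b-instruct-awq",
    messages: [{ role: "user", content: "Summarize: lorem ipsum." }],
    max_tokens: 100,
    guided_json: schema,
  }),
}).then(async (res) => console.log(res.status, await res.text()));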
[AI_APICallError]: Bad Request
    at /app/node_modules/@ai-sdk/provider-utils/dist/index.js:516:14
    at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
    at async postToApi (/app/node_modules/@ai-sdk/provider-utils/dist/index.js:409:28)
    at async OpenAIChatLanguageModel.doGenerate (/app/node_modules/@ai-sdk/openai/dist/index.js:520:50)
    at async fn (/app/node_modules/ai/dist/index.js:2341:33)
    at async /app/node_modules/ai/dist/index.js:343:22
    at async _retryWithExponentialBackoff (/app/node_modules/ai/dist/index.js:171:12)
    at async fn (/app/node_modules/ai/dist/index.js:2309:34)
    at async /app/node_modules/ai/dist/index.js:343:22 {
  cause: undefined,
  url: 'http://vllm:8000/v1/chat/completions',
  requestBodyValues: {
    model: 'casperhansen/llama-3.3-70b-instruct-awq',
    logit_bias: undefined,
    logprobs: undefined,
    top_logprobs: undefined,
    user: undefined,
    parallel_tool_calls: undefined,
    max_tokens: 1000,
    temperature: 0.1,
    top_p: undefined,
    frequency_penalty: undefined,
    presence_penalty: undefined,
    stop: undefined,
    seed: undefined,
    max_completion_tokens: undefined,
    store: undefined,
    metadata: undefined,
    response_format: undefined,
    messages: [ [Object], [Object] ],
    tool_choice: { type: 'function', function: [Object] },
    tools: [ [Object] ]
  },
  statusCode: 400,
  responseHeaders: {
    connection: 'keep-alive',
    'content-length': '154',
    'content-type': 'application/json',
    date: 'Sun, 22 Dec 2024 14:17:14 GMT',
    'keep-alive': 'timeout=5',
    server: 'uvicorn',
    'x-request-id': '889aec28b5c2488ca256e9b66c339710'
  },
  responseBody: '{"object":"error","message":"\\"type mismatch! call is<type>() before get<type>()\\" && is<std::string>()","type":"BadRequestError","param":null,"code":400}',
  isRetryable: false,
  data: undefined,
  [Symbol(vercel.ai.error)]: true,
  [Symbol(vercel.ai.error.AI_APICallError)]: true
}
How to reproduce this issue
Install the necessary dependencies:
npm i ai @ai-sdk/openai zod
Start a vLLM server; I used the following parameters:
--host 0.0.0.0 --port 8000 --model casperhansen/llama-3.3-70b-instruct-awq --quantization awq_marlin --gpu-memory-utilization 0.95 --max-model-len 8400 --enable-auto-tool-choice --tool-call-parser llama3_json --enable-chunked-prefill --max-num-batched-tokens 4096 --scheduler-delay-factor 0.5 --enable-prefix-caching --tensor-parallel-size 2 --trust-remote-code
Run the following script:
const { z } = require("zod");
const { createOpenAI } = require("@ai-sdk/openai");
const { generateObject } = require("ai");

const openai = createOpenAI({
  baseURL: `http://redacted:8000/v1`,
  apiKey: "redacted",
  compatibility: "strict",
});

const model = openai.chat("casperhansen/llama-3.3-70b-instruct-awq");

async function main() {
  // Fails when the schema has any z.string().nullable()
  try {
    console.log("Running nullable strings test");
    await generateObject({
      model,
      system: "Analyze the following text and extract information:",
      prompt: "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
      schema: z.object({
        stuff: z
          .object({
            summary: z.string().nullable(),
            topics: z.array(z.string()).nullable(),
            importance: z.enum(["low", "medium", "high"]).nullable(),
          })
          .nullable(),
      }),
      temperature: 0.1,
      maxTokens: 1000,
    });
    console.log("Huh, nullable strings worked for me");
  } catch (err) {
    console.error(err);
    console.error("Reproduced error with nullable strings");
  }

  console.log("--------------------------------");

  // Works when the schema has no z.string().nullable()
  try {
    console.log("Running no nullable strings test");
    await generateObject({
      model,
      system: "Analyze the following text and extract information:",
      prompt: "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
      schema: z.object({
        stuff: z
          .object({
            summary: z.string(),
            topics: z.array(z.string()).nullable(),
            importance: z.enum(["low", "medium", "high"]).nullable(),
          })
          .nullable(),
      }),
      temperature: 0.1,
      maxTokens: 1000,
    });
    console.log("Working as expected with no nullable strings");
  } catch (err) {
    console.error(err);
    console.error("Huh, this one failed for me too");
  }
}

main();
The terminal output looks like this:
Failing
2024-12-22T15:13:22.221251621Z INFO 12-22 07:13:22 logger.py:37] Received request chatcmpl-6f8d2becf3e740b3aff1cbd3e8d6f6af: prompt: '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nEnvironment: ipython\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nAnalyze the following text and extract information:<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nGiven the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.\n\nRespond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables.\n\n{\n "type": "function",\n "function": {\n "name": "json",\n "description": "Respond with a JSON object.",\n "parameters": {\n "type": "object",\n "properties": {\n "stuff": {\n "anyOf": [\n {\n "type": "object",\n "properties": {\n "summary": {\n "type": [\n "string",\n "null"\n ]\n },\n "topics": {\n "anyOf": [\n {\n "type": "array",\n "items": {\n "type": "string"\n }\n },\n {\n "type": "null"\n }\n ]\n },\n "importance": {\n "anyOf": [\n {\n "type": "string",\n "enum": [\n "low",\n "medium",\n "high"\n ]\n },\n {\n "type": "null"\n }\n ]\n }\n },\n "required": [\n "summary",\n "topics",\n "importance"\n ],\n "additionalProperties": false\n },\n {\n "type": "null"\n }\n ]\n }\n },\n "required": [\n "stuff"\n ],\n "additionalProperties": false,\n "$schema": "http://json-schema.org/draft-07/schema#"\n }\n }\n}\n\nLorem ipsum dolor sit amet, consectetur adipiscing elit.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n', params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.1, top_p=1.0, top_k=-1, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=1000, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, guided_decoding=GuidedDecodingParams(json={'type': 'object', 'properties': {'stuff': {'anyOf': [{'type': 'object', 'properties': {'summary': {'type': ['string', 'null']}, 'topics': {'anyOf': [{'type': 'array', 'items': {'type': 'string'}}, {'type': 'null'}]}, 'importance': {'anyOf': [{'type': 'string', 'enum': ['low', 'medium', 'high']}, {'type': 'null'}]}}, 'required': ['summary', 'topics', 'importance'], 'additionalProperties': False}, {'type': 'null'}]}}, 'required': ['stuff'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, regex=None, choice=None, grammar=None, json_object=None, backend=None, whitespace_pattern=None)), prompt_token_ids: None, lora_request: None, prompt_adapter_request: None.
2024-12-22T15:13:22.223942347Z INFO: 127.0.0.1:43362 - "POST /v1/chat/completions HTTP/1.1" 400 Bad Request
Working
2024-12-22T15:13:22.510229904Z INFO 12-22 07:13:22 logger.py:37] Received request chatcmpl-7681529236c149ebbfa663b8a497ab78: prompt: '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nEnvironment: ipython\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nAnalyze the following text and extract information:<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nGiven the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.\n\nRespond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables.\n\n{\n "type": "function",\n "function": {\n "name": "json",\n "description": "Respond with a JSON object.",\n "parameters": {\n "type": "object",\n "properties": {\n "stuff": {\n "anyOf": [\n {\n "type": "object",\n "properties": {\n "summary": {\n "type": "string"\n },\n "topics": {\n "anyOf": [\n {\n "type": "array",\n "items": {\n "type": "string"\n }\n },\n {\n "type": "null"\n }\n ]\n },\n "importance": {\n "anyOf": [\n {\n "type": "string",\n "enum": [\n "low",\n "medium",\n "high"\n ]\n },\n {\n "type": "null"\n }\n ]\n }\n },\n "required": [\n "summary",\n "topics",\n "importance"\n ],\n "additionalProperties": false\n },\n {\n "type": "null"\n }\n ]\n }\n },\n "required": [\n "stuff"\n ],\n "additionalProperties": false,\n "$schema": "http://json-schema.org/draft-07/schema#"\n }\n }\n}\n\nLorem ipsum dolor sit amet, consectetur adipiscing elit.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n', params: SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.1, top_p=1.0, top_k=-1, min_p=0.0, seed=None, stop=[], stop_token_ids=[], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=1000, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, guided_decoding=GuidedDecodingParams(json={'type': 'object', 'properties': {'stuff': {'anyOf': [{'type': 'object', 'properties': {'summary': {'type': 'string'}, 'topics': {'anyOf': [{'type': 'array', 'items': {'type': 'string'}}, {'type': 'null'}]}, 'importance': {'anyOf': [{'type': 'string', 'enum': ['low', 'medium', 'high']}, {'type': 'null'}]}}, 'required': ['summary', 'topics', 'importance'], 'additionalProperties': False}, {'type': 'null'}]}}, 'required': ['stuff'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, regex=None, choice=None, grammar=None, json_object=None, backend=None, whitespace_pattern=None)), prompt_token_ids: None, lora_request: None, prompt_adapter_request: None.
2024-12-22T15:13:22.668880404Z INFO 12-22 07:13:22 engine.py:267] Added request chatcmpl-7681529236c149ebbfa663b8a497ab78.
2024-12-22T15:13:23.189630869Z INFO 12-22 07:13:23 metrics.py:467] Avg prompt throughput: 78.2 tokens/s, Avg generation throughput: 2.2 tokens/s, Running: 1 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.3%, CPU KV cache usage: 0.0%.
2024-12-22T15:13:23.189699858Z INFO 12-22 07:13:23 metrics.py:483] Prefix cache hit rate: GPU: 59.67%, CPU: 0.00%
2024-12-22T15:13:24.199740631Z INFO: 127.0.0.1:43366 - "POST /v1/chat/completions HTTP/1.1" 200 OK
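Comparing the two logged requests, the only difference is how the nullable fields are encoded: topics and importance become anyOf: [..., {"type": "null"}] and pass, while summary becomes the type array "type": ["string", "null"] and triggers the 400. The error text ("type mismatch! call is<type>() before get<type>()" && is<std::string>()) reads like an assertion from the guided-decoding backend's JSON parser fetching the type keyword as a string without first checking that it may also be an array. Until this is fixed server-side, one client-side workaround is to rewrite type arrays into the equivalent anyOf form before sending the schema. A sketch (normalizeNullableTypes below is a hypothetical helper, not part of any SDK):

// Workaround sketch: convert `"type": ["string", "null"]` (and other
// type arrays) into the `anyOf` form that 0.6.5 still accepts.
// normalizeNullableTypes is a hypothetical helper, not a library API.
function normalizeNullableTypes(node) {
  if (Array.isArray(node)) return node.map(normalizeNullableTypes);
  if (node === null || typeof node !== "object") return node;
  const out = {};
  for (const [key, value] of Object.entries(node)) {
    out[key] = normalizeNullableTypes(value);
  }
  if (Array.isArray(out.type)) {
    // Split the type array into one branch per type; sibling keywords
    // stay on the node and still apply to every branch (draft-07 semantics).
    const { type, ...rest } = out;
    return { ...rest, anyOf: type.map((t) => ({ type: t })) };
  }
  return out;
}

// Example: { type: ["string", "null"] }
//   -> { anyOf: [{ type: "string" }, { type: "null" }] }

Alternatively, if your deployment exposes the flag, forcing the previous backend with --guided-decoding-backend outlines may also sidestep this; I haven't verified that on 0.6.5.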