def test_max_min_1_token():
    """Boundary check: ``max_tokens`` and ``metadata.min_tokens`` both set to 1.

    Builds a non-streaming chat request from ``TEMPLATE``, sends it to ``URL``
    and asserts on the decoded JSON reply that:

    * the top-level ``object`` field does not contain the text ``"error"``;
    * ``usage.completion_tokens`` is exactly 1;
    * the first choice's ``finish_reason`` is ``"length"``.
    """
    request_overrides = {
        "stream": False,
        "messages": [{"role": "user", "content": "非洲的首都是?"}],
        "max_tokens": 1,
        "metadata": {"min_tokens": 1},
    }
    body = send_request(URL, build_request_payload(TEMPLATE, request_overrides)).json()

    # The reply's "object" field is inspected for the substring "error".
    object_field = body["object"]
    assert "error" not in object_field, f"响应中包含错误信息: {object_field}"

    # Exactly one token must have been generated.
    generated = body["usage"]["completion_tokens"]
    assert generated == 1, f"实际生成的token数为: {generated}, 应该为1"

    # With a one-token budget the generation is expected to stop on length.
    stop_cause = body["choices"][0]["finish_reason"]
    assert stop_cause == "length", f"内容不可能完整生成, 但实际finish_reason为: {body}"
def test_completion_total_tokens():
    """Check ``total_tokens`` in the last streamed chunk of ``/v1/completions``.

    Sends a streaming completion request with ``include_usage`` and
    ``continuous_usage_stats`` enabled, remembers the last ``data: `` JSON
    chunk received before the ``data: [DONE]`` sentinel, and asserts that its
    ``usage`` block has a ``total_tokens`` field equal to
    ``completion_tokens + prompt_tokens``.
    """
    # Derive the completions endpoint locally so the test does not rely on a
    # module-level rebinding of URL; str.replace leaves the string unchanged
    # when the chat path is no longer present.
    completions_url = URL.replace("/v1/chat/completions", "/v1/completions")

    data = {
        "prompt": "你是谁",
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
    }
    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(completions_url, payload, stream=True)

    prefix = "data: "
    last_data = None
    try:
        for line in resp.iter_lines(decode_unicode=True):
            # iter_lines can still hand back bytes when the response declares
            # no encoding; normalise so the string comparisons below work.
            if isinstance(line, bytes):
                line = line.decode("utf-8")
            stripped = line.strip()
            if stripped == "data: [DONE]":
                break
            # Skip blank lines and anything that is not a data line.
            if stripped == "" or not line.startswith(prefix):
                continue
            last_data = json.loads(line[len(prefix):])
    finally:
        # NOTE(review): assumes send_request returns a requests-style response
        # exposing close(); releases the connection after the early break.
        resp.close()

    # Fail with a clear message instead of a TypeError on None below.
    assert last_data is not None, "no data chunk received before [DONE]"

    usage = last_data["usage"]
    assert "total_tokens" in usage, "total_tokens 不存在"
    expected_total = usage["completion_tokens"] + usage["prompt_tokens"]
    assert usage["total_tokens"] == expected_total, "total_tokens计数不正确"
100644 index 00000000000..0c9a944e699 --- /dev/null +++ b/benchmarks/yaml/request_yaml/qwen25-vl-32k.yaml @@ -0,0 +1,8 @@ +top_p: 0.8 +temperature: 0.7 +metadata: + min_tokens: 1 +max_tokens: 32768 +repetition_penalty: 1.05 +frequency_penalty: 0 +presence_penalty: 0 \ No newline at end of file From 35874b28e062d00f7ced8d37207cd9acbcfd6d40 Mon Sep 17 00:00:00 2001 From: xujing43 Date: Wed, 29 Oct 2025 17:21:33 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E3=80=90CE=E3=80=91add=20qwen25-vl?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml diff --git a/benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml b/benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml new file mode 100644 index 00000000000..d159e676f60 --- /dev/null +++ b/benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml @@ -0,0 +1,6 @@ +max_model_len: 32768 +max_num_seqs: 128 +gpu_memory_utilization: 0.85 +tensor_parallel_size: 1 +limit_mm_per_prompt: '{"image": 100, "video": 100}' +enable_mm: True \ No newline at end of file