From c0b66f9d5fbcc463746cc803a5a3f2568c81f7b2 Mon Sep 17 00:00:00 2001
From: xujing43 <xujing43@baidu.com>
Date: Tue, 5 Aug 2025 21:03:39 +0800
Subject: [PATCH 1/4] Add ci case for min token and max token

---
 test/ce/server/test_params_boundary.py | 36 ++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 test/ce/server/test_params_boundary.py

diff --git a/test/ce/server/test_params_boundary.py b/test/ce/server/test_params_boundary.py
new file mode 100644
index 00000000000..794e6702e5f
--- /dev/null
+++ b/test/ce/server/test_params_boundary.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# @author xujing43
+# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
+
+"""
+Boundary value checking for API parameters
+"""
+
+import json
+
+from core import (
+    TEMPLATE,
+    URL,
+    build_request_payload,
+    send_request,
+)
+
+def test_max_min_1_token():
+    """Request max_tokens=1 with min_tokens=1; expect one token and finish_reason "length"."""
+    data = {
+        "stream": False,
+        "messages": [{"role": "user", "content": "非洲的首都是？"}],
+        "max_tokens": 1,
+        "metadata": {"min_tokens": 1},
+    }
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, payload).json()
+
+    response_object = response["object"]
+    assert "error" not in response_object, f"响应中包含错误信息: {response_object}"
+    completion_tokens = response["usage"]["completion_tokens"]
+    assert completion_tokens == 1, f"实际生成的token数为: {completion_tokens}, 应该为1"
+    # The test expects generation to be cut off by the token limit, i.e. finish_reason "length".
+    finish_reason = response["choices"][0]["finish_reason"]
+    assert finish_reason == "length", f"内容不可能完整生成, 但实际finish_reason为: {finish_reason}, 完整响应: {response}"
\ No newline at end of file

From d88cb716b9f88afe9c962edfa73a0f0cf3b3fbd8 Mon Sep 17 00:00:00 2001
From: xujing43 <xujing43@baidu.com>
Date: Fri, 8 Aug 2025 19:34:25 +0800
Subject: [PATCH 2/4] =?UTF-8?q?=E3=80=90CI=20case=E3=80=91include=20total?=
 =?UTF-8?q?=5Ftokens=20in=20the=20last=20packet=20of=20completion=20interf?=
 =?UTF-8?q?ace=20stream=20output?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/ce/server/test_completions.py | 42 ++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 test/ce/server/test_completions.py

diff --git a/test/ce/server/test_completions.py b/test/ce/server/test_completions.py
new file mode 100644
index 00000000000..12532c5f62b
--- /dev/null
+++ b/test/ce/server/test_completions.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# @author xujing43
+# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
+
+"""
+Checking for /v1/completions parameters
+"""
+
+import json
+
+from core import (
+    TEMPLATE,
+    URL,
+    build_request_payload,
+    send_request,
+)
+
+URL = URL.replace("/v1/chat/completions", "/v1/completions")
+
+def test_completion_total_tokens():
+    """The last streamed /v1/completions chunk must carry a consistent usage.total_tokens."""
+    data = {
+        "prompt": "你是谁",
+        "stream": True,
+        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
+    }
+    payload = build_request_payload(TEMPLATE, data)
+    resp = send_request(URL, payload, stream=True)
+    # Remember only the last JSON chunk seen before the "[DONE]" sentinel.
+    last_data = None
+    for line in resp.iter_lines(decode_unicode=True):
+        if line.strip() == "data: [DONE]":
+            break
+        if line.startswith("data: "):
+            last_data = json.loads(line[len("data: "):])
+    # Fail with a clear message instead of a TypeError when no chunk arrived.
+    assert last_data is not None, "流式响应中没有任何数据包"
+    usage = last_data["usage"]
+    assert "total_tokens" in usage, "total_tokens 不存在"
+    expected_total = usage["completion_tokens"] + usage["prompt_tokens"]
+    assert usage["total_tokens"] == expected_total, "total_tokens计数不正确"
\ No newline at end of file

From f897048bbb5307ea0927f6bccd6ce43507308c91 Mon Sep 17 00:00:00 2001
From: xujing43 <xujing43@baidu.com>
Date: Wed, 29 Oct 2025 17:17:13 +0800
Subject: [PATCH 3/4] =?UTF-8?q?=E3=80=90CE=E3=80=91add=20qwen25-vl?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 benchmarks/yaml/request_yaml/qwen25-vl-32k.yaml | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 benchmarks/yaml/request_yaml/qwen25-vl-32k.yaml

diff --git a/benchmarks/yaml/request_yaml/qwen25-vl-32k.yaml b/benchmarks/yaml/request_yaml/qwen25-vl-32k.yaml
new file mode 100644
index 00000000000..0c9a944e699
--- /dev/null
+++ b/benchmarks/yaml/request_yaml/qwen25-vl-32k.yaml
@@ -0,0 +1,8 @@
+top_p: 0.8
+temperature: 0.7
+metadata:
+  min_tokens: 1
+max_tokens: 32768
+repetition_penalty: 1.05
+frequency_penalty: 0
+presence_penalty: 0
\ No newline at end of file

From 35874b28e062d00f7ced8d37207cd9acbcfd6d40 Mon Sep 17 00:00:00 2001
From: xujing43 <xujing43@baidu.com>
Date: Wed, 29 Oct 2025 17:21:33 +0800
Subject: [PATCH 4/4] =?UTF-8?q?=E3=80=90CE=E3=80=91add=20qwen25-vl?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml

diff --git a/benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml b/benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml
new file mode 100644
index 00000000000..d159e676f60
--- /dev/null
+++ b/benchmarks/yaml/qwen25_7b-vl-32k-bf16.yaml
@@ -0,0 +1,6 @@
+max_model_len: 32768
+max_num_seqs: 128
+gpu_memory_utilization: 0.85
+tensor_parallel_size: 1
+limit_mm_per_prompt: '{"image": 100, "video": 100}'
+enable_mm: True
\ No newline at end of file