From 4c5bf67477aa228cd06ba63467927f875554bc73 Mon Sep 17 00:00:00 2001
From: zhengtianyu
Date: Tue, 5 Aug 2025 15:43:12 +0800
Subject: [PATCH 1/3] add repetition early stop cases

---
Review notes (free-form text in this section is ignored by git am):
* Restored the one-record-per-line patch layout; blank context lines and
  indentation were inferred from the hunk line counts.
* utils.py: replaced the copy-pasted docstrings of get_logprobs_list /
  get_probs_list and made both helpers tolerate a null logprobs "content".
* Added comments and docstrings are in English; runtime strings are unchanged.
* All edits are line-count neutral, so hunk headers and the diffstat still hold.
  Blob ids on the "index" lines were not regenerated.

 test/ce/server/core/__init__.py  | 12 ++++++-
 test/ce/server/core/utils.py     | 39 ++++++++++++++++++++++
 test/ce/server/test_base_chat.py | 56 +++++++++++++++++++++++++++++++-
 3 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/test/ce/server/core/__init__.py b/test/ce/server/core/__init__.py
index 95a56128c16..7ab8c39b7d4 100644
--- a/test/ce/server/core/__init__.py
+++ b/test/ce/server/core/__init__.py
@@ -13,12 +13,22 @@
 from .request_template import TEMPLATES
 from .utils import (
     build_request_payload,
+    get_logprobs_list,
+    get_probs_list,
     get_stream_chunks,
     get_token_list,
     send_request,
 )
 
-__all__ = ["build_request_payload", "send_request", "TEMPLATES", "get_stream_chunks", "get_token_list"]
+__all__ = [
+    "build_request_payload",
+    "send_request",
+    "TEMPLATES",
+    "get_stream_chunks",
+    "get_token_list",
+    "get_logprobs_list",
+    "get_probs_list",
+]
 
 # 检查环境变量是否存在
 URL = os.environ.get("URL")
diff --git a/test/ce/server/core/utils.py b/test/ce/server/core/utils.py
index c12baf34601..92b00ed736b 100644
--- a/test/ce/server/core/utils.py
+++ b/test/ce/server/core/utils.py
@@ -4,6 +4,7 @@
 # encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
 
 import json
+import math
 
 import requests
 from core import TEMPLATES, base_logger
@@ -97,3 +98,41 @@ def get_token_list(response):
 
     base_logger.info(f"Token List:{token_list}")
     return token_list
+
+
+def get_logprobs_list(response):
+    """Return the "logprob" value of each entry in response["choices"][0]["logprobs"]["content"]."""
+    logprobs_list = []
+
+    try:
+        content_logprobs = response["choices"][0]["logprobs"]["content"]
+    except (KeyError, IndexError, TypeError) as e:
+        base_logger.error(f"解析失败:{e}")
+        return []
+
+    for token_info in content_logprobs or []:  # a null "content" yields an empty list
+        logprob = token_info.get("logprob")
+        if logprob is not None:
+            logprobs_list.append(logprob)
+
+    base_logger.info(f"Logprobs List:{logprobs_list}")
+    return logprobs_list
+
+
+def get_probs_list(response):
+    """Return math.exp(logprob) for each entry in response["choices"][0]["logprobs"]["content"]."""
+    probs_list = []
+
+    try:
+        content_logprobs = response["choices"][0]["logprobs"]["content"]
+    except (KeyError, IndexError, TypeError) as e:
+        base_logger.error(f"解析失败:{e}")
+        return []
+
+    for token_info in content_logprobs or []:  # a null "content" yields an empty list
+        logprob = token_info.get("logprob")
+        if logprob is not None:
+            probs_list.append(math.exp(logprob))
+
+    base_logger.info(f"probs List:{probs_list}")
+    return probs_list
diff --git a/test/ce/server/test_base_chat.py b/test/ce/server/test_base_chat.py
index 6c665237ac3..2ef79267f53 100644
--- a/test/ce/server/test_base_chat.py
+++ b/test/ce/server/test_base_chat.py
@@ -9,7 +9,14 @@
 
 import json
 
-from core import TEMPLATE, URL, build_request_payload, get_token_list, send_request
+from core import (
+    TEMPLATE,
+    URL,
+    build_request_payload,
+    get_probs_list,
+    get_token_list,
+    send_request,
+)
 
 
 def test_stream_response():
@@ -221,3 +228,50 @@ def test_bad_words_filtering1():
     assert word in token_list, f"'{word}' 应出现在生成结果中"
 
     print("test_bad_words_filtering1 正例验证通过")
+
+
+def test_repetition_early_stop():
+    """
+    Verify that repetition early stop takes effect: the prompt is designed to induce repetition,
+    and generation is expected to be cut off early with window_size=6, threshold=0.93.
+    NOTE: window_size / threshold must be passed when the model server is launched,
+    otherwise this case cannot be used.
+    """
+
+    data = {
+        "stream": False,
+        "messages": [
+            {"role": "user", "content": "输出'我爱吃果冻' 10次"},
+        ],
+        "max_tokens": 10000,
+        "temperature": 0.8,
+        "top_p": 0,
+    }
+
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(URL, payload).json()
+    content = response["choices"][0]["message"]["content"]
+
+    print("🧪 repetition early stop 输出内容:\n", content)
+    probs_list = get_probs_list(response)
+
+    threshold = 0.93
+    window_size = 6
+
+    assert len(probs_list) >= window_size, "列表长度不足 window_size"
+
+    # Condition 1: the last window_size probabilities are all > threshold
+    tail = probs_list[-window_size:]
+    assert all(v > threshold for v in tail), "末尾 window_size 个数不全大于阈值"
+
+    # Condition 2: no earlier run of >= window_size consecutive probabilities > threshold
+    head = probs_list[:-window_size]
+    count = 0
+    for v in head:
+        if v > threshold:
+            count += 1
+            assert count < window_size, f"在末尾之前出现了连续 {count} 个大于阈值的数"
+        else:
+            count = 0
+
+    print("repetition early stop 功能验证通过")

From 0b3aab76fd558c18d59b0f50d26413ed2faed8d2 Mon Sep 17 00:00:00 2001
From: zhengtianyu
Date: Tue, 5 Aug 2025 15:46:21 +0800
Subject: [PATCH 2/3] add repetition early stop cases

---
 test/ce/server/test_base_chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/ce/server/test_base_chat.py b/test/ce/server/test_base_chat.py
index 2ef79267f53..73524c13e1f 100644
--- a/test/ce/server/test_base_chat.py
+++ b/test/ce/server/test_base_chat.py
@@ -157,7 +157,7 @@ def test_multi_turn_conversation():
 
 
 def test_bad_words_filtering():
-    banned_tokens = ["香"]
+    banned_tokens = ["香蕉"]
 
     data = {
         "stream": False,

From e504263b5d7a9dd3e03f4b90e6b73c76d8f25dcc Mon Sep 17 00:00:00 2001
From: zhengtianyu
Date: Mon, 15 Sep 2025 18:53:49 +0800
Subject: [PATCH 3/3] add structure test openai

---
 tests/ce/server/test_evil_cases.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/ce/server/test_evil_cases.py b/tests/ce/server/test_evil_cases.py
index 4f89874bc84..88bda57d622 100644
--- a/tests/ce/server/test_evil_cases.py
+++ b/tests/ce/server/test_evil_cases.py
@@ -416,3 +416,27 @@ def test_max_tokens_non_integer():
     assert (
         resp.get("error").get("message") == "Input should be a valid integer, got a number with a fractional part"
     ), "未返回预期的 max_tokens 为非整数的错误信息"
+
+
+def test_error_structure():
+    """Check the structure of the returned error object, regardless of its content."""
+    data = {
+        "stream": False,
+        "messages": "我是一个非法的消息结构",
+        "max_tokens": 10,
+    }
+
+    payload = build_request_payload(TEMPLATE, data)
+    resp = send_request(URL, payload).json()
+
+    # Basic check: the response must contain "error"
+    assert "error" in resp, "返回结果缺少 error 字段"
+    err = resp["error"]
+    assert isinstance(err, dict), "error 不是字典"
+
+    # Every structural field must be present
+    for field in ["message", "type", "param", "code"]:
+        assert field in err, f"error 缺少 {field} 字段"
+
+    # message must not be None
+    assert err["message"] is not None, "error.message 不应为 null"