diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py
deleted file mode 100644
index f80c2006244..00000000000
--- a/tests/model_loader/test_common_model.py
+++ /dev/null
@@ -1,677 +0,0 @@
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-
-import pytest
-
-current_dir = os.path.dirname(os.path.abspath(__file__))
-project_root = os.path.abspath(os.path.join(current_dir, ".."))
-if project_root not in sys.path:
-    sys.path.insert(0, project_root)
-
-from tests.model_loader.utils import (
-    check_tokens_id_and_text_close,
-    form_model_get_output_topp0,
-    get_paddle_model_path,
-    run_with_timeout,
-)
-
-FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
-FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
-
-prompts = ["解释下”温故而知新”", "Hello, how are you?"]
-
-# {id,baseline}
-baseline = {
-    "Qwen3-0.6B.None.default": [
-        (
-            [
-                99487,
-                115040,
-                105855,
-                3837,
-                101034,
-                99652,
-                18493,
-                100384,
-                101047,
-                99892,
-                3837,
-                101034,
-                100007,
-                18493,
-                100384,
-                15946,
-                104026,
-                99487,
-                115040,
-                36407,
-                100627,
-                105683,
-                105520,
-                106579,
-                26850,
-                101140,
-                3837,
-                35946,
-                85106,
-                81167,
-                99487,
-                151645,
-            ],
-            "这个成语的意思,以及它在教学中的应用,以及如何在教学中运用这个成语来提高学生的语文素养?\n\n首先,我需要确认这个",
-        ),
-        (
-            [
-                358,
-                2776,
-                14589,
-                369,
-                279,
-                60009,
-                13,
-                358,
-                2776,
-                14589,
-                369,
-                279,
-                60009,
-                13,
-                358,
-                2776,
-                14589,
-                369,
-                279,
-                60009,
-                13,
-                358,
-                2776,
-                14589,
-                369,
-                279,
-                60009,
-                13,
-                358,
-                2776,
-                14589,
-                151645,
-            ],
-            " I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry",
-        ),
-    ],
-    "Qwen3-0.6B.wint8.default": [
-        (
-            [
-                99487,
-                115040,
-                105855,
-                3837,
-                101034,
-                99652,
-                18493,
-                100384,
-                101047,
-                99892,
-                3837,
-                101034,
-                100007,
-                18493,
-                100384,
-                15946,
-                104026,
-                99487,
-                115040,
-                36407,
-                100627,
-                99720,
-                105595,
-                101062,
-                8997,
-                2073,
-                99416,
-                99535,
-                68536,
-                52183,
-                16628,
-                151645,
-            ],
-            "这个成语的意思,以及它在教学中的应用,以及如何在教学中运用这个成语来提高学生的学习效果。\n“温故而知新",
-        ),
-        (
-            [
-                358,
-                2776,
-                14589,
-                369,
-                279,
-                60009,
-                13,
-                358,
-                2776,
-                14589,
-                369,
-                279,
-                60009,
-                13,
-                358,
-                2776,
-                14589,
-                369,
-                279,
-                60009,
-                13,
-                358,
-                2776,
-                14589,
-                369,
-                279,
-                60009,
-                13,
-                358,
-                2776,
-                14589,
-                151645,
-            ],
-            " I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry for the inconvenience. I'm sorry",
-        ),
-    ],
-    "Qwen3-0.6B.wint4.default": [
-        (
-            [
-                99487,
-                115040,
-                9370,
-                109091,
-                8997,
-                102349,
-                5122,
-                99487,
-                115040,
-                9370,
-                109091,
-                20412,
-                5122,
-                99416,
-                99535,
-                100052,
-                29826,
-                3837,
-                99794,
-                100052,
-                29826,
-                3837,
-                101982,
-                102009,
-                16628,
-                100032,
-                1773,
-                104136,
-                5122,
-                99416,
-                99535,
-                151645,
-            ],
-            "这个成语的含义。\n答案:这个成语的含义是:温故旧事,了解旧事,从而掌握新知识。解释:温故",
-        ),
-        (
-            [
-                358,
-                2776,
-                264,
-                5458,
-                518,
-                264,
-                12103,
-                13,
-                358,
-                2776,
-                264,
-                5458,
-                518,
-                264,
-                12103,
-                13,
-                358,
-                2776,
-                264,
-                5458,
-                518,
-                264,
-                12103,
-                13,
-                358,
-                2776,
-                264,
-                5458,
-                518,
-                264,
-                12103,
-                151645,
-            ],
-            " I'm a student at a university. I'm a student at a university. I'm a student at a university. I'm a student at a university",
-        ),
-    ],
-    "ernie-4_5-21b-a3b-bf16-paddle.wint8.default": [
-        (
-            [
-                58544,
-                23,
-                5458,
-                93956,
-                1294,
-                94705,
-                94752,
-                55817,
-                94136,
-                94041,
-                93986,
-                94227,
-                80951,
-                94226,
-                1855,
-                18982,
-                78351,
-                93956,
-                94338,
-                35829,
-                5154,
-                93977,
-                24053,
-                58544,
-                706,
-                8290,
-                94022,
-                94035,
-                1594,
-                26635,
-                94029,
-                2,
-            ],
-            "的含义\n不了, “温故而知新”是《论语》中的一句名言,由孔子提出。这句话的含义可以解释为:通过回顾和",
-        ),
-        (
-            [
-                354,
-                4932,
-                536,
-                93968,
-                276,
-                4447,
-                1622,
-                93937,
-                25062,
-                93938,
-                354,
-                1481,
-                318,
-                7427,
-                441,
-                536,
-                274,
-                4497,
-                326,
-                57142,
-                38210,
-                385,
-                274,
-                24742,
-                18268,
-                56335,
-                93963,
-                3717,
-                82674,
-                23050,
-                45955,
-                2,
-            ],
-            " I hope you're doing well. Today, I want to share with you a simple and delicious recipe for a classic Italian dish: Spaghetti Carbon",
-        ),
-    ],
-    "Qwen2-7B-Instruct.wint4.default": [
-        (
-            [
-                106599,
-                105855,
-                8997,
-                2073,
-                99416,
-                99535,
-                68536,
-                52183,
-                16628,
-                854,
-                110434,
-                26940,
-                67831,
-                72881,
-                25067,
-                101047,
-                26940,
-                47764,
-                68536,
-                99824,
-                87243,
-                103283,
-                17714,
-                36987,
-                99416,
-                99535,
-                68536,
-                52183,
-                16628,
-                3837,
-                73670,
-                151645,
-            ],
-            "这句话的意思。\n“温故而知新”出自《论语》中的《学而篇》,原文为:“温故而知新,可以",
-        ),
-        (
-            [
-                358,
-                2776,
-                1101,
-                264,
-                6366,
-                2025,
-                11,
-                773,
-                358,
-                1513,
-                944,
-                614,
-                15650,
-                476,
-                21261,
-                13,
-                358,
-                2776,
-                1588,
-                311,
-                1492,
-                498,
-                448,
-                894,
-                4755,
-                498,
-                2578,
-                614,
-                311,
-                279,
-                1850,
-                151645,
-            ],
-            " I'm just a computer program, so I don't have feelings or emotions. I'm here to help you with any questions you might have to the best",
-        ),
-    ],
-    "Qwen3-30B-A3B.block_wise_fp8.triton": [
-        (
-            [
-                106599,
-                9370,
-                109091,
-                90395,
-                107485,
-                46944,
-                99912,
-                111564,
-                1773,
-                1036,
-                99416,
-                99535,
-                68536,
-                52183,
-                16628,
-                854,
-                99639,
-                99700,
-                110434,
-                26940,
-                67831,
-                72881,
-                25067,
-                9370,
-                115040,
-                3837,
-                111490,
-                67338,
-                107090,
-                100052,
-                107232,
-                151645,
-            ],
-            "这句话的含义,并给出一个实际的例子。 “温故而知新”是一句出自《论语》的成语,意思是通过复习旧的知识",
-        ),
-        (
-            [
-                358,
-                2776,
-                4460,
-                311,
-                1477,
-                279,
-                897,
-                315,
-                279,
-                25098,
-                315,
-                279,
-                729,
-                282,
-                2075,
-                8,
-                284,
-                220,
-                16,
-                11884,
-                87,
-                61,
-                17,
-                488,
-                220,
-                16,
-                8,
-                504,
-                856,
-                284,
-                481,
-                151645,
-            ],
-            " I'm trying to find the value of the integral of the function f(x) = 1/(x^2 + 1) from x = -",
-        ),
-    ],
-    "Qwen3-30B-A3B.block_wise_fp8.deepgemm": [
-        (
-            [
-                106599,
-                9370,
-                109091,
-                90395,
-                107485,
-                46944,
-                99912,
-                111564,
-                1773,
-                1036,
-                99416,
-                99535,
-                68536,
-                52183,
-                16628,
-                854,
-                99639,
-                99700,
-                110434,
-                26940,
-                67831,
-                72881,
-                25067,
-                9370,
-                115040,
-                3837,
-                111490,
-                67338,
-                107090,
-                100052,
-                107232,
-                151645,
-            ],
-            "这句话的含义,并给出一个实际的例子。 “温故而知新”是一句出自《论语》的成语,意思是通过复习旧的知识",
-        ),
-        (
-            [
-                358,
-                2776,
-                4460,
-                311,
-                11625,
-                419,
-                3491,
-                25,
-                330,
-                9885,
-                279,
-                897,
-                315,
-                279,
-                7493,
-                25,
-                220,
-                16,
-                15,
-                15,
-                15,
-                14,
-                16,
-                15,
-                15,
-                15,
-                488,
-                220,
-                16,
-                15,
-                15,
-                151645,
-            ],
-            " I'm trying to solve this problem: \"Find the value of the expression: 1000/1000 + 100",
-        ),
-    ],
-}
-
-model_param_map = {
-    "Qwen3-0.6B": {
-        "max_num_seqs": 1,
-        "quantizations": ["None", "wint8", "wint4"],
-    },
-    "ernie-4_5-21b-a3b-bf16-paddle": {
-        "max_num_seqs": 1,
-        "tensor_parallel_size": 2,
-        "quantizations": [
-            "wint8",
-        ],
-    },
-    "Qwen2-7B-Instruct": {
-        "max_num_seqs": 1,
-        "quantizations": ["wint4"],
-    },
-    "Qwen3-30B-A3B": {
-        "tensor_parallel_size": 2,
-        "max_num_seqs": 1,
-        "quantizations": [
-            {
-                "quant_type": "block_wise_fp8",
-                "backend": "triton",
-                "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17"},
-            },
-            {
-                "quant_type": "block_wise_fp8",
-                "backend": "deepgemm",
-                "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17", "FD_USE_DEEP_GEMM": "1"},
-            },
-        ],
-    },
-}
-
-
-params = []
-for model, cfg in model_param_map.items():
-    for q in cfg["quantizations"]:
-        if isinstance(q, dict):
-            quant, backend, env = q["quant_type"], q.get("backend", "default"), q.get("env", {})
-        else:
-            quant, backend, env = q, "default", {}
-        params.append(
-            pytest.param(
-                model,
-                cfg.get("torch_model_name_or_path", ""),
-                cfg.get("tensor_parallel_size", 1),
-                cfg.get("max_num_seqs", 1),
-                cfg.get("max_model_len", 1024),
-                quant,
-                cfg.get("max_tokens", 32),
-                env,
-                marks=[pytest.mark.core_model],
-                id=f"{model}.{quant}.{backend}",
-            )
-        )
-
-
-@pytest.mark.parametrize(
-    "model_name_or_path,torch_model_name_or_path,tensor_parallel_size,max_num_seqs,max_model_len,quantization,max_tokens,env",
-    params,
-)
-def test_common_model(
-    fd_runner,
-    model_name_or_path: str,
-    torch_model_name_or_path: str,
-    tensor_parallel_size: int,
-    max_num_seqs,
-    max_model_len: int,
-    max_tokens: int,
-    quantization: str,
-    env,
-    request,
-    monkeypatch,
-) -> None:
-    model_path = get_paddle_model_path(model_name_or_path)
-    if env:
-        for k, v in env.items():
-            monkeypatch.setenv(k, v)
-
-    form_model_get_output = form_model_get_output_topp0
-
-    fd_outputs_v1 = run_with_timeout(
-        target=form_model_get_output,
-        args=(
-            fd_runner,
-            model_path,
-            tensor_parallel_size,
-            max_num_seqs,
-            max_model_len,
-            max_tokens,
-            quantization,
-            "default_v1",
-            FD_ENGINE_QUEUE_PORT,
-            prompts,
-            FD_CACHE_QUEUE_PORT,
-        ),
-    )
-
-    check_tokens_id_and_text_close(
-        outputs_0_lst=baseline[request.node.callspec.id],
-        outputs_1_lst=fd_outputs_v1,
-        name_0="default loader",
-        name_1="default_v1 loader",
-    )