From 64b3a530fb9bc6825c246cb8ddf6de254b71c4d1 Mon Sep 17 00:00:00 2001 From: aquagull Date: Thu, 16 Oct 2025 14:44:24 +0800 Subject: [PATCH 1/5] fix --- .../model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py index 6a96adeabd4..3dff165eaa4 100644 --- a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py +++ b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py @@ -170,8 +170,8 @@ def __init__( # TODO(hehongyu): remove this after fix model network setattr( self.gate.weight, - "model_format", - "", + "weight_need_transpose", + False, ) def forward(self, hidden_states: paddle.Tensor): From ba608af013dfab5e81f4b97eb3d1287d84016e3e Mon Sep 17 00:00:00 2001 From: Ayakouji Date: Tue, 28 Oct 2025 14:21:49 +0800 Subject: [PATCH 2/5] add test --- tests/model_loader/test_common_model.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py index bc894bddc3c..ba4ff44e8e4 100644 --- a/tests/model_loader/test_common_model.py +++ b/tests/model_loader/test_common_model.py @@ -89,6 +89,12 @@ }, ], }, + "ERNIE-4.5-VL-28B-A3B": { + "max_num_seqs": 1, + "quantizations": ["wint4"], + "is_mm": True, + "torch_model_name_or_path": "ERNIE-4.5-VL-28B-A3B-PT", + }, } From e4439e3f83b62a9da2775aa831110f192f575085 Mon Sep 17 00:00:00 2001 From: Ayakouji Date: Tue, 28 Oct 2025 17:31:03 +0800 Subject: [PATCH 3/5] fix test --- tests/model_loader/test_common_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py index ba4ff44e8e4..ba27f9a8ecb 100644 --- a/tests/model_loader/test_common_model.py +++ b/tests/model_loader/test_common_model.py @@ -89,7 +89,7 @@ }, ], }, - "ERNIE-4.5-VL-28B-A3B": { + "ERNIE-4.5-VL-28B-A3B-Paddle": { "max_num_seqs": 1, "quantizations": ["wint4"], "is_mm": True, From fc945ab5f75d503fd81fc1314a7ad5aeebf1d1ad Mon Sep 17 00:00:00 2001 From: Ayakouji Date: Tue, 28 Oct 2025 19:18:47 +0800 Subject: [PATCH 4/5] fix test --- tests/model_loader/test_common_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py index ba27f9a8ecb..68caa10dee8 100644 --- a/tests/model_loader/test_common_model.py +++ b/tests/model_loader/test_common_model.py @@ -89,7 +89,7 @@ }, ], }, - "ERNIE-4.5-VL-28B-A3B-Paddle": { + "ernie-4_5-vl-28b-a3b-bf16-paddle": { "max_num_seqs": 1, "quantizations": ["wint4"], "is_mm": True, From 633de7bf356622145f986d3fc0716d81676a9f9b Mon Sep 17 00:00:00 2001 From: Ayakouji Date: Wed, 5 Nov 2025 16:05:22 +0800 Subject: [PATCH 5/5] update --- tests/model_loader/test_common_model.py | 211 ------------------------ 1 file changed, 211 deletions(-) delete mode 100644 tests/model_loader/test_common_model.py diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py deleted file mode 100644 index 68caa10dee8..00000000000 --- a/tests/model_loader/test_common_model.py +++ /dev/null @@ -1,211 +0,0 @@ -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import sys - -import pytest - -current_dir = os.path.dirname(os.path.abspath(__file__)) -project_root = os.path.abspath(os.path.join(current_dir, "..")) -if project_root not in sys.path: - sys.path.insert(0, project_root) - -from tests.model_loader.utils import ( - check_tokens_id_and_text_close, - form_model_get_output_topp0, - form_model_get_output_topp1, - get_paddle_model_path, - get_torch_model_path, - run_with_timeout, -) - -FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313)) -FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333)) - -prompts = ["解释下”温故而知新”", "Hello, how are you?"] - - -model_param_map = { - "Qwen3-0.6B": { - "max_num_seqs": 1, - "quantizations": ["None", "wint8", "wint4"], - }, - "ernie-4_5-21b-a3b-bf16-paddle": { - "max_num_seqs": 1, - "tensor_parallel_size": 2, - "quantizations": [ - "wint8", - ], - }, - "Qwen2-7B-Instruct": { - "max_num_seqs": 1, - "quantizations": ["wint4"], - }, - "Qwen2.5-VL-7B-Instruct": { - "max_num_seqs": 1, - "quantizations": ["wint4"], - "is_mm": True, - "torch_model_name_or_path": "Qwen2.5-VL-7B-Instruct-PT", - }, - "Qwen3-30B-A3B": { - "tensor_parallel_size": 2, - "max_num_seqs": 1, - "quantizations": [ - { - "quant_type": "block_wise_fp8", - "backend": "triton", - "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17"}, - }, - { - "quant_type": "block_wise_fp8", - "backend": "deepgemm", - "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17", "FD_USE_DEEP_GEMM": "1"}, - }, - ], - }, - "DeepSeek-V3-0324": { - "tensor_parallel_size": 2, - "quantizations": [ - { - "quant_type": "wint4", - "env": { - "FD_ATTENTION_BACKEND": "MLA_ATTN", - "FLAGS_mla_use_tensorcore": "1", - "FLAGS_flash_attn_version": "3", - "FD_USE_MACHETE": "1", - }, - }, - ], - }, - "ernie-4_5-vl-28b-a3b-bf16-paddle": { - "max_num_seqs": 1, - "quantizations": ["wint4"], - "is_mm": True, - "torch_model_name_or_path": "ERNIE-4.5-VL-28B-A3B-PT", - }, -} - - -params = [] -for model, cfg in model_param_map.items(): - for q in cfg["quantizations"]: - if isinstance(q, dict): - quant, backend, env = q["quant_type"], q.get("backend", "default"), q.get("env", {}) - else: - quant, backend, env = q, "default", {} - params.append( - pytest.param( - model, - cfg.get("torch_model_name_or_path", ""), - cfg.get("tensor_parallel_size", 1), - cfg.get("max_num_seqs", 1), - cfg.get("max_model_len", 1024), - quant, - cfg.get("max_tokens", 32), - env, - cfg.get("is_mm", False), - marks=[pytest.mark.core_model], - id=f"{model}.{quant}.{backend}", - ) - ) - - -@pytest.mark.parametrize( - "model_name_or_path,torch_model_name_or_path,tensor_parallel_size,max_num_seqs,max_model_len,quantization,max_tokens,env,is_mm", - params, -) -def test_common_model( - fd_runner, - model_name_or_path: str, - torch_model_name_or_path: str, - tensor_parallel_size: int, - max_num_seqs, - max_model_len: int, - max_tokens: int, - quantization: str, - env, - is_mm: bool, - monkeypatch, -) -> None: - model_path = get_paddle_model_path(model_name_or_path) - if env: - for k, v in env.items(): - monkeypatch.setenv(k, v) - - form_model_get_output = form_model_get_output_topp0 if not is_mm else form_model_get_output_topp1 - fd_outputs_v0 = run_with_timeout( - target=form_model_get_output, - args=( - fd_runner, - model_path, - tensor_parallel_size, - max_num_seqs, - max_model_len, - max_tokens, - quantization, - "default", - FD_ENGINE_QUEUE_PORT, - prompts, - FD_CACHE_QUEUE_PORT, - ), - ) - fd_outputs_v1 = run_with_timeout( - target=form_model_get_output, - args=( - fd_runner, - model_path, - tensor_parallel_size, - max_num_seqs, - max_model_len, - max_tokens, - quantization, - "default_v1", - FD_ENGINE_QUEUE_PORT, - prompts, - FD_CACHE_QUEUE_PORT, - ), - ) - - check_tokens_id_and_text_close( - outputs_0_lst=fd_outputs_v0, - outputs_1_lst=fd_outputs_v1, - name_0="default loader", - name_1="default_v1 loader", - ) - - if torch_model_name_or_path != "": - torch_model_path = get_torch_model_path(torch_model_name_or_path) - fd_outputs_v1_torch = run_with_timeout( - target=form_model_get_output, - args=( - fd_runner, - torch_model_path, - tensor_parallel_size, - max_num_seqs, - max_model_len, - max_tokens, - quantization, - "default_v1", - FD_ENGINE_QUEUE_PORT, - prompts, - FD_CACHE_QUEUE_PORT, - ), - ) - check_tokens_id_and_text_close( - outputs_0_lst=fd_outputs_v1, - outputs_1_lst=fd_outputs_v1_torch, - name_0="default loader", - name_1="default_v1 loader", - )