From 64b3a530fb9bc6825c246cb8ddf6de254b71c4d1 Mon Sep 17 00:00:00 2001
From: aquagull <yuhongh@qq.com>
Date: Thu, 16 Oct 2025 14:44:24 +0800
Subject: [PATCH 1/5] fix: set weight_need_transpose=False instead of model_format="" on ernie4_5_vl_moe gate weight

---
 .../model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py      | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
index 6a96adeabd4..3dff165eaa4 100644
--- a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
+++ b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py
@@ -170,8 +170,8 @@ def __init__(
         # TODO(hehongyu): remove this after fix model network
         setattr(
             self.gate.weight,
-            "model_format",
-            "",
+            "weight_need_transpose",
+            False,
         )
 
     def forward(self, hidden_states: paddle.Tensor):

From ba608af013dfab5e81f4b97eb3d1287d84016e3e Mon Sep 17 00:00:00 2001
From: Ayakouji <yuhongh@qq.com>
Date: Tue, 28 Oct 2025 14:21:49 +0800
Subject: [PATCH 2/5] add ERNIE-4.5-VL-28B-A3B wint4 multimodal case to test_common_model

---
 tests/model_loader/test_common_model.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py
index bc894bddc3c..ba4ff44e8e4 100644
--- a/tests/model_loader/test_common_model.py
+++ b/tests/model_loader/test_common_model.py
@@ -89,6 +89,12 @@
             },
         ],
     },
+    "ERNIE-4.5-VL-28B-A3B": {
+        "max_num_seqs": 1,
+        "quantizations": ["wint4"],
+        "is_mm": True,
+        "torch_model_name_or_path": "ERNIE-4.5-VL-28B-A3B-PT",
+    },
 }
 
 

From e4439e3f83b62a9da2775aa831110f192f575085 Mon Sep 17 00:00:00 2001
From: Ayakouji <yuhongh@qq.com>
Date: Tue, 28 Oct 2025 17:31:03 +0800
Subject: [PATCH 3/5] fix test: rename model key to ERNIE-4.5-VL-28B-A3B-Paddle

---
 tests/model_loader/test_common_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py
index ba4ff44e8e4..ba27f9a8ecb 100644
--- a/tests/model_loader/test_common_model.py
+++ b/tests/model_loader/test_common_model.py
@@ -89,7 +89,7 @@
             },
         ],
     },
-    "ERNIE-4.5-VL-28B-A3B": {
+    "ERNIE-4.5-VL-28B-A3B-Paddle": {
         "max_num_seqs": 1,
         "quantizations": ["wint4"],
         "is_mm": True,

From fc945ab5f75d503fd81fc1314a7ad5aeebf1d1ad Mon Sep 17 00:00:00 2001
From: Ayakouji <yuhongh@qq.com>
Date: Tue, 28 Oct 2025 19:18:47 +0800
Subject: [PATCH 4/5] fix test: rename model key to ernie-4_5-vl-28b-a3b-bf16-paddle

---
 tests/model_loader/test_common_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py
index ba27f9a8ecb..68caa10dee8 100644
--- a/tests/model_loader/test_common_model.py
+++ b/tests/model_loader/test_common_model.py
@@ -89,7 +89,7 @@
             },
         ],
     },
-    "ERNIE-4.5-VL-28B-A3B-Paddle": {
+    "ernie-4_5-vl-28b-a3b-bf16-paddle": {
         "max_num_seqs": 1,
         "quantizations": ["wint4"],
         "is_mm": True,

From 633de7bf356622145f986d3fc0716d81676a9f9b Mon Sep 17 00:00:00 2001
From: Ayakouji <yuhongh@qq.com>
Date: Wed, 5 Nov 2025 16:05:22 +0800
Subject: [PATCH 5/5] remove tests/model_loader/test_common_model.py

---
 tests/model_loader/test_common_model.py | 211 ------------------------
 1 file changed, 211 deletions(-)
 delete mode 100644 tests/model_loader/test_common_model.py

diff --git a/tests/model_loader/test_common_model.py b/tests/model_loader/test_common_model.py
deleted file mode 100644
index 68caa10dee8..00000000000
--- a/tests/model_loader/test_common_model.py
+++ /dev/null
@@ -1,211 +0,0 @@
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-
-import pytest
-
-current_dir = os.path.dirname(os.path.abspath(__file__))
-project_root = os.path.abspath(os.path.join(current_dir, ".."))
-if project_root not in sys.path:
-    sys.path.insert(0, project_root)
-
-from tests.model_loader.utils import (
-    check_tokens_id_and_text_close,
-    form_model_get_output_topp0,
-    form_model_get_output_topp1,
-    get_paddle_model_path,
-    get_torch_model_path,
-    run_with_timeout,
-)
-
-FD_ENGINE_QUEUE_PORT = int(os.getenv("FD_ENGINE_QUEUE_PORT", 8313))
-FD_CACHE_QUEUE_PORT = int(os.getenv("FD_CACHE_QUEUE_PORT", 8333))
-
-prompts = ["解释下”温故而知新”", "Hello, how are you?"]
-
-
-model_param_map = {
-    "Qwen3-0.6B": {
-        "max_num_seqs": 1,
-        "quantizations": ["None", "wint8", "wint4"],
-    },
-    "ernie-4_5-21b-a3b-bf16-paddle": {
-        "max_num_seqs": 1,
-        "tensor_parallel_size": 2,
-        "quantizations": [
-            "wint8",
-        ],
-    },
-    "Qwen2-7B-Instruct": {
-        "max_num_seqs": 1,
-        "quantizations": ["wint4"],
-    },
-    "Qwen2.5-VL-7B-Instruct": {
-        "max_num_seqs": 1,
-        "quantizations": ["wint4"],
-        "is_mm": True,
-        "torch_model_name_or_path": "Qwen2.5-VL-7B-Instruct-PT",
-    },
-    "Qwen3-30B-A3B": {
-        "tensor_parallel_size": 2,
-        "max_num_seqs": 1,
-        "quantizations": [
-            {
-                "quant_type": "block_wise_fp8",
-                "backend": "triton",
-                "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17"},
-            },
-            {
-                "quant_type": "block_wise_fp8",
-                "backend": "deepgemm",
-                "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17", "FD_USE_DEEP_GEMM": "1"},
-            },
-        ],
-    },
-    "DeepSeek-V3-0324": {
-        "tensor_parallel_size": 2,
-        "quantizations": [
-            {
-                "quant_type": "wint4",
-                "env": {
-                    "FD_ATTENTION_BACKEND": "MLA_ATTN",
-                    "FLAGS_mla_use_tensorcore": "1",
-                    "FLAGS_flash_attn_version": "3",
-                    "FD_USE_MACHETE": "1",
-                },
-            },
-        ],
-    },
-    "ernie-4_5-vl-28b-a3b-bf16-paddle": {
-        "max_num_seqs": 1,
-        "quantizations": ["wint4"],
-        "is_mm": True,
-        "torch_model_name_or_path": "ERNIE-4.5-VL-28B-A3B-PT",
-    },
-}
-
-
-params = []
-for model, cfg in model_param_map.items():
-    for q in cfg["quantizations"]:
-        if isinstance(q, dict):
-            quant, backend, env = q["quant_type"], q.get("backend", "default"), q.get("env", {})
-        else:
-            quant, backend, env = q, "default", {}
-        params.append(
-            pytest.param(
-                model,
-                cfg.get("torch_model_name_or_path", ""),
-                cfg.get("tensor_parallel_size", 1),
-                cfg.get("max_num_seqs", 1),
-                cfg.get("max_model_len", 1024),
-                quant,
-                cfg.get("max_tokens", 32),
-                env,
-                cfg.get("is_mm", False),
-                marks=[pytest.mark.core_model],
-                id=f"{model}.{quant}.{backend}",
-            )
-        )
-
-
-@pytest.mark.parametrize(
-    "model_name_or_path,torch_model_name_or_path,tensor_parallel_size,max_num_seqs,max_model_len,quantization,max_tokens,env,is_mm",
-    params,
-)
-def test_common_model(
-    fd_runner,
-    model_name_or_path: str,
-    torch_model_name_or_path: str,
-    tensor_parallel_size: int,
-    max_num_seqs,
-    max_model_len: int,
-    max_tokens: int,
-    quantization: str,
-    env,
-    is_mm: bool,
-    monkeypatch,
-) -> None:
-    model_path = get_paddle_model_path(model_name_or_path)
-    if env:
-        for k, v in env.items():
-            monkeypatch.setenv(k, v)
-
-    form_model_get_output = form_model_get_output_topp0 if not is_mm else form_model_get_output_topp1
-    fd_outputs_v0 = run_with_timeout(
-        target=form_model_get_output,
-        args=(
-            fd_runner,
-            model_path,
-            tensor_parallel_size,
-            max_num_seqs,
-            max_model_len,
-            max_tokens,
-            quantization,
-            "default",
-            FD_ENGINE_QUEUE_PORT,
-            prompts,
-            FD_CACHE_QUEUE_PORT,
-        ),
-    )
-    fd_outputs_v1 = run_with_timeout(
-        target=form_model_get_output,
-        args=(
-            fd_runner,
-            model_path,
-            tensor_parallel_size,
-            max_num_seqs,
-            max_model_len,
-            max_tokens,
-            quantization,
-            "default_v1",
-            FD_ENGINE_QUEUE_PORT,
-            prompts,
-            FD_CACHE_QUEUE_PORT,
-        ),
-    )
-
-    check_tokens_id_and_text_close(
-        outputs_0_lst=fd_outputs_v0,
-        outputs_1_lst=fd_outputs_v1,
-        name_0="default loader",
-        name_1="default_v1 loader",
-    )
-
-    if torch_model_name_or_path != "":
-        torch_model_path = get_torch_model_path(torch_model_name_or_path)
-        fd_outputs_v1_torch = run_with_timeout(
-            target=form_model_get_output,
-            args=(
-                fd_runner,
-                torch_model_path,
-                tensor_parallel_size,
-                max_num_seqs,
-                max_model_len,
-                max_tokens,
-                quantization,
-                "default_v1",
-                FD_ENGINE_QUEUE_PORT,
-                prompts,
-                FD_CACHE_QUEUE_PORT,
-            ),
-        )
-        check_tokens_id_and_text_close(
-            outputs_0_lst=fd_outputs_v1,
-            outputs_1_lst=fd_outputs_v1_torch,
-            name_0="default loader",
-            name_1="default_v1 loader",
-        )