From 0fd26e3498fb00f590b33d1792e4ea398858f27d Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 16 Oct 2025 16:04:32 +0800 Subject: [PATCH 1/4] mlp.gate cannot be skipped Signed-off-by: ZX-ModelCloud --- gptqmodel/models/definitions/qwen3_moe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gptqmodel/models/definitions/qwen3_moe.py b/gptqmodel/models/definitions/qwen3_moe.py index 31e788838..0f0dd9919 100644 --- a/gptqmodel/models/definitions/qwen3_moe.py +++ b/gptqmodel/models/definitions/qwen3_moe.py @@ -26,7 +26,7 @@ class Qwen3MoeQModel(BaseQModel): "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"), "post_attention_layernorm": ("post_attention_layernorm:!",), "mlp": { - "gate": ("gate:!",), # <-- 0.5MB per layer. Not worth quantizing + "gate": ("gate",), "experts": { "#": ("gate_proj:0", "up_proj:0", "down_proj:1"), }, From 8dd881d3896aa3007108869e474496962a451109 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 16 Oct 2025 17:31:34 +0800 Subject: [PATCH 2/4] Revert "mlp.gate cannot be skipped" This reverts commit 0fd26e3498fb00f590b33d1792e4ea398858f27d. --- gptqmodel/models/definitions/qwen3_moe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gptqmodel/models/definitions/qwen3_moe.py b/gptqmodel/models/definitions/qwen3_moe.py index 0f0dd9919..31e788838 100644 --- a/gptqmodel/models/definitions/qwen3_moe.py +++ b/gptqmodel/models/definitions/qwen3_moe.py @@ -26,7 +26,7 @@ class Qwen3MoeQModel(BaseQModel): "self_attn": ("q_proj:0", "k_proj:0", "v_proj:0", "o_proj:1"), "post_attention_layernorm": ("post_attention_layernorm:!",), "mlp": { - "gate": ("gate",), + "gate": ("gate:!",), # <-- 0.5MB per layer. 
Not worth quantizing "experts": { "#": ("gate_proj:0", "up_proj:0", "down_proj:1"), }, From f19d863ba314c0dd97c29cef49dfbdf8906afa2d Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 16 Oct 2025 19:22:37 +0800 Subject: [PATCH 3/4] add "module_tree_overrides" field Signed-off-by: ZX-ModelCloud --- gptqmodel/models/base.py | 30 ++++++++++++++++++++++- gptqmodel/models/definitions/qwen3_moe.py | 11 +++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py index bfffa308c..726bbc4a3 100644 --- a/gptqmodel/models/base.py +++ b/gptqmodel/models/base.py @@ -110,6 +110,25 @@ def check_support_param_buffer_assignment(*args, **kwargs): return False +def apply_module_tree_override(module_tree, override): + """ + Recursively merge ``override`` into ``module_tree``: matching dict/list values are merged in place, while all other matching keys are replaced. + """ + if isinstance(module_tree, dict) and isinstance(override, dict): + for k, v in override.items(): + if k in module_tree and isinstance(module_tree[k], (dict, list)) and isinstance(v, (dict, list)): + module_tree[k] = apply_module_tree_override(module_tree[k], v) + else: + module_tree[k] = v + elif isinstance(module_tree, list) and isinstance(override, list): + for o in override: + if isinstance(o, dict): + for b in module_tree: + if isinstance(b, dict): + apply_module_tree_override(b, o) + return module_tree + + NOT_QUANTIZE_FLAG = ":!" 
@@ -125,6 +144,8 @@ class BaseQModel(nn.Module): # a tree node of all the roots that contain quantizable modules module_tree: List[str] = None + # Override module_tree according to different QUANT_METHOD + module_tree_overrides: dict[METHOD, List[str]] = None # Strict=True -> all layer_modules must exists in model # Some models (deepseek2-lite) dynamically create lora modules based on config.rank @@ -198,6 +219,13 @@ def __init__( ): super().__init__() + quant_method = quantize_config.quant_method + # override module_tree if need + if self.module_tree_overrides.get(quant_method) is not None: + log.info(f'Module Tree: overridden by METHOD.{quant_method.upper()}') + # setting cls.module_tree + type(self).module_tree = apply_module_tree_override(self.module_tree, self.module_tree_overrides[quant_method]) + # record configuration early so model lifecycle hooks can rely on them self.compiled = False # set to True while compile() is triggered successfully self.quantized = quantized @@ -794,7 +822,7 @@ def quantize( ) if not self.support_batch_quantize: - log.warn("Quantize: batch_size overriden by model class definition to `disabled`") + log.warn("Quantize: batch_size overridden by model class definition to `disabled`") batch_size = 1 # but actually disabled if self.quantize_config.format == FORMAT.MARLIN: diff --git a/gptqmodel/models/definitions/qwen3_moe.py b/gptqmodel/models/definitions/qwen3_moe.py index 31e788838..e0f56602a 100644 --- a/gptqmodel/models/definitions/qwen3_moe.py +++ b/gptqmodel/models/definitions/qwen3_moe.py @@ -4,6 +4,7 @@ # Contact: qubitium@modelcloud.ai, x.com/qubitium from ..base import BaseQModel +from ...quantization import METHOD class Qwen3MoeQModel(BaseQModel): @@ -33,3 +34,13 @@ class Qwen3MoeQModel(BaseQModel): }, } ] + + module_tree_overrides = { + METHOD.AWQ: [ + { + "mlp": { + "gate": ("gate",), + } + } + ] + } From bb2bf937137957e81209a36ca29d84237a9c3d6d Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Thu, 16 Oct 2025 19:40:15 
+0800 Subject: [PATCH 4/4] check self.module_tree_overrides is not None Signed-off-by: ZX-ModelCloud --- gptqmodel/models/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py index 726bbc4a3..358e255f7 100644 --- a/gptqmodel/models/base.py +++ b/gptqmodel/models/base.py @@ -221,7 +221,7 @@ def __init__( quant_method = quantize_config.quant_method # override module_tree if need - if self.module_tree_overrides.get(quant_method) is not None: + if self.module_tree_overrides is not None and self.module_tree_overrides.get(quant_method) is not None: log.info(f'Module Tree: overridden by METHOD.{quant_method.upper()}') # setting cls.module_tree type(self).module_tree = apply_module_tree_override(self.module_tree, self.module_tree_overrides[quant_method])