29 changes: 15 additions & 14 deletions README.md
@@ -178,20 +178,21 @@ Native support for some of the most popular multi-modal models:
</div>

## Model Support
| Model | | | | | | | | | |
|-------------------|---|-------------------|---|----------------|---|----------------|---|---------------|---|
| Apertus | ✅ | EXAONE 3.0 | ✅ | InternLM 1/2.5 | ✅ | MobileLLM | ✅ | Qwen 2/2.5 VL | ✅ |
| Baichuan | ✅ | Falcon (H1) | ✅ | Kimi K2 | ✅ | MOSS | ✅ | Qwen 2.5/3 Omni | ✅ |
| Bloom | ✅ | FastVLM | ✅ | Klear | ✅ | MPT | ✅ | RefinedWeb | ✅ |
| ChatGLM | ✅ | Gemma 1/2/3 | ✅ | Llama 1-3.3 | ✅ | Nemotron H | ✅ | StableLM | ✅ |
| CodeGen | ✅ | GPTBigCode | ✅ | Llama 3.2 VL | ✅ | Nemotron Ultra | ✅ | StarCoder2 | ✅ |
| Cohere 1-2 | ✅ | GPT-Neo/GPT-NeoX | ✅ | Llama 4 | ✅ | OPT | ✅ | TeleChat2 | ✅ |
| DBRX Converted | ✅ | GPT-2 | ✅ | LongCatFlash | ✅ | OLMo2 | ✅ | Yi | ✅ |
| Deci | ✅ | GPT-J | ✅ | LongLLaMA | ✅ | Ovis 1.6/2 | ✅ | Seed-OSS | ✅ |
| DeepSeek-V2/V3/R1 | ✅ | GPT-OSS | ✅ | Instella | ✅ | Phi 1-4 | ✅ | XVERSE | ✅ |
| DeepSeek-V2-Lite | ✅ | Granite | ✅ | MiniCPM3 | ✅ | PanGu-α | ✅ | | |
| Dream | ✅ | GRIN-MoE | ✅ | Mistral | ✅ | Qwen 1/2/3 | ✅ | | |
| ERNIE 4.5 | ✅ | Hymba | ✅ | Mixtral | ✅ | Qwen 2/3 (Next/MoE) | ✅ | | |
| Model | | | | | | | | | |
|-------------------|---|-------------------|---|----------------|---|----------------|---|---------------------|---|
| Apertus | ✅ | EXAONE 3.0 | ✅ | InternLM 1/2.5 | ✅ | Mixtral | ✅ | Qwen 2/3 (Next/MoE) | ✅ |
| Baichuan | ✅ | Falcon (H1) | ✅ | Kimi K2 | ✅ | MobileLLM | ✅ | Qwen 2/2.5 VL | ✅ |
| Bloom | ✅ | FastVLM | ✅ | Klear | ✅ | MOSS | ✅ | Qwen 2.5/3 Omni | ✅ |
| ChatGLM | ✅ | Gemma 1/2/3 | ✅ | LING/RING | ✅ | MPT | ✅ | RefinedWeb | ✅ |
| CodeGen | ✅ | GPTBigCode | ✅ | Llama 1-3.3 | ✅ | Nemotron H | ✅ | StableLM | ✅ |
| Cohere 1-2 | ✅ | GPT-Neo/GPT-NeoX | ✅ | Llama 3.2 VL | ✅ | Nemotron Ultra | ✅ | StarCoder2 | ✅ |
| DBRX Converted | ✅ | GPT-2 | ✅ | Llama 4 | ✅ | OPT | ✅ | TeleChat2 | ✅ |
| Deci | ✅ | GPT-J | ✅ | LongCatFlash | ✅ | OLMo2 | ✅ | Yi | ✅ |
| DeepSeek-V2/V3/R1 | ✅ | GPT-OSS | ✅ | LongLLaMA | ✅ | Ovis 1.6/2 | ✅ | Seed-OSS | ✅ |
| DeepSeek-V2-Lite | ✅ | Granite | ✅ | Instella | ✅ | Phi 1-4 | ✅ | XVERSE | ✅ |
| Dream | ✅ | GRIN-MoE | ✅ | MiniCPM3 | ✅ | PanGu-α | ✅ | | |
| ERNIE 4.5 | ✅ | Hymba | ✅ | Mistral | ✅ | Qwen 1/2/3 | ✅ | | |


## Platform and HW Support

3 changes: 3 additions & 0 deletions gptqmodel/models/auto.py
@@ -118,6 +118,8 @@
from .definitions.starcoder2 import Starcoder2QModel # noqa: E402
from .definitions.telechat2 import TeleChat2QModel
from .definitions.xverse import XverseQModel # noqa: E402
from .definitions.bailing_moe import BailingMoeQModel # noqa: E402



# make quants and inference more deterministic
@@ -208,6 +210,7 @@
"longcat_flash": LongCatFlashQModel,
"llava_qwen2": LlavaQwen2QModel,
"nemotron_h": NemotronHQModel,
"bailing_moe": BailingMoeQModel,
}

SUPPORTED_MODELS = list(MODEL_MAP.keys())
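
For context, a minimal sketch (not part of this diff) of how the new `bailing_moe` entry is used: GPTQModel reads `model_type` from the checkpoint config and dispatches through `MODEL_MAP`. The checkpoint id below is an illustrative assumption, not something this PR specifies.

```python
# Illustrative only: resolve a BailingMoe-family checkpoint to its QModel class.
# The model id is a placeholder assumption.
from transformers import AutoConfig

from gptqmodel.models.auto import MODEL_MAP, SUPPORTED_MODELS

config = AutoConfig.from_pretrained("inclusionAI/Ling-lite", trust_remote_code=True)
assert config.model_type in SUPPORTED_MODELS  # expected to be "bailing_moe"
print(MODEL_MAP[config.model_type].__name__)  # BailingMoeQModel
```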
33 changes: 33 additions & 0 deletions gptqmodel/models/definitions/bailing_moe.py
@@ -0,0 +1,33 @@
# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
# SPDX-License-Identifier: Apache-2.0
# Contact: qubitium@modelcloud.ai, x.com/qubitium

from ..base import BaseQModel


class BailingMoeQModel(BaseQModel):
    # allow dynamic expert index for layer_modules so we don't need to write out 64 layers here
    # config.num_experts contains the actual expert count used for the index
    dynamic_expert_index = "num_experts"
    layer_modules_strict = False

    pre_lm_head_norm_module = "model.norm"

    module_tree = [
        "model",
        "layers",
        "#",
        {
            "input_layernorm": ("input_layernorm:!",),
            "self_attn": ("query_key_value",),
            "post_attention_layernorm": ("post_attention_layernorm:!",),
            "mlp": {
                "gate": ("gate:!",),  # <-- 0.5MB per layer. Not worth quantizing
                "shared_expert": ("gate_proj", "up_proj", "down_proj"),
                "experts": {
                    "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
                },
            },
        }
    ]
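
A minimal usage sketch for the new definition, assuming the standard `GPTQModel.load()` → `quantize()` → `save()` flow; the checkpoint id and the one-line calibration set are placeholders (real runs want a few hundred calibration samples):

```python
# Usage sketch, not part of this PR: quantize a BailingMoe (Ling/Ring style)
# checkpoint with the new BailingMoeQModel definition. Model id is a placeholder.
from gptqmodel import GPTQModel, QuantizeConfig

model_id = "inclusionAI/Ling-lite"  # placeholder BailingMoe-family checkpoint
quant_config = QuantizeConfig(bits=4, group_size=128)

model = GPTQModel.load(model_id, quant_config, trust_remote_code=True)
model.quantize(["GPTQModel walks each decoder layer and its routed experts."])
model.save("Ling-lite-GPTQ-4bit")
```

Because `dynamic_expert_index = "num_experts"` expands the `#` placeholder in `module_tree` per expert, the same definition covers BailingMoe configs with any expert count without listing each expert module by hand.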