ModelCloud · Qubitium · Oct 11, 2025 · Oct 11, 2025 · Oct 11, 2025
diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py
@@ -1323,6 +1323,9 @@ def _clone_model_init_kwargs(self, source: PreTrainedModel) -> Dict[str, Any]:
         return copy.deepcopy(kwargs)
 
     def reload_turtle_model(self) -> None:
+        if self.quantize_config.offload_to_disk is False:
+            return
+
         if threading.current_thread() is not threading.main_thread():
             raise RuntimeError("Turtle reloads must run on the main thread")
 

diff --git a/gptqmodel/models/definitions/bailing_moe.py b/gptqmodel/models/definitions/bailing_moe.py
@@ -20,11 +20,11 @@ class BailingMoeQModel(BaseQModel):
         "#",
         {
             "input_layernorm": ("input_layernorm:!",),
-            "self_attn": ("query_key_value"),
+            "attention": ("query_key_value"),
             "post_attention_layernorm": ("post_attention_layernorm:!",),
             "mlp": {
                 "gate": ("gate:!",), # <-- 0.5MB per layer. Not worth quantizing
-                "shared_expert": ("gate_proj", "up_proj", "down_proj"),
+                "shared_experts": ("gate_proj", "up_proj", "down_proj"),
                 "experts": {
                     "#": ("gate_proj:0", "up_proj:0", "down_proj:1"),
                 },

diff --git a/tests/models/model_test.py b/tests/models/model_test.py
@@ -212,7 +212,7 @@ def run_arc_challenge_eval(self, model, backend, trust_remote_code=False):
             task_results = self.lm_eval(
                 model=model,
                 apply_chat_template=self.APPLY_CHAT_TEMPLATE,
-                trust_remote_code=trust_remote_code,
+                trust_remote_code=self.TRUST_REMOTE_CODE,
                 delete_quantized_model=False,
             )
             log.info(f"[{backend.name}] ARC summary: {task_results}")

diff --git a/tests/models/test_ling.py b/tests/models/test_ling.py
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: 2024-2025 ModelCloud.ai
+# SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai
+# SPDX-License-Identifier: Apache-2.0
+# Contact: qubitium@modelcloud.ai, x.com/qubitium
+
+from model_test import ModelTest
+
+
+class TestLing(ModelTest):  # test settings for the checkpoint at NATIVE_MODEL_ID; how each attribute is consumed is decided by ModelTest (not shown here)
+    NATIVE_MODEL_ID = "/monster/data/model/Ling-mini-2.0/"  # absolute local path; presumably only present on the project's test hosts
+    QUANT_ARC_MAX_DELTA_FLOOR_PERCENT = 0.2
+    NATIVE_ARC_CHALLENGE_ACC = 0.5009  # presumably the unquantized model's ARC-Challenge baseline; confirm against ModelTest
+    NATIVE_ARC_CHALLENGE_ACC_NORM = 0.5137
+    TRUST_REMOTE_CODE = True  # passed to lm_eval as trust_remote_code by run_arc_challenge_eval in model_test.py
+    APPLY_CHAT_TEMPLATE = True  # passed to lm_eval as apply_chat_template by run_arc_challenge_eval in model_test.py
+    # EVAL_BATCH_SIZE = 6
+    V2 = False
+    DEBUG = True
+    ACT_GROUP_AWARE = True
+    DESC_ACT = False
+    DATASET_SIZE = 2048
+    DATASET_SORT = "desc"
+    QUANT_BATCH_SIZE = 8
+    CALIB_NOISE_MODE = "unseen"
+    CALIB_NOISE_PERCENT = 0.025
+
+    def test_mimo(self):  # NOTE(review): name says "mimo" but this class targets Ling; looks copy-pasted, consider renaming to test_ling
+        self.quant_lm_eval()  # not defined in this file; presumably inherited from ModelTest