ModelCloud · Qubitium · Oct 1, 2025 · Oct 1, 2025
diff --git a/gptqmodel/utils/model.py b/gptqmodel/utils/model.py
@@ -576,6 +576,7 @@ def convert_gptq_v1_to_v2_format_module(module: BaseQuantLinear, bits: int, pack
     module.qzero_format(format=2)
 
 # Optionally convert weight from gptq_v1 to v2 format if Kernel is compatible with v2
+@torch.inference_mode()
 def convert_gptq_v1_to_v2_format(
     model,
     cfg: QuantizeConfig,
@@ -651,6 +652,7 @@ def convert_gptq_v2_to_v1_format_module(
     module.qzero_format(format=1)
 
 # Optionally convert weight from gptq_v2 to v1 export format if Kernel is compatible with v2
+@torch.inference_mode()
 def convert_gptq_v2_to_v1_format(
     model,
     quantize_config: QuantizeConfig,

diff --git a/tests/models/test_qwen3_moe.py b/tests/models/test_qwen3_moe.py
@@ -18,9 +18,9 @@ class TestQwen3Moe(ModelTest):
     DEBUG = True
     ACT_GROUP_AWARE = True
     DESC_ACT = False
-    DATASET_SIZE = 1024
+    DATASET_SIZE = 1024 * 4
     DATASET_SORT = "desc"
-    QUANT_BATCH_SIZE = 4
+    QUANT_BATCH_SIZE = 8
 
     def test_mimo(self):
         self.quant_lm_eval()