diff --git a/gptqmodel/utils/model.py b/gptqmodel/utils/model.py
index 55fb98926..0c644e6c2 100644
--- a/gptqmodel/utils/model.py
+++ b/gptqmodel/utils/model.py
@@ -576,6 +576,7 @@ def convert_gptq_v1_to_v2_format_module(module: BaseQuantLinear, bits: int, pack
     module.qzero_format(format=2)
 
 # Optionally convert weight from gptq_v1 to v2 format if Kernel is compatible with v2
+@torch.inference_mode()
 def convert_gptq_v1_to_v2_format(
     model,
     cfg: QuantizeConfig,
@@ -651,6 +652,7 @@ def convert_gptq_v2_to_v1_format_module(
     module.qzero_format(format=1)
 
 # Optionally convert weight from gptq_v2 to v1 export format if Kernel is compatible with v2
+@torch.inference_mode()
 def convert_gptq_v2_to_v1_format(
     model,
     quantize_config: QuantizeConfig,
diff --git a/tests/models/test_qwen3_moe.py b/tests/models/test_qwen3_moe.py
index 90de5a090..7ef4ca150 100644
--- a/tests/models/test_qwen3_moe.py
+++ b/tests/models/test_qwen3_moe.py
@@ -18,9 +18,9 @@ class TestQwen3Moe(ModelTest):
     DEBUG = True
     ACT_GROUP_AWARE = True
     DESC_ACT = False
-    DATASET_SIZE = 1024
+    DATASET_SIZE = 1024 * 4
     DATASET_SORT = "desc"
-    QUANT_BATCH_SIZE = 4
+    QUANT_BATCH_SIZE = 8
 
     def test_mimo(self):
         self.quant_lm_eval()