From 8dc13880833f23c88bfaad609b6e013c512f0547 Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Wed, 1 Oct 2025 13:22:47 +0000
Subject: [PATCH] fix ctx for convert v1-v2 v2-v1 using in_place tensor mutation

Signed-off-by: Qubitium
---
 gptqmodel/utils/model.py       | 2 ++
 tests/models/test_qwen3_moe.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gptqmodel/utils/model.py b/gptqmodel/utils/model.py
index 55fb98926..0c644e6c2 100644
--- a/gptqmodel/utils/model.py
+++ b/gptqmodel/utils/model.py
@@ -576,6 +576,7 @@ def convert_gptq_v1_to_v2_format_module(module: BaseQuantLinear, bits: int, pack
     module.qzero_format(format=2)
 
 # Optionally convert weight from gptq_v1 to v2 format if Kernel is compatible with v2
+@torch.inference_mode()
 def convert_gptq_v1_to_v2_format(
     model,
     cfg: QuantizeConfig,
@@ -651,6 +652,7 @@ def convert_gptq_v2_to_v1_format_module(
     module.qzero_format(format=1)
 
 # Optionally convert weight from gptq_v2 to v1 export format if Kernel is compatible with v2
+@torch.inference_mode()
 def convert_gptq_v2_to_v1_format(
     model,
     quantize_config: QuantizeConfig,
diff --git a/tests/models/test_qwen3_moe.py b/tests/models/test_qwen3_moe.py
index 90de5a090..7ef4ca150 100644
--- a/tests/models/test_qwen3_moe.py
+++ b/tests/models/test_qwen3_moe.py
@@ -18,9 +18,9 @@ class TestQwen3Moe(ModelTest):
     DEBUG = True
     ACT_GROUP_AWARE = True
     DESC_ACT = False
-    DATASET_SIZE = 1024
+    DATASET_SIZE = 1024 * 4
     DATASET_SORT = "desc"
-    QUANT_BATCH_SIZE = 4
+    QUANT_BATCH_SIZE = 8
 
     def test_mimo(self):
         self.quant_lm_eval()