From 8dc13880833f23c88bfaad609b6e013c512f0547 Mon Sep 17 00:00:00 2001
From: Qubitium
Date: Wed, 1 Oct 2025 13:22:47 +0000
Subject: [PATCH] fix ctx for convert v1-v2 v2-v1 using in_place tensor mutation

Signed-off-by: Qubitium
---
 gptqmodel/utils/model.py       | 2 ++
 tests/models/test_qwen3_moe.py | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gptqmodel/utils/model.py b/gptqmodel/utils/model.py
index 55fb98926..0c644e6c2 100644
--- a/gptqmodel/utils/model.py
+++ b/gptqmodel/utils/model.py
@@ -576,6 +576,7 @@ def convert_gptq_v1_to_v2_format_module(module: BaseQuantLinear, bits: int, pack
     module.qzero_format(format=2)
 
 # Optionally convert weight from gptq_v1 to v2 format if Kernel is compatible with v2
+@torch.inference_mode()
 def convert_gptq_v1_to_v2_format(
     model,
     cfg: QuantizeConfig,
@@ -651,6 +652,7 @@ def convert_gptq_v2_to_v1_format_module(
     module.qzero_format(format=1)
 
 # Optionally convert weight from gptq_v2 to v1 export format if Kernel is compatible with v2
+@torch.inference_mode()
 def convert_gptq_v2_to_v1_format(
     model,
     quantize_config: QuantizeConfig,
diff --git a/tests/models/test_qwen3_moe.py b/tests/models/test_qwen3_moe.py
index 90de5a090..7ef4ca150 100644
--- a/tests/models/test_qwen3_moe.py
+++ b/tests/models/test_qwen3_moe.py
@@ -18,9 +18,9 @@ class TestQwen3Moe(ModelTest):
     DEBUG = True
     ACT_GROUP_AWARE = True
     DESC_ACT = False
-    DATASET_SIZE = 1024
+    DATASET_SIZE = 1024 * 4
     DATASET_SORT = "desc"
-    QUANT_BATCH_SIZE = 4
+    QUANT_BATCH_SIZE = 8
 
     def test_mimo(self):
         self.quant_lm_eval()