From 4f0a9de33d84acff060dcf431709d8d8f953b88d Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud <zx@modelcloud.ai>
Date: Tue, 21 Oct 2025 16:17:19 +0800
Subject: [PATCH 1/4] fix test_benchmark_gar.py: use gar_ref in accuracy test

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
---
 tests/test_benchmark_gar.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/test_benchmark_gar.py b/tests/test_benchmark_gar.py
index e6e0db1c8..03b2b5feb 100644
--- a/tests/test_benchmark_gar.py
+++ b/tests/test_benchmark_gar.py
@@ -7,6 +7,7 @@
 from tabulate import tabulate
 
 from gptqmodel.quantization import gar
+from gptqmodel.quantization import gar_ref
 
 
 def _benchmark_fn(label, fn, device, warmup_runs=3, measured_runs=10):
@@ -137,9 +138,9 @@ def test_gar_accuracy_randomized(seed):
     )
     opt_final = gar.compose_final_perm(opt_local, opt_global, groupsize)
 
-    orig_local = gar.compute_local_perms_original(diag_H, groupsize)
-    orig_global = gar.compute_global_perm_original(diag_H, groupsize)
-    orig_final = gar.compose_final_perm_original(orig_local, orig_global, groupsize)
+    orig_local = gar_ref.compute_local_perms_original(diag_H, groupsize)
+    orig_global = gar_ref.compute_global_perm_original(diag_H, groupsize)
+    orig_final = gar_ref.compose_final_perm_original(orig_local, orig_global, groupsize)
 
     opt_perm_values = diag_H[opt_final]
     orig_perm_values = diag_H[orig_final]

From abba937dbc9bbf3a83531b1ae447e40b3c25cd34 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud <zx@modelcloud.ai>
Date: Tue, 21 Oct 2025 17:29:13 +0800
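Subject: [PATCH 2/4] fix test_dynamic.py: pass keywords to assertInference

Also make assertInference in tests/models/model_test.py raise AssertionError
directly instead of calling self.assertTrue(False, ...).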

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
---
 tests/models/model_test.py | 2 +-
 tests/test_dynamic.py      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/models/model_test.py b/tests/models/model_test.py
index 8a2a3d9f7..ef5a07054 100644
--- a/tests/models/model_test.py
+++ b/tests/models/model_test.py
@@ -261,7 +261,7 @@ def assertInference(self, model, tokenizer=None, keywords=None, prompt=INFERENCE
             if k.lower() in generated:
                 self.assertTrue(True)
                 return
-        self.assertTrue(False, f"none of keywords were found in generated: `{generated}`")
+        raise AssertionError(f"none of keywords were found in generated: `{generated}`")
 
     # note that sampling is disabled for help with deterministic generation for ci tests
     def generate(self, model, tokenizer, prompt=None):
diff --git a/tests/test_dynamic.py b/tests/test_dynamic.py
index aa6422b35..5e0c4cf6e 100644
--- a/tests/test_dynamic.py
+++ b/tests/test_dynamic.py
@@ -145,4 +145,4 @@ def test_skip_module(self):
             del model
 
             q_model = GPTQModel.load(tmp_dir)
-            self.assertInference(model=q_model,tokenizer=self.tokenizer)
+            self.assertInference(model=q_model,tokenizer=self.tokenizer,keywords=["paris", "king"])

From 9f383cf218c372a945076e80f14d20185c39e820 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud <zx@modelcloud.ai>
Date: Tue, 21 Oct 2025 17:29:28 +0800
Subject: [PATCH 3/4] fix test_bits.py: pass raw text as calibration dataset

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
---
 tests/test_bits.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_bits.py b/tests/test_bits.py
index 5297fa637..8f1ae4d5f 100644
--- a/tests/test_bits.py
+++ b/tests/test_bits.py
@@ -82,8 +82,7 @@ def test_bits(self):
         # quantize
         model_id = "/monster/data/model/Qwen2.5-0.5B-Instruct"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
-        dataset = ["gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."]
-        calibration_dataset = [tokenizer(example) for example in dataset]
+        calibration_dataset = ["gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."]
 
         for quant_backend in self.pack_backends:
             supports_bits = self.QLINEAR_DICT[quant_backend].SUPPORTS_BITS

From d515f83a2fffe194dc07073496e4d7c82a8a71de Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud <zx@modelcloud.ai>
Date: Tue, 21 Oct 2025 17:29:36 +0800
Subject: [PATCH 4/4] fix test_benchmark_gar.py: use gar_ref in original_call

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
---
 tests/test_benchmark_gar.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_benchmark_gar.py b/tests/test_benchmark_gar.py
index 03b2b5feb..4a2f74e51 100644
--- a/tests/test_benchmark_gar.py
+++ b/tests/test_benchmark_gar.py
@@ -55,9 +55,9 @@ def optimized_call():
         return result
 
     def original_call():
-        local = gar.compute_local_perms_original(diag_H, groupsize)
-        global_perm = gar.compute_global_perm_original(diag_H, groupsize)
-        return gar.compose_final_perm_original(local, global_perm, groupsize)
+        local = gar_ref.compute_local_perms_original(diag_H, groupsize)
+        global_perm = gar_ref.compute_global_perm_original(diag_H, groupsize)
+        return gar_ref.compose_final_perm_original(local, global_perm, groupsize)
 
     # Ensure both implementations agree before timing to detect accuracy regressions.
     optimized_result = optimized_call()