ModelCloud · Qubitium · Oct 21, 2025 · Oct 21, 2025 · Oct 21, 2025 · Oct 21, 2025
diff --git a/tests/models/model_test.py b/tests/models/model_test.py
@@ -261,7 +261,7 @@ def assertInference(self, model, tokenizer=None, keywords=None, prompt=INFERENCE
             if k.lower() in generated:
                 self.assertTrue(True)
                 return
-        self.assertTrue(False, f"none of keywords were found in generated: `{generated}`")
+        raise AssertionError(f"none of keywords were found in generated: `{generated}`")
 
     # note that sampling is disabled for help with deterministic generation for ci tests
     def generate(self, model, tokenizer, prompt=None):

diff --git a/tests/test_benchmark_gar.py b/tests/test_benchmark_gar.py
@@ -7,6 +7,7 @@
 from tabulate import tabulate
 
 from gptqmodel.quantization import gar
+from gptqmodel.quantization import gar_ref
 
 
 def _benchmark_fn(label, fn, device, warmup_runs=3, measured_runs=10):
@@ -54,9 +55,9 @@ def optimized_call():
         return result
 
     def original_call():
-        local = gar.compute_local_perms_original(diag_H, groupsize)
-        global_perm = gar.compute_global_perm_original(diag_H, groupsize)
-        return gar.compose_final_perm_original(local, global_perm, groupsize)
+        local = gar_ref.compute_local_perms_original(diag_H, groupsize)
+        global_perm = gar_ref.compute_global_perm_original(diag_H, groupsize)
+        return gar_ref.compose_final_perm_original(local, global_perm, groupsize)
 
     # Ensure both implementations agree before timing to detect accuracy regressions.
     optimized_result = optimized_call()
@@ -137,9 +138,9 @@ def test_gar_accuracy_randomized(seed):
     )
     opt_final = gar.compose_final_perm(opt_local, opt_global, groupsize)
 
-    orig_local = gar.compute_local_perms_original(diag_H, groupsize)
-    orig_global = gar.compute_global_perm_original(diag_H, groupsize)
-    orig_final = gar.compose_final_perm_original(orig_local, orig_global, groupsize)
+    orig_local = gar_ref.compute_local_perms_original(diag_H, groupsize)
+    orig_global = gar_ref.compute_global_perm_original(diag_H, groupsize)
+    orig_final = gar_ref.compose_final_perm_original(orig_local, orig_global, groupsize)
 
     opt_perm_values = diag_H[opt_final]
     orig_perm_values = diag_H[orig_final]

diff --git a/tests/test_bits.py b/tests/test_bits.py
@@ -82,8 +82,7 @@ def test_bits(self):
         # quantize
         model_id = "/monster/data/model/Qwen2.5-0.5B-Instruct"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
-        dataset = ["gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."]
-        calibration_dataset = [tokenizer(example) for example in dataset]
+        calibration_dataset = ["gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."]
 
         for quant_backend in self.pack_backends:
             supports_bits = self.QLINEAR_DICT[quant_backend].SUPPORTS_BITS

diff --git a/tests/test_dynamic.py b/tests/test_dynamic.py
@@ -145,4 +145,4 @@ def test_skip_module(self):
             del model
 
             q_model = GPTQModel.load(tmp_dir)
-            self.assertInference(model=q_model,tokenizer=self.tokenizer)
+            self.assertInference(model=q_model,tokenizer=self.tokenizer,keywords=["paris", "king"])