diff --git a/tests/models/model_test.py b/tests/models/model_test.py index 8a2a3d9f7..ef5a07054 100644 --- a/tests/models/model_test.py +++ b/tests/models/model_test.py @@ -261,7 +261,7 @@ def assertInference(self, model, tokenizer=None, keywords=None, prompt=INFERENCE if k.lower() in generated: self.assertTrue(True) return - self.assertTrue(False, f"none of keywords were found in generated: `{generated}`") + raise AssertionError(f"none of keywords were found in generated: `{generated}`") # note that sampling is disabled for help with deterministic generation for ci tests def generate(self, model, tokenizer, prompt=None): diff --git a/tests/test_benchmark_gar.py b/tests/test_benchmark_gar.py index e6e0db1c8..4a2f74e51 100644 --- a/tests/test_benchmark_gar.py +++ b/tests/test_benchmark_gar.py @@ -7,6 +7,7 @@ from tabulate import tabulate from gptqmodel.quantization import gar +from gptqmodel.quantization import gar_ref def _benchmark_fn(label, fn, device, warmup_runs=3, measured_runs=10): @@ -54,9 +55,9 @@ def optimized_call(): return result def original_call(): - local = gar.compute_local_perms_original(diag_H, groupsize) - global_perm = gar.compute_global_perm_original(diag_H, groupsize) - return gar.compose_final_perm_original(local, global_perm, groupsize) + local = gar_ref.compute_local_perms_original(diag_H, groupsize) + global_perm = gar_ref.compute_global_perm_original(diag_H, groupsize) + return gar_ref.compose_final_perm_original(local, global_perm, groupsize) # Ensure both implementations agree before timing to detect accuracy regressions. optimized_result = optimized_call() @@ -137,9 +138,9 @@ def test_gar_accuracy_randomized(seed): ) opt_final = gar.compose_final_perm(opt_local, opt_global, groupsize) - orig_local = gar.compute_local_perms_original(diag_H, groupsize) - orig_global = gar.compute_global_perm_original(diag_H, groupsize) - orig_final = gar.compose_final_perm_original(orig_local, orig_global, groupsize) + orig_local = gar_ref.compute_local_perms_original(diag_H, groupsize) + orig_global = gar_ref.compute_global_perm_original(diag_H, groupsize) + orig_final = gar_ref.compose_final_perm_original(orig_local, orig_global, groupsize) opt_perm_values = diag_H[opt_final] orig_perm_values = diag_H[orig_final] diff --git a/tests/test_bits.py b/tests/test_bits.py index 5297fa637..8f1ae4d5f 100644 --- a/tests/test_bits.py +++ b/tests/test_bits.py @@ -82,8 +82,7 @@ def test_bits(self): # quantize model_id = "/monster/data/model/Qwen2.5-0.5B-Instruct" tokenizer = AutoTokenizer.from_pretrained(model_id) - dataset = ["gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."] - calibration_dataset = [tokenizer(example) for example in dataset] + calibration_dataset = ["gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."] for quant_backend in self.pack_backends: supports_bits = self.QLINEAR_DICT[quant_backend].SUPPORTS_BITS diff --git a/tests/test_dynamic.py b/tests/test_dynamic.py index aa6422b35..5e0c4cf6e 100644 --- a/tests/test_dynamic.py +++ b/tests/test_dynamic.py @@ -145,4 +145,4 @@ def test_skip_module(self): del model q_model = GPTQModel.load(tmp_dir) - self.assertInference(model=q_model,tokenizer=self.tokenizer) + self.assertInference(model=q_model,tokenizer=self.tokenizer,keywords=["paris", "king"])