From 4f0a9de33d84acff060dcf431709d8d8f953b88d Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Tue, 21 Oct 2025 16:17:19 +0800 Subject: [PATCH 1/4] fix test_benchmark_gar.py Signed-off-by: ZX-ModelCloud --- tests/test_benchmark_gar.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_benchmark_gar.py b/tests/test_benchmark_gar.py index e6e0db1c8..03b2b5feb 100644 --- a/tests/test_benchmark_gar.py +++ b/tests/test_benchmark_gar.py @@ -7,6 +7,7 @@ from tabulate import tabulate from gptqmodel.quantization import gar +from gptqmodel.quantization import gar_ref def _benchmark_fn(label, fn, device, warmup_runs=3, measured_runs=10): @@ -137,9 +138,9 @@ def test_gar_accuracy_randomized(seed): ) opt_final = gar.compose_final_perm(opt_local, opt_global, groupsize) - orig_local = gar.compute_local_perms_original(diag_H, groupsize) - orig_global = gar.compute_global_perm_original(diag_H, groupsize) - orig_final = gar.compose_final_perm_original(orig_local, orig_global, groupsize) + orig_local = gar_ref.compute_local_perms_original(diag_H, groupsize) + orig_global = gar_ref.compute_global_perm_original(diag_H, groupsize) + orig_final = gar_ref.compose_final_perm_original(orig_local, orig_global, groupsize) opt_perm_values = diag_H[opt_final] orig_perm_values = diag_H[orig_final] From abba937dbc9bbf3a83531b1ae447e40b3c25cd34 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Tue, 21 Oct 2025 17:29:13 +0800 Subject: [PATCH 2/4] fix test_dynamic.py Signed-off-by: ZX-ModelCloud --- tests/models/model_test.py | 2 +- tests/test_dynamic.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models/model_test.py b/tests/models/model_test.py index 8a2a3d9f7..ef5a07054 100644 --- a/tests/models/model_test.py +++ b/tests/models/model_test.py @@ -261,7 +261,7 @@ def assertInference(self, model, tokenizer=None, keywords=None, prompt=INFERENCE if k.lower() in generated: self.assertTrue(True) return - self.assertTrue(False, f"none of keywords were found in generated: `{generated}`") + raise AssertionError(f"none of keywords were found in generated: `{generated}`") # note that sampling is disabled for help with deterministic generation for ci tests def generate(self, model, tokenizer, prompt=None): diff --git a/tests/test_dynamic.py b/tests/test_dynamic.py index aa6422b35..5e0c4cf6e 100644 --- a/tests/test_dynamic.py +++ b/tests/test_dynamic.py @@ -145,4 +145,4 @@ def test_skip_module(self): del model q_model = GPTQModel.load(tmp_dir) - self.assertInference(model=q_model,tokenizer=self.tokenizer) + self.assertInference(model=q_model,tokenizer=self.tokenizer,keywords=["paris", "king"]) From 9f383cf218c372a945076e80f14d20185c39e820 Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Tue, 21 Oct 2025 17:29:28 +0800 Subject: [PATCH 3/4] fix test_bits.py Signed-off-by: ZX-ModelCloud --- tests/test_bits.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_bits.py b/tests/test_bits.py index 5297fa637..8f1ae4d5f 100644 --- a/tests/test_bits.py +++ b/tests/test_bits.py @@ -82,8 +82,7 @@ def test_bits(self): # quantize model_id = "/monster/data/model/Qwen2.5-0.5B-Instruct" tokenizer = AutoTokenizer.from_pretrained(model_id) - dataset = ["gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."] - calibration_dataset = [tokenizer(example) for example in dataset] + calibration_dataset = ["gptqmodel is an easy-to-use model quantization library with user-friendly apis, based on GPTQ algorithm."] for quant_backend in self.pack_backends: supports_bits = self.QLINEAR_DICT[quant_backend].SUPPORTS_BITS From d515f83a2fffe194dc07073496e4d7c82a8a71de Mon Sep 17 00:00:00 2001 From: ZX-ModelCloud Date: Tue, 21 Oct 2025 17:29:36 +0800 Subject: [PATCH 4/4] fix test_benchmark_gar.py Signed-off-by: ZX-ModelCloud --- tests/test_benchmark_gar.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_benchmark_gar.py b/tests/test_benchmark_gar.py index 03b2b5feb..4a2f74e51 100644 --- a/tests/test_benchmark_gar.py +++ b/tests/test_benchmark_gar.py @@ -55,9 +55,9 @@ def optimized_call(): return result def original_call(): - local = gar.compute_local_perms_original(diag_H, groupsize) - global_perm = gar.compute_global_perm_original(diag_H, groupsize) - return gar.compose_final_perm_original(local, global_perm, groupsize) + local = gar_ref.compute_local_perms_original(diag_H, groupsize) + global_perm = gar_ref.compute_global_perm_original(diag_H, groupsize) + return gar_ref.compose_final_perm_original(local, global_perm, groupsize) # Ensure both implementations agree before timing to detect accuracy regressions. optimized_result = optimized_call()