From d2a64e89aceaf30ea96f6b64fd278ac3a2f9e50c Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Sat, 27 Sep 2025 11:49:48 +0800
Subject: [PATCH 1/3] fix damp_auto_increment default value

Signed-off-by: ZX-ModelCloud
---
 gptqmodel/quantization/config.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gptqmodel/quantization/config.py b/gptqmodel/quantization/config.py
index e6a9259fa..90c40f4ea 100644
--- a/gptqmodel/quantization/config.py
+++ b/gptqmodel/quantization/config.py
@@ -257,9 +257,12 @@ def __post_init__(self):
         if self.damp_percent is None:
             if self.quant_method == METHOD.QQQ:
                 self.damp_percent = 0.005
-                self.damp_auto_increment = 0.001
             else:
                 self.damp_percent = 0.05
+        if self.damp_auto_increment is None:
+            if self.quant_method == METHOD.QQQ:
+                self.damp_auto_increment = 0.001
+            else:
                 self.damp_auto_increment = 0.01
 
         # TODO FIXME awq compat which didn't have checkpoint_format before merging to gptqmodel

From bc4e46b60a8a5ba83831d0f65b343360cc4c061f Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Sat, 27 Sep 2025 13:32:50 +0800
Subject: [PATCH 2/3] if load_quantized_model=True, don't call
 alias_all_from_turtle_if_meta(), because turtle_model is None

Signed-off-by: ZX-ModelCloud
---
 gptqmodel/models/base.py   | 2 +-
 gptqmodel/models/writer.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py
index 4ee513de5..8f120fa05 100644
--- a/gptqmodel/models/base.py
+++ b/gptqmodel/models/base.py
@@ -168,7 +168,7 @@ def __init__(
         self.model = self.after_model_load(model, load_quantized_model=load_quantized_model)
         self.turtle_model = turtle_model
 
-        self.compiled = False # set to True while compile() is triggered successfully
+        self.compiled = False  # set to True when compile() is triggered successfully
         self.quantized = quantized
         self.load_quantized_model = load_quantized_model
         if tokenizer is not None:
diff --git a/gptqmodel/models/writer.py b/gptqmodel/models/writer.py
index 9aae554f9..e7b512049 100644
--- a/gptqmodel/models/writer.py
+++ b/gptqmodel/models/writer.py
@@ -258,7 +258,8 @@ def debug_saved_config(path):
         # --- end config save block ---
 
         # Due to shell/turtle state, we need to sync the modules from turtle to shell
-        alias_all_from_turtle_if_meta(shell_model=model, turtle_model=self.turtle_model)
+        if not self.load_quantized_model:
+            alias_all_from_turtle_if_meta(shell_model=model, turtle_model=self.turtle_model)
 
         state_dict = get_state_dict_for_save(model)
 

From cab983c5c38ac955eea0edbd82a597021ca4e5b4 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Sat, 27 Sep 2025 13:33:05 +0800
Subject: [PATCH 3/3] fix test_gptq_v1_to_v2_runtime_convert

Signed-off-by: ZX-ModelCloud
---
 tests/test_serialization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_serialization.py b/tests/test_serialization.py
index b55bed512..77ffc52bd 100644
--- a/tests/test_serialization.py
+++ b/tests/test_serialization.py
@@ -32,7 +32,7 @@ def test_marlin_local_serialization(self):
         model = GPTQModel.load(tmpdir, device="cuda:0", backend=BACKEND.MARLIN)
 
     def test_gptq_v1_to_v2_runtime_convert(self):
-        model = GPTQModel.load(self.MODEL_ID, device="cuda:0")
+        model = GPTQModel.load(self.MODEL_ID, device="cuda:0", backend=BACKEND.EXLLAMA_V2)
         self.assertEqual(model.quantize_config.runtime_format, FORMAT.GPTQ_V2)
 
     def test_gptq_v1_serialization(self):
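
Note on PATCH 1/3: before the fix, damp_auto_increment only received its
QQQ-specific default inside the damp_percent-is-None branch, so a config
created with an explicit damp_percent left damp_auto_increment unresolved.
The patch resolves the two defaults independently. Below is a minimal
standalone sketch of the patched logic; QuantConfigSketch and the
two-member METHOD enum are illustrative stand-ins, not the library's
actual types, while the 0.005/0.05 and 0.001/0.01 values come from the
diff itself.

    from dataclasses import dataclass
    from enum import Enum
    from typing import Optional

    class METHOD(str, Enum):
        GPTQ = "gptq"
        QQQ = "qqq"

    @dataclass
    class QuantConfigSketch:
        quant_method: METHOD = METHOD.GPTQ
        damp_percent: Optional[float] = None
        damp_auto_increment: Optional[float] = None

        def __post_init__(self):
            # Resolve each default on its own, mirroring the patched code.
            # Tying the increment default to damp_percent being None (the
            # old nesting) skipped it whenever damp_percent was supplied.
            if self.damp_percent is None:
                self.damp_percent = 0.005 if self.quant_method == METHOD.QQQ else 0.05
            if self.damp_auto_increment is None:
                self.damp_auto_increment = 0.001 if self.quant_method == METHOD.QQQ else 0.01

    # An explicit damp_percent no longer suppresses the QQQ increment default.
    cfg = QuantConfigSketch(quant_method=METHOD.QQQ, damp_percent=0.01)
    assert cfg.damp_auto_increment == 0.001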
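
Note on PATCH 2/3: the writer path previously assumed a turtle (fully
materialized) model always exists, but a model loaded already quantized
has turtle_model=None and nothing to alias from. A hedged sketch of the
guard follows; alias_from_turtle_sketch and WriterSketch are hypothetical
stand-ins for alias_all_from_turtle_if_meta() and the writer class, kept
only detailed enough to show why the call must be skipped.

    def alias_from_turtle_sketch(shell_model, turtle_model):
        # Stand-in for alias_all_from_turtle_if_meta(): the real helper
        # copies materialized modules from the turtle model onto the
        # shell's meta-device placeholders, so it cannot take None.
        if turtle_model is None:
            raise ValueError("turtle_model is None; nothing to alias from")

    class WriterSketch:
        def __init__(self, load_quantized_model, turtle_model):
            self.load_quantized_model = load_quantized_model
            self.turtle_model = turtle_model

        def save(self, model):
            # Only a quantize() run populates turtle_model; a model loaded
            # with load_quantized_model=True never has one, so the
            # shell/turtle sync is skipped instead of crashing on None.
            if not self.load_quantized_model:
                alias_from_turtle_sketch(shell_model=model, turtle_model=self.turtle_model)

    # Saving a pre-quantized load no longer touches the alias step.
    WriterSketch(load_quantized_model=True, turtle_model=None).save(model=object())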