From d2a64e89aceaf30ea96f6b64fd278ac3a2f9e50c Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Sat, 27 Sep 2025 11:49:48 +0800
Subject: [PATCH 1/3] fix damp_auto_increment default value

Signed-off-by: ZX-ModelCloud
---
 gptqmodel/quantization/config.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gptqmodel/quantization/config.py b/gptqmodel/quantization/config.py
index e6a9259fa..90c40f4ea 100644
--- a/gptqmodel/quantization/config.py
+++ b/gptqmodel/quantization/config.py
@@ -257,9 +257,12 @@ def __post_init__(self):
         if self.damp_percent is None:
             if self.quant_method == METHOD.QQQ:
                 self.damp_percent = 0.005
-                self.damp_auto_increment = 0.001
             else:
                 self.damp_percent = 0.05
+        if self.damp_auto_increment is None:
+            if self.quant_method == METHOD.QQQ:
+                self.damp_auto_increment = 0.001
+            else:
                 self.damp_auto_increment = 0.01
 
         # TODO FIXME awq compat which didn't have checkpoint_format before merging to gptqmodel

From bc4e46b60a8a5ba83831d0f65b343360cc4c061f Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Sat, 27 Sep 2025 13:32:50 +0800
Subject: [PATCH 2/3] if load_quantized_model=True, don't call
 alias_all_from_turtle_if_meta(), because turtle_model is None

Signed-off-by: ZX-ModelCloud
---
 gptqmodel/models/base.py   | 2 +-
 gptqmodel/models/writer.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py
index 4ee513de5..8f120fa05 100644
--- a/gptqmodel/models/base.py
+++ b/gptqmodel/models/base.py
@@ -168,7 +168,7 @@ def __init__(
         self.model = self.after_model_load(model, load_quantized_model=load_quantized_model)
         self.turtle_model = turtle_model
 
-        self.compiled = False # set to True while compile() is triggered successfully
+        self.compiled = False  # set to True when compile() is triggered successfully
         self.quantized = quantized
         self.load_quantized_model = load_quantized_model
         if tokenizer is not None:
diff --git a/gptqmodel/models/writer.py b/gptqmodel/models/writer.py
index 9aae554f9..e7b512049 100644
--- a/gptqmodel/models/writer.py
+++ b/gptqmodel/models/writer.py
@@ -258,7 +258,8 @@ def debug_saved_config(path):
         # --- end config save block ---
 
         # Due to shell/turtle state, we need to sync the modules from turtle to shell
-        alias_all_from_turtle_if_meta(shell_model=model, turtle_model=self.turtle_model)
+        if not self.load_quantized_model:
+            alias_all_from_turtle_if_meta(shell_model=model, turtle_model=self.turtle_model)
 
         state_dict = get_state_dict_for_save(model)
 

From cab983c5c38ac955eea0edbd82a597021ca4e5b4 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Sat, 27 Sep 2025 13:33:05 +0800
Subject: [PATCH 3/3] fix test_gptq_v1_to_v2_runtime_convert

Signed-off-by: ZX-ModelCloud
---
 tests/test_serialization.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_serialization.py b/tests/test_serialization.py
index b55bed512..77ffc52bd 100644
--- a/tests/test_serialization.py
+++ b/tests/test_serialization.py
@@ -32,7 +32,7 @@ def test_marlin_local_serialization(self):
         model = GPTQModel.load(tmpdir, device="cuda:0", backend=BACKEND.MARLIN)
 
     def test_gptq_v1_to_v2_runtime_convert(self):
-        model = GPTQModel.load(self.MODEL_ID, device="cuda:0")
+        model = GPTQModel.load(self.MODEL_ID, device="cuda:0", backend=BACKEND.EXLLAMA_V2)
         self.assertEqual(model.quantize_config.runtime_format, FORMAT.GPTQ_V2)
 
     def test_gptq_v1_serialization(self):
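
Note on PATCH 1/3: before the fix, damp_auto_increment only received its
QQQ-specific default inside the damp_percent-is-None branch, so a config
created with an explicit damp_percent left damp_auto_increment unresolved.
The patch resolves the two defaults independently. Below is a minimal
standalone sketch of the patched logic; QuantConfigSketch and the
two-member METHOD enum are illustrative stand-ins, not the library's
actual types, while the 0.005/0.05 and 0.001/0.01 values come from the
diff itself.

    from dataclasses import dataclass
    from enum import Enum
    from typing import Optional

    class METHOD(str, Enum):
        GPTQ = "gptq"
        QQQ = "qqq"

    @dataclass
    class QuantConfigSketch:
        quant_method: METHOD = METHOD.GPTQ
        damp_percent: Optional[float] = None
        damp_auto_increment: Optional[float] = None

        def __post_init__(self):
            # Resolve each default on its own, mirroring the patched code.
            # Tying the increment default to damp_percent being None (the
            # old nesting) skipped it whenever damp_percent was supplied.
            if self.damp_percent is None:
                self.damp_percent = 0.005 if self.quant_method == METHOD.QQQ else 0.05
            if self.damp_auto_increment is None:
                self.damp_auto_increment = 0.001 if self.quant_method == METHOD.QQQ else 0.01

    # An explicit damp_percent no longer suppresses the QQQ increment default.
    cfg = QuantConfigSketch(quant_method=METHOD.QQQ, damp_percent=0.01)
    assert cfg.damp_auto_increment == 0.001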
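
Note on PATCH 2/3: the writer path previously assumed a turtle (fully
materialized) model always exists, but a model loaded already quantized
has turtle_model=None and nothing to alias from. A hedged sketch of the
guard follows; alias_from_turtle_sketch and WriterSketch are hypothetical
stand-ins for alias_all_from_turtle_if_meta() and the writer class, kept
only detailed enough to show why the call must be skipped.

    def alias_from_turtle_sketch(shell_model, turtle_model):
        # Stand-in for alias_all_from_turtle_if_meta(): the real helper
        # copies materialized modules from the turtle model onto the
        # shell's meta-device placeholders, so it cannot take None.
        if turtle_model is None:
            raise ValueError("turtle_model is None; nothing to alias from")

    class WriterSketch:
        def __init__(self, load_quantized_model, turtle_model):
            self.load_quantized_model = load_quantized_model
            self.turtle_model = turtle_model

        def save(self, model):
            # Only a quantize() run populates turtle_model; a model loaded
            # with load_quantized_model=True never has one, so the
            # shell/turtle sync is skipped instead of crashing on None.
            if not self.load_quantized_model:
                alias_from_turtle_sketch(shell_model=model, turtle_model=self.turtle_model)

    # Saving a pre-quantized load no longer touches the alias step.
    WriterSketch(load_quantized_model=True, turtle_model=None).save(model=object())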