gptqmodel/models/base.py (2 changes: 1 addition & 1 deletion)
@@ -168,7 +168,7 @@ def __init__(
         self.model = self.after_model_load(model, load_quantized_model=load_quantized_model)
         self.turtle_model = turtle_model

-        self.compiled = False # set to True while compile() is triggered successfully
+        self.compiled = False  # set to True while compile() is triggered successfully
         self.quantized = quantized
         self.load_quantized_model = load_quantized_model
         if tokenizer is not None:
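The inline comment pins down the contract for self.compiled: it flips to True only once compile() actually succeeds. A minimal sketch of a method honoring that contract, assuming a plain torch.compile wrapper; the body is an illustration, only the flag's meaning comes from the diff.

import torch

def compile(self, backend: str = "inductor"):
    # Illustrative method body (assumption); the flag contract comes from the diff:
    # self.compiled turns True only after a successful compile.
    if self.compiled:
        return self.model
    try:
        self.model = torch.compile(self.model, backend=backend)
        self.compiled = True  # set only when compile() succeeded
    except Exception as exc:
        self.compiled = False
        print(f"torch.compile failed; keeping the eager model: {exc}")
    return self.model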
gptqmodel/models/writer.py (3 changes: 2 additions & 1 deletion)
@@ -258,7 +258,8 @@ def debug_saved_config(path):
         # --- end config save block ---

         # Due to shell/turtle state, we need to sync the modules from turtle to shell
-        alias_all_from_turtle_if_meta(shell_model=model, turtle_model=self.turtle_model)
+        if not self.load_quantized_model:
+            alias_all_from_turtle_if_meta(shell_model=model, turtle_model=self.turtle_model)

         state_dict = get_state_dict_for_save(model)
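For context on the guard: in the shell/turtle split, the shell model holds the module tree (typically with weightless meta-device parameters) while the turtle model holds the materialized weights. When a model is loaded already quantized (load_quantized_model=True) there is no turtle to sync from, so the alias step is skipped. A self-contained sketch of the aliasing idea on plain torch modules; the helper body here is an illustration, not the library's implementation.

import torch
import torch.nn as nn

def alias_from_turtle_if_meta(shell: nn.Module, turtle: nn.Module) -> None:
    # Replace any meta-device (storage-less) parameter on the shell with the
    # turtle's materialized tensor of the same name.
    turtle_params = dict(turtle.named_parameters())
    for name, param in list(shell.named_parameters()):
        if param.device.type == "meta":
            module_path, _, leaf = name.rpartition(".")
            module = shell.get_submodule(module_path) if module_path else shell
            setattr(module, leaf, turtle_params[name])

with torch.device("meta"):
    shell = nn.Linear(4, 2)   # shapes only, no real storage
turtle = nn.Linear(4, 2)      # real weights

alias_from_turtle_if_meta(shell, turtle)
print(shell(torch.randn(1, 4)))  # shell now runs on the turtle's tensors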
gptqmodel/quantization/config.py (5 changes: 4 additions & 1 deletion)
@@ -257,9 +257,12 @@ def __post_init__(self):
         if self.damp_percent is None:
             if self.quant_method == METHOD.QQQ:
                 self.damp_percent = 0.005
-                self.damp_auto_increment = 0.001
             else:
                 self.damp_percent = 0.05
+        if self.damp_auto_increment is None:
+            if self.quant_method == METHOD.QQQ:
+                self.damp_auto_increment = 0.001
+            else:
                 self.damp_auto_increment = 0.01

         # TODO FIXME awq compat which didn't have checkpoint_format before merging to gptqmodel
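The point of the hoist: damp_auto_increment previously received a default only inside the damp_percent is None branch, so an explicitly set damp_percent left it undefaulted, and an explicitly set damp_auto_increment could be overwritten. After the change each field is defaulted independently. A minimal sketch of the resulting behavior, assuming both fields default to None in the dataclass; the trimmed-down class is hypothetical, only the names and values mirror the diff.

from dataclasses import dataclass
from enum import Enum
from typing import Optional

class METHOD(str, Enum):
    GPTQ = "gptq"
    QQQ = "qqq"

@dataclass
class QuantizeConfig:
    quant_method: METHOD = METHOD.GPTQ
    damp_percent: Optional[float] = None
    damp_auto_increment: Optional[float] = None

    def __post_init__(self):
        # Each field is defaulted on its own; a user-supplied value for one
        # no longer disturbs the default of the other.
        if self.damp_percent is None:
            self.damp_percent = 0.005 if self.quant_method == METHOD.QQQ else 0.05
        if self.damp_auto_increment is None:
            self.damp_auto_increment = 0.001 if self.quant_method == METHOD.QQQ else 0.01

cfg = QuantizeConfig(quant_method=METHOD.QQQ, damp_percent=0.01)
assert cfg.damp_auto_increment == 0.001  # defaulted even though damp_percent was given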
tests/test_serialization.py (2 changes: 1 addition & 1 deletion)
@@ -32,7 +32,7 @@ def test_marlin_local_serialization(self):
             model = GPTQModel.load(tmpdir, device="cuda:0", backend=BACKEND.MARLIN)

     def test_gptq_v1_to_v2_runtime_convert(self):
-        model = GPTQModel.load(self.MODEL_ID, device="cuda:0")
+        model = GPTQModel.load(self.MODEL_ID, device="cuda:0", backend=BACKEND.EXLLAMA_V2)
         self.assertEqual(model.quantize_config.runtime_format, FORMAT.GPTQ_V2)

     def test_gptq_v1_serialization(self):
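What the updated test pins down: the checkpoint on disk stays GPTQ v1, but the ExLlama v2 kernel works on the v2 layout, so loading converts the weights in memory and runtime_format reports GPTQ_V2. Fixing backend=BACKEND.EXLLAMA_V2 makes the test independent of automatic backend selection, which may otherwise pick a kernel that performs no v1-to-v2 conversion. A hedged usage sketch; the model id is a placeholder and the import paths follow the repo's tests.

from gptqmodel import BACKEND, GPTQModel
from gptqmodel.quantization import FORMAT

MODEL_ID = "path/or/hub-id-of-a-gptq-v1-checkpoint"  # placeholder, not a real id

model = GPTQModel.load(MODEL_ID, device="cuda:0", backend=BACKEND.EXLLAMA_V2)
# The on-disk format is unchanged; only the in-memory layout was converted.
assert model.quantize_config.runtime_format == FORMAT.GPTQ_V2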