ModelCloud · Qubitium · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025 · Sep 25, 2025
diff --git a/gptqmodel/models/base.py b/gptqmodel/models/base.py
@@ -1223,6 +1223,9 @@ def generate_layers_modules_tree_simple(self, node):
         # For other types, return as-is
         return node
 
+    def tied_word_embedding(self) -> bool:  # reports the `tie_word_embeddings` value from the wrapped model's config
+        return getattr(self.model.config, "tie_word_embeddings", False)  # falls back to False when the config lacks the attribute
+
     def __getattr__(self, item):
         try:
             return super().__getattr__(item)

diff --git a/gptqmodel/models/writer.py b/gptqmodel/models/writer.py
@@ -35,9 +35,10 @@
 from ..utils.logger import setup_logger
 from ..utils.model import (convert_gptq_v2_to_v1_format, copy_py_files, find_modules, get_model_files_size,
                            get_state_dict_for_save, load_checkpoint_in_model_then_tie_weights, make_quant)
+from ..utils.offload import undo_offload_to_disk
 from ..utils.torch import torch_empty_cache
 from ..version import __version__
-from ._const import CPU, DEFAULT_MAX_SHARD_SIZE
+from ._const import DEFAULT_MAX_SHARD_SIZE
 
 log = setup_logger()
 
@@ -256,7 +257,22 @@ def debug_saved_config(path):
             self.processor.save_pretrained(save_dir)
         # --- end config save block ---
 
-        model.to(CPU)
+        # print_module_tree(model)
+        # model.tie_weights()
+        # undo_offload_to_disk(model)
+        # print("dbug 1")
+        # print_module_tree(model)
+
+        # TODO: remove this workaround once the root cause is found for why the output embedding is not re-tied after offload/undo_offload
+        input_embed = model.get_input_embeddings()
+        output_embed = model.get_output_embeddings()
+        undo_offload_to_disk(output_embed, delete_offload_folders=True)
+        undo_offload_to_disk(input_embed, delete_offload_folders=True)
+
+        # print("dbug 2")
+        # print_module_tree(model)
+
+        # model.to(CPU)
         state_dict = get_state_dict_for_save(model)
 
         model_base_name = "model"

diff --git a/gptqmodel/utils/offload.py b/gptqmodel/utils/offload.py
@@ -287,7 +287,7 @@ def undo_offload_to_disk(
         remove_hook_from_module(module, recurse=False)  # ensure root is also clean
 
         # 3) Tie embedding if module is model and enabled/tied
-        if hasattr(module, "config") and hasattr(module.config, "tie_word_embeddings") and module.config.tie_word_embeddings:
+        if hasattr(module, "config") and getattr(module.config, "tie_word_embeddings", False):
             module.tie_weights()  # makes lm_head.weight point to embed_tokens.weight again after undo_offload
 
         # 4) Optionally delete offload folders.

diff --git a/tests/test_quant_batch.py b/tests/test_quant_batch.py
@@ -19,7 +19,7 @@
 
 
 class TestQuantBatch(ModelTest):
-    NATIVE_MODEL_ID = "/monster/data/model/TinyLlama-1.1B-Chat-v1.0"
+    NATIVE_MODEL_ID = "/monster/data/model/Llama-3.2-1B-Instruct"
 
     def calculate_avg_ppl(self, model, tokenizer):
         ppl = Perplexity(