Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions gptqmodel/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,9 @@ def generate_layers_modules_tree_simple(self, node):
# For other types, return as-is
return node

def tied_word_embedding(self) -> bool:
    """Report whether the wrapped model's config enables tied word embeddings.

    Reads ``tie_word_embeddings`` from ``self.model.config`` and treats a
    missing attribute as ``False``.

    Returns:
        bool: ``True`` when the config flag is truthy, ``False`` otherwise.
    """
    # Coerce to a real bool: the flag may be stored as None (or another
    # non-bool value), which would otherwise leak through the declared
    # ``-> bool`` return type.
    return bool(getattr(self.model.config, "tie_word_embeddings", False))

def __getattr__(self, item):
try:
return super().__getattr__(item)
Expand Down
20 changes: 18 additions & 2 deletions gptqmodel/models/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@
from ..utils.logger import setup_logger
from ..utils.model import (convert_gptq_v2_to_v1_format, copy_py_files, find_modules, get_model_files_size,
get_state_dict_for_save, load_checkpoint_in_model_then_tie_weights, make_quant)
from ..utils.offload import undo_offload_to_disk
from ..utils.torch import torch_empty_cache
from ..version import __version__
from ._const import CPU, DEFAULT_MAX_SHARD_SIZE
from ._const import DEFAULT_MAX_SHARD_SIZE

log = setup_logger()

Expand Down Expand Up @@ -256,7 +257,22 @@ def debug_saved_config(path):
self.processor.save_pretrained(save_dir)
# --- end config save block ---

model.to(CPU)
# print_module_tree(model)
# model.tie_weights()
# undo_offload_to_disk(model)
# print("dbug 1")
# print_module_tree(model)

# TODO FIXME: remove this ugly patch and find the core issue of why output_embedding is not re-tied after offload/undo_offload
input_embed = model.get_input_embeddings()
output_embed = model.get_output_embeddings()
undo_offload_to_disk(output_embed, delete_offload_folders=True)
undo_offload_to_disk(input_embed, delete_offload_folders=True)

# print("dbug 2")
# print_module_tree(model)

# model.to(CPU)
state_dict = get_state_dict_for_save(model)

model_base_name = "model"
Expand Down
2 changes: 1 addition & 1 deletion gptqmodel/utils/offload.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def undo_offload_to_disk(
remove_hook_from_module(module, recurse=False) # ensure root is also clean

# 3) Re-tie embeddings when the module has a config with tie_word_embeddings enabled
if hasattr(module, "config") and hasattr(module.config, "tie_word_embeddings") and module.config.tie_word_embeddings:
if hasattr(module, "config") and getattr(module.config, "tie_word_embeddings", False):
module.tie_weights() # makes lm_head.weight point to embed_tokens.weight again after undo_offload

# 4) Optionally delete offload folders.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_quant_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@


class TestQuantBatch(ModelTest):
NATIVE_MODEL_ID = "/monster/data/model/TinyLlama-1.1B-Chat-v1.0"
NATIVE_MODEL_ID = "/monster/data/model/Llama-3.2-1B-Instruct"

def calculate_avg_ppl(self, model, tokenizer):
ppl = Perplexity(
Expand Down