gptqmodel/models/base.py (2 changes: 1 addition & 1 deletion)
@@ -168,7 +168,7 @@ def __init__(
         self.model = self.after_model_load(model, load_quantized_model=load_quantized_model)
         self.turtle_model = turtle_model

-        self.compiled = False # set to True while compile() is triggered successfully
+        self.compiled = False  # set to True while compile() is triggered successfully
         self.quantized = quantized
         self.load_quantized_model = load_quantized_model
         if tokenizer is not None:
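The inline comment pins down the contract for self.compiled: it flips to True only once compile() actually succeeds. A minimal sketch of a method honoring that contract, assuming a plain torch.compile wrapper; the body is an illustration, only the flag's meaning comes from the diff.

import torch

def compile(self, backend: str = "inductor"):
    # Illustrative method body (assumption); the flag contract comes from the diff:
    # self.compiled turns True only after a successful compile.
    if self.compiled:
        return self.model
    try:
        self.model = torch.compile(self.model, backend=backend)
        self.compiled = True  # set only when compile() succeeded
    except Exception as exc:
        self.compiled = False
        print(f"torch.compile failed; keeping the eager model: {exc}")
    return self.model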
gptqmodel/models/writer.py (3 changes: 2 additions & 1 deletion)
@@ -258,7 +258,8 @@ def debug_saved_config(path):
         # --- end config save block ---

         # Due to shell/turtle state, we need to sync the modules from turtle to shell
-        alias_all_from_turtle_if_meta(shell_model=model, turtle_model=self.turtle_model)
+        if not self.load_quantized_model:
+            alias_all_from_turtle_if_meta(shell_model=model, turtle_model=self.turtle_model)

         state_dict = get_state_dict_for_save(model)
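For context on the guard: in the shell/turtle split, the shell model holds the module tree (typically with weightless meta-device parameters) while the turtle model holds the materialized weights. When a model is loaded already quantized (load_quantized_model=True) there is no turtle to sync from, so the alias step is skipped. A self-contained sketch of the aliasing idea on plain torch modules; the helper body here is an illustration, not the library's implementation.

import torch
import torch.nn as nn

def alias_from_turtle_if_meta(shell: nn.Module, turtle: nn.Module) -> None:
    # Replace any meta-device (storage-less) parameter on the shell with the
    # turtle's materialized tensor of the same name.
    turtle_params = dict(turtle.named_parameters())
    for name, param in list(shell.named_parameters()):
        if param.device.type == "meta":
            module_path, _, leaf = name.rpartition(".")
            module = shell.get_submodule(module_path) if module_path else shell
            setattr(module, leaf, turtle_params[name])

with torch.device("meta"):
    shell = nn.Linear(4, 2)   # shapes only, no real storage
turtle = nn.Linear(4, 2)      # real weights

alias_from_turtle_if_meta(shell, turtle)
print(shell(torch.randn(1, 4)))  # shell now runs on the turtle's tensors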
gptqmodel/quantization/config.py (5 changes: 4 additions & 1 deletion)
@@ -257,9 +257,12 @@ def __post_init__(self):
         if self.damp_percent is None:
             if self.quant_method == METHOD.QQQ:
                 self.damp_percent = 0.005
-                self.damp_auto_increment = 0.001
             else:
                 self.damp_percent = 0.05
+        if self.damp_auto_increment is None:
+            if self.quant_method == METHOD.QQQ:
+                self.damp_auto_increment = 0.001
+            else:
                 self.damp_auto_increment = 0.01

         # TODO FIXME awq compat which didn't have checkpoint_format before merging to gptqmodel
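The point of the hoist: damp_auto_increment previously received a default only inside the damp_percent is None branch, so an explicitly set damp_percent left it undefaulted, and an explicitly set damp_auto_increment could be overwritten. After the change each field is defaulted independently. A minimal sketch of the resulting behavior, assuming both fields default to None in the dataclass; the trimmed-down class is hypothetical, only the names and values mirror the diff.

from dataclasses import dataclass
from enum import Enum
from typing import Optional

class METHOD(str, Enum):
    GPTQ = "gptq"
    QQQ = "qqq"

@dataclass
class QuantizeConfig:
    quant_method: METHOD = METHOD.GPTQ
    damp_percent: Optional[float] = None
    damp_auto_increment: Optional[float] = None

    def __post_init__(self):
        # Each field is defaulted on its own; a user-supplied value for one
        # no longer disturbs the default of the other.
        if self.damp_percent is None:
            self.damp_percent = 0.005 if self.quant_method == METHOD.QQQ else 0.05
        if self.damp_auto_increment is None:
            self.damp_auto_increment = 0.001 if self.quant_method == METHOD.QQQ else 0.01

cfg = QuantizeConfig(quant_method=METHOD.QQQ, damp_percent=0.01)
assert cfg.damp_auto_increment == 0.001  # defaulted even though damp_percent was given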
tests/test_serialization.py (2 changes: 1 addition & 1 deletion)
@@ -32,7 +32,7 @@ def test_marlin_local_serialization(self):
             model = GPTQModel.load(tmpdir, device="cuda:0", backend=BACKEND.MARLIN)

     def test_gptq_v1_to_v2_runtime_convert(self):
-        model = GPTQModel.load(self.MODEL_ID, device="cuda:0")
+        model = GPTQModel.load(self.MODEL_ID, device="cuda:0", backend=BACKEND.EXLLAMA_V2)
         self.assertEqual(model.quantize_config.runtime_format, FORMAT.GPTQ_V2)

     def test_gptq_v1_serialization(self):
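What the updated test pins down: the checkpoint on disk stays GPTQ v1, but the ExLlama v2 kernel works on the v2 layout, so loading converts the weights in memory and runtime_format reports GPTQ_V2. Fixing backend=BACKEND.EXLLAMA_V2 makes the test independent of automatic backend selection, which may otherwise pick a kernel that performs no v1-to-v2 conversion. A hedged usage sketch; the model id is a placeholder and the import paths follow the repo's tests.

from gptqmodel import BACKEND, GPTQModel
from gptqmodel.quantization import FORMAT

MODEL_ID = "path/or/hub-id-of-a-gptq-v1-checkpoint"  # placeholder, not a real id

model = GPTQModel.load(MODEL_ID, device="cuda:0", backend=BACKEND.EXLLAMA_V2)
# The on-disk format is unchanged; only the in-memory layout was converted.
assert model.quantize_config.runtime_format == FORMAT.GPTQ_V2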