diff --git a/gptqmodel/models/writer.py b/gptqmodel/models/writer.py
index 6f33b2d4a..dc796d4e2 100644
--- a/gptqmodel/models/writer.py
+++ b/gptqmodel/models/writer.py
@@ -225,20 +225,8 @@ def save_quantized(
                 f"Using 'format = {FORMAT.GPTQ_V2}': the serialized model is only supported by GPTQModel version >= {MIN_VERSION_WITH_V2}."
             )
 
-        if not self.load_quantized_model:
-            model = self.model
-            # internal is always gptq v2 but allow users to pass gptq (v1) via config
-            if (
-                quantize_config.format == FORMAT.GPTQ
-                and quantize_config.quant_method == METHOD.GPTQ
-                and self.qlinear_kernel.REQUIRES_FORMAT_V2
-            ):
-                # Model qzeros may be edited in place for export compatibility.
-                model = convert_gptq_v2_to_v1_format(
-                    model, quantize_config=quantize_config, qlinear_kernel=self.qlinear_kernel
-                )
-        else:
-            model = self.get_model_with_quantize(
+        if self.load_quantized_model:
+            self.model = self.get_model_with_quantize(
                 qcfg=quantize_config,
                 model_id_or_path=self.model_local_path,
             )
@@ -286,10 +274,10 @@ def debug_saved_config(path):
 
         # Due to shell/turtle state, we need to sync the modules from turtle to shell
         if not self.load_quantized_model:
-            alias_all_from_turtle_if_meta(shell_model=model, turtle_model=self.turtle_model)
+            alias_all_from_turtle_if_meta(shell_model=self.model, turtle_model=self.turtle_model)
 
         offload_root = self.quantize_config.offload_to_disk_path if getattr(self.quantize_config, "offload_to_disk", False) else None
-        state_dict = get_state_dict_for_save(model, offload_root=offload_root)
+        state_dict = get_state_dict_for_save(self.model, offload_root=offload_root)
 
         model_base_name = "model"
         model_save_name = model_base_name + ".safetensors"