diff --git a/gptqmodel/models/loader.py b/gptqmodel/models/loader.py
index 95d768cff..e562870b3 100644
--- a/gptqmodel/models/loader.py
+++ b/gptqmodel/models/loader.py
@@ -202,6 +202,10 @@ def skip(*args, **kwargs):
         cls.before_model_load(cls, load_quantized_model=False)
         from ..utils.hf import build_shell_model
 
+        # XIELUActivation materializes some weights during activation init, so init_empty_weights can't be used
+        if hasattr(config, "hidden_act") and config.hidden_act == "xielu":
+            quantize_config.offload_to_disk = False
+
         if quantize_config.offload_to_disk:
            model = build_shell_model(cls.loader, config=config, **model_init_kwargs)
            model._model_init_kwargs = model_init_kwargs
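
Note on the workaround: the shell-model path apparently relies on meta-device construction (per the comment, `init_empty_weights`), which discards any parameter values a module computes inside its own `__init__`. The sketch below is not the real `XIELUActivation`; it is a minimal, hypothetical stand-in (`ValueInInitActivation`, `alpha_init` are made-up names) showing why such a module breaks under `accelerate.init_empty_weights`, and hence why `offload_to_disk` is force-disabled for `hidden_act == "xielu"`.

```python
# Minimal sketch, assuming the shell model is built under init_empty_weights.
import torch
import torch.nn as nn
from accelerate import init_empty_weights


class ValueInInitActivation(nn.Module):  # hypothetical stand-in for XIELUActivation
    def __init__(self, alpha_init: float = 0.8):
        super().__init__()
        # The parameter value is computed eagerly here and is expected to
        # survive construction; it is not loaded from the checkpoint later.
        self.alpha = nn.Parameter(torch.log(torch.expm1(torch.tensor(alpha_init))))

    def forward(self, x):
        return nn.functional.softplus(self.alpha) * x


# Normal construction keeps the computed value.
act = ValueInInitActivation()
print(act.alpha)          # real tensor with the precomputed value

# Under init_empty_weights, register_parameter is patched to place parameters
# on the meta device, so the value computed in __init__ is silently dropped.
with init_empty_weights():
    shell_act = ValueInInitActivation()
print(shell_act.alpha.device)  # meta -- the value no longer exists
```

Since nothing later restores that activation parameter, the loader falls back to a regular (non-shell) load for these models by setting `quantize_config.offload_to_disk = False` before the `if quantize_config.offload_to_disk:` branch.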