diff --git a/gptqmodel/nn_modules/qlinear/__init__.py b/gptqmodel/nn_modules/qlinear/__init__.py
index 89b0625bc..cf31d0392 100644
--- a/gptqmodel/nn_modules/qlinear/__init__.py
+++ b/gptqmodel/nn_modules/qlinear/__init__.py
@@ -485,6 +485,18 @@ def dequantize_weight(self, num_itr: int = 1):
 
         return weights
 
+    # FIXME, optimum needs call pack(), we need to remove it
+    def pack(
+        self,
+        linear: nn.Module,
+        scales: t.Tensor,
+        zeros: t.Tensor,
+        g_idx: t.Tensor,
+        block_in: int = 8192,
+        workers: int = 8,
+    ):
+        self.pack_block(linear, scales, zeros, g_idx, block_in, workers)
+
     @t.inference_mode()
     def pack_block(
         self,
diff --git a/tests/test_olora_finetuning_xpu.py b/tests/test_olora_finetuning_xpu.py
index 755fdebf5..a7c414272 100644
--- a/tests/test_olora_finetuning_xpu.py
+++ b/tests/test_olora_finetuning_xpu.py
@@ -140,7 +140,7 @@ def generate_and_tokenize_prompt(example):
         learning_rate=learning_rate,
         logging_steps=1,
         optim="adamw_torch",
-        evaluation_strategy="steps",
+        eval_strategy="steps",
         save_strategy="steps",
         eval_steps=eval_step,
         save_steps=save_step,