diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py index fe4a00301..bdf7a7656 100644 --- a/gptqmodel/quantization/gptq.py +++ b/gptqmodel/quantization/gptq.py @@ -575,9 +575,14 @@ def materialize_global_hessian(self, target_device: Optional[torch.device] = Non try: result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) except: - log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to in 0.25s") + log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to 1/2 in 0.25s") time.sleep(0.25) - result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) + try: + result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) + except: + log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to 2/2 in 0.75s") + time.sleep(0.75) + result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) else: result_accum.add_(partial)