From 80814221230efb6e43667ccb43fa8a01dcab178d Mon Sep 17 00:00:00 2001 From: avtc Date: Tue, 4 Nov 2025 18:26:19 +0200 Subject: [PATCH 1/2] adjust retry partial.to --- gptqmodel/quantization/gptq.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py index fe4a00301..6ec26980e 100644 --- a/gptqmodel/quantization/gptq.py +++ b/gptqmodel/quantization/gptq.py @@ -575,9 +575,14 @@ def materialize_global_hessian(self, target_device: Optional[torch.device] = Non try: result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) except: - log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to in 0.25s") - time.sleep(0.25) - result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) + log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to 1/2 in 0.5s") + time.sleep(0.5) + try: + result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) + except: + log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to 2/2 in 0.5s") + time.sleep(0.5) + result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) else: result_accum.add_(partial) From 6528b284534f2f338e762041758236878469513e Mon Sep 17 00:00:00 2001 From: avtc Date: Wed, 5 Nov 2025 00:35:19 +0200 Subject: [PATCH 2/2] adjust delay --- gptqmodel/quantization/gptq.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py index 6ec26980e..bdf7a7656 100644 --- a/gptqmodel/quantization/gptq.py +++ b/gptqmodel/quantization/gptq.py @@ -575,13 +575,13 @@ def materialize_global_hessian(self, target_device: Optional[torch.device] = Non try: result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) except: - log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to 1/2 in 0.5s") - time.sleep(0.5) + log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to 1/2 in 0.25s") + time.sleep(0.25) try: result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) except: - log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to 2/2 in 0.5s") - time.sleep(0.5) + log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to 2/2 in 0.75s") + time.sleep(0.75) result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) else: result_accum.add_(partial)