diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py index e787d8e29..fe4a00301 100644 --- a/gptqmodel/quantization/gptq.py +++ b/gptqmodel/quantization/gptq.py @@ -570,7 +570,14 @@ def materialize_global_hessian(self, target_device: Optional[torch.device] = Non for partial_device, partial in self._device_hessian_partials.items(): if partial.device != result_accum.device or partial.dtype != torch.float32: - result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) + # TODO FIXME multi-3090 using P2P is revealing an issue where result_accum and/or partial is not ready for consolidation on the main thread + # when partials are calculated on the individual devices + try: + result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) + except: + log.warn(f"Quantization: Module `{self.name}` -> Retry partial.to in 0.25s") + time.sleep(0.25) + result_accum.add_(partial.to(device=result_accum.device, dtype=torch.float32)) else: result_accum.add_(partial)