diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py
index 4f7723a02..3ef45433d 100644
--- a/gptqmodel/quantization/gptq.py
+++ b/gptqmodel/quantization/gptq.py
@@ -500,7 +500,20 @@ def _materialize_global_hessian(self, target_device: Optional[torch.device] = No
         for partial_device, partial in self._device_hessian_partials.items():
             if partial.device != result_accum.device or partial.dtype != torch.float32:
-                tmp = partial.to(device=result_accum.device, dtype=torch.float32)
-                result_accum.add_(tmp)
-                del tmp
+                # NOTE(review): On multi-GPU setups (e.g. multi-3090 with P2P), `partial`
+                # sometimes appears not yet ready for consolidation on the main thread when
+                # the partials are computed on the individual devices. As a workaround,
+                # retry the device transfer once after a short delay.
+                # TODO(review): an explicit torch.cuda.synchronize(partial.device) on the
+                # producer side is likely the real fix -- confirm and remove the retry.
+                try:
+                    tmp = partial.to(device=result_accum.device, dtype=torch.float32)
+                except RuntimeError:
+                    # Narrow catch: CUDA transfer failures surface as RuntimeError; a bare
+                    # `except` would also swallow KeyboardInterrupt/SystemExit.
+                    log.warning(f"Quantization: Module `{self.name}` -> Retry 1/2 partial.to in 0.25s")
+                    time.sleep(0.25)
+                    tmp = partial.to(device=result_accum.device, dtype=torch.float32)
+                result_accum.add_(tmp)
+                del tmp
             else:
                 result_accum.add_(partial)