ModelCloud · Qubitium · Nov 2, 2025 · Nov 1, 2025 · Nov 2, 2025
diff --git a/gptqmodel/quantization/gptq.py b/gptqmodel/quantization/gptq.py
@@ -500,9 +500,19 @@ def _materialize_global_hessian(self, target_device: Optional[torch.device] = No
 
             for partial_device, partial in self._device_hessian_partials.items():
                 if partial.device != result_accum.device or partial.dtype != torch.float32:
-                    tmp = partial.to(device=result_accum.device, dtype=torch.float32)
-                    result_accum.add_(tmp)
-                    del tmp
+                    # TODO FIXME: multi-3090 using P2P is revealing an issue where result_accum and/or partial is not ready for consolidation on the main thread
+                    # when partials are calculated on the individual devices.
+                    try:
+                        tmp = partial.to(device=result_accum.device, dtype=torch.float32)
+                        result_accum.add_(tmp)
+                        del tmp
+                    except:
+                        log.warn(f"Quantization: Module `{self.name}` -> Retry 1/2 partial.to in 0.5s")
+                        time.sleep(0.25)
+                        tmp = partial.to(device=result_accum.device, dtype=torch.float32)
+                        result_accum.add_(tmp)
+                        del tmp
+
                 else:
                     result_accum.add_(partial)