From aa7f2522dc438d3b9099fa31bf733ea64c3d1e37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= <aedu.waelchli@gmail.com>
Date: Wed, 8 Mar 2023 22:36:00 +0100
Subject: [PATCH] Fix race condition in Fabric test (#17002)

---
 tests/tests_fabric/parity/test_parity_ddp.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/tests_fabric/parity/test_parity_ddp.py b/tests/tests_fabric/parity/test_parity_ddp.py
index 73933742ca069..10c7fc711c928 100644
--- a/tests/tests_fabric/parity/test_parity_ddp.py
+++ b/tests/tests_fabric/parity/test_parity_ddp.py
@@ -125,7 +125,6 @@ def train_fabric_ddp(fabric):
     return model.state_dict(), torch.tensor(iteration_timings), memory_stats
 
 
-@pytest.mark.flaky(reruns=3)
 @RunIf(standalone=True)
 @pytest.mark.usefixtures("reset_deterministic_algorithm", "reset_cudnn_benchmark")
 @pytest.mark.parametrize(
@@ -148,6 +147,9 @@ def test_parity_ddp(accelerator, devices, tolerance):
     fabric.barrier()
     cuda_reset()
     torch.distributed.destroy_process_group()
+    # Sleep briefly to avoid a race condition: the very first call in `train_torch_ddp`
+    # initializes a new process group.
+    time.sleep(3)
 
     # Train with raw PyTorch
     state_dict_torch, timings_torch, memory_torch = train_torch_ddp(