From e421cab1e63d42cd565a71c3b8e94716667827ce Mon Sep 17 00:00:00 2001
From: akashveramd
Date: Mon, 7 Jul 2025 14:54:34 -0700
Subject: [PATCH 1/2] Cherry-picked commit with merge conflict

---
 test/distributed/test_c10d_gloo.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/test/distributed/test_c10d_gloo.py b/test/distributed/test_c10d_gloo.py
index 57ad689179da9..c60b2d8f5f722 100644
--- a/test/distributed/test_c10d_gloo.py
+++ b/test/distributed/test_c10d_gloo.py
@@ -54,7 +54,11 @@
     retry_on_connect_failures,
     run_tests,
     skip_but_pass_in_sandcastle,
+<<<<<<< HEAD
     skipIfRocmArch,
+=======
+    skipIfRocm,
+>>>>>>> 2269e37502 (Skipped *_stress_cuda UTs in test_c10d_gloo in release/2.5 branch. (#2317))
     TestCase,
 )
 
@@ -395,6 +399,7 @@ def test_broadcast_stress(self):
         inputs = [torch.tensor([i * self.world_size + self.rank]) for i in range(1000)]
         self._test_broadcast_stress(inputs)
 
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_broadcast_stress_cuda(self):
@@ -500,6 +505,7 @@ def test_allreduce_stress(self):
         inputs = [torch.tensor([i + self.rank]) for i in range(1000)]
         self._test_allreduce_stress(inputs)
 
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_allreduce_stress_cuda(self):
@@ -953,6 +959,8 @@ def test_scatter_stress(self):
     @skip_but_pass_in_sandcastle(
         "Test is flaky, see https://github.com/pytorch/pytorch/issues/15963"
     )
+
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_scatter_stress_cuda(self):
@@ -1127,6 +1135,7 @@ def test_gather_stress(self):
         inputs = [torch.tensor([i + self.rank]) for i in range(1000)]
         self._test_gather_stress(inputs, lambda t: t.clone())
 
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @skipIfRocmArch(MI300_ARCH)
     @requires_gloo()
@@ -1263,6 +1272,7 @@ def test_allgather_stress(self):
         inputs = [torch.tensor([i + self.rank]) for i in range(1000)]
         self._test_allgather_stress(inputs, lambda t: t.clone())
 
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_allgather_stress_cuda(self):
@@ -1449,6 +1459,7 @@ def test_reduce_stress(self):
         inputs = [torch.tensor([i + self.rank]) for i in range(1000)]
         self._test_reduce_stress(inputs)
 
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_reduce_stress_cuda(self):

From 8e5212af30f8cf79b3b6b464ce8a9f33ffdeed93 Mon Sep 17 00:00:00 2001
From: Prachi Gupta
Date: Mon, 28 Jul 2025 17:32:34 +0000
Subject: [PATCH 2/2] [rocm7.0_internal_testing] Skipped *_stress_cuda UTs in test_c10d_gloo

---
 test/distributed/test_c10d_gloo.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/test/distributed/test_c10d_gloo.py b/test/distributed/test_c10d_gloo.py
index c60b2d8f5f722..a565e8932fca4 100644
--- a/test/distributed/test_c10d_gloo.py
+++ b/test/distributed/test_c10d_gloo.py
@@ -54,11 +54,8 @@
     retry_on_connect_failures,
     run_tests,
     skip_but_pass_in_sandcastle,
-<<<<<<< HEAD
     skipIfRocmArch,
-=======
     skipIfRocm,
->>>>>>> 2269e37502 (Skipped *_stress_cuda UTs in test_c10d_gloo in release/2.5 branch. (#2317))
     TestCase,
 )
 
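Both commits gate the *_stress_cuda tests behind skipIfRocm, imported from torch.testing._internal.common_utils alongside skipIfRocmArch. For context, the following is a minimal sketch of the general shape of such a skip decorator; it is an assumption for illustration only, not PyTorch's actual implementation, and the skip_if_rocm name, TEST_WITH_ROCM flag, and PYTORCH_TEST_WITH_ROCM environment variable are stand-ins.

import os
import unittest
from functools import wraps

# Assumed flag: PyTorch's test utilities derive a similar one from an
# environment variable; the exact mechanism here is illustrative.
TEST_WITH_ROCM = os.environ.get("PYTORCH_TEST_WITH_ROCM", "0") == "1"

def skip_if_rocm(fn):
    # Skip the wrapped test when running against a ROCm build.
    @wraps(fn)
    def wrapper(*args, **kwargs):
        if TEST_WITH_ROCM:
            raise unittest.SkipTest("test skipped on ROCm")
        return fn(*args, **kwargs)
    return wrapper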