From 3e07b25436445cbf4a1d5a39ea36acfa69ee646a Mon Sep 17 00:00:00 2001
From: akashveramd
Date: Mon, 7 Jul 2025 14:54:34 -0700
Subject: [PATCH] Skipped *_stress_cuda UTs in test_c10d_gloo in release/2.5 branch. (#2317)

In this PR, I have skipped *_stress_cuda UTs in test_c10d_gloo in
release/2.5 branch. The tests are also skipped upstream. The test was
failing for Jira ticket-
https://ontrack-internal.amd.com/browse/SWDEV-503871

Tested using docker image-
compute-artifactory.amd.com:5000/rocm-plus-docker/framework/compute-rocm-rel-6.4:114_ubuntu22.04_py3.10_pytorch_release-2.5_a1ad153
---
 test/distributed/test_c10d_gloo.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/test/distributed/test_c10d_gloo.py b/test/distributed/test_c10d_gloo.py
index ee1e04a2be792..c8ded6f3f9922 100644
--- a/test/distributed/test_c10d_gloo.py
+++ b/test/distributed/test_c10d_gloo.py
@@ -52,6 +52,7 @@
     retry_on_connect_failures,
     run_tests,
     skip_but_pass_in_sandcastle,
+    skipIfRocm,
     TestCase,
 )
 
@@ -385,6 +386,7 @@ def test_broadcast_stress(self):
         inputs = [torch.tensor([i * self.world_size + self.rank]) for i in range(1000)]
         self._test_broadcast_stress(inputs)
 
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_broadcast_stress_cuda(self):
@@ -490,6 +492,7 @@ def test_allreduce_stress(self):
         inputs = [torch.tensor([i + self.rank]) for i in range(1000)]
         self._test_allreduce_stress(inputs)
 
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_allreduce_stress_cuda(self):
@@ -922,6 +925,8 @@ def test_scatter_stress(self):
     @skip_but_pass_in_sandcastle(
         "Test is flaky, see https://github.com/pytorch/pytorch/issues/15963"
     )
+
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_scatter_stress_cuda(self):
@@ -1096,6 +1101,7 @@ def test_gather_stress(self):
         inputs = [torch.tensor([i + self.rank]) for i in range(1000)]
         self._test_gather_stress(inputs, lambda t: t.clone())
 
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_gather_stress_cuda(self):
@@ -1231,6 +1237,7 @@ def test_allgather_stress(self):
         inputs = [torch.tensor([i + self.rank]) for i in range(1000)]
         self._test_allgather_stress(inputs, lambda t: t.clone())
 
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_allgather_stress_cuda(self):
@@ -1417,6 +1424,7 @@ def test_reduce_stress(self):
         inputs = [torch.tensor([i + self.rank]) for i in range(1000)]
         self._test_reduce_stress(inputs)
 
+    @skipIfRocm
     @skip_if_lt_x_gpu(2)
     @requires_gloo()
     def test_reduce_stress_cuda(self):