From 03a3f9b2be28439477d0d9302a3fca5cca5cd767 Mon Sep 17 00:00:00 2001 From: BL <110066325+BLOrange-AMD@users.noreply.github.com> Date: Tue, 2 Sep 2025 10:39:20 -0500 Subject: [PATCH 1/2] Adjusted accuracy tolerance for MI350 on test_transformers --- test/test_transformers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/test_transformers.py b/test/test_transformers.py index 798095e065785..1bd836a3eb15b 100644 --- a/test/test_transformers.py +++ b/test/test_transformers.py @@ -3194,6 +3194,8 @@ def _get_mem_eff_drop_mask(batch_size, n_heads, q_len, kv_len, p, seed, offset, fudge_factors['grad_query'] = 650.0 if dtype == torch.float32: fudge_factors['grad_key'] = 90.0 + if "gfx95" in torch.cuda.get_device_properties(0).gcnArchName: + fudge_factors['grad_value'] = 15.0 check_out_and_grad( (out_ref, out_lp_ref, out), @@ -3315,6 +3317,8 @@ def _get_mem_eff_drop_mask(batch_size, n_heads, q_len, kv_len, p, seed, offset, fudge_factors['grad_query'] = 650.0 if dtype == torch.float32: fudge_factors['grad_key'] = 90.0 + if "gfx95" in torch.cuda.get_device_properties(0).gcnArchName: + fudge_factors['grad_value'] = 15.0 check_out_and_grad( (out_ref, out_lp_ref, out), From 9d5618fa7f142f69c753507c4081453bd951a3f1 Mon Sep 17 00:00:00 2001 From: Jeff Daily Date: Thu, 19 Jun 2025 15:02:40 +0000 Subject: [PATCH 2/2] [ROCm][CI] fix mi300 test failure after 6.4.1 update (#156368) Fixes failures such as https://github.com/pytorch/pytorch/actions/runs/15739699156/job/44365395854: `test/test_linalg.py::TestLinalgCUDA::test_broadcast_batched_matmul_cuda` Pull Request resolved: https://github.com/pytorch/pytorch/pull/156368 Approved by: https://github.com/jeffdaily Co-authored-by: Jeff Daily --- test/test_linalg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_linalg.py b/test/test_linalg.py index 6fc46663a67e7..4208ada17d877 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -9003,7 +9003,8 @@ def dims_full_for_fn(): r1 = 
fntorch(t0_full, t1, t2) self.assertEqual(r0, r1) - @tf32_on_and_off(0.001) + # ROCm 6.4 passes with tf32=on, but 6.4.1 needed the tolerance loosened slightly + @tf32_on_and_off(0.002 if torch.version.hip else 0.001) @bf32_on_and_off(0.001) def test_broadcast_batched_matmul(self, device): n_dim = random.randint(1, 8)