From 5dd58a9194c53bf74c9c19fcaa57f2fc5c4cdee1 Mon Sep 17 00:00:00 2001 From: "Nichols A. Romero" Date: Tue, 22 Jul 2025 19:45:35 +0000 Subject: [PATCH] [ROCm][tunableop] UT tolerance increase for matmul_small_brute_force_tunableop at FP16 (#158788) TunableOp will sometimes find a less precise solution due to the small input vectors used in this UT. Bumping op tolerance to eliminate flakiness. Pull Request resolved: https://github.com/pytorch/pytorch/pull/158788 Approved by: https://github.com/jeffdaily (cherry picked from commit c917c63282c467ef942c99da3ce4fa57bceba603) --- test/test_linalg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_linalg.py b/test/test_linalg.py index fe000c4ae9efc..6c4e6ccf04cd8 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -4704,6 +4704,7 @@ def test_matmul_small_brute_force_3d_Nd(self, device, dtype): @onlyCUDA @skipCUDAIfNotRocm # Skipping due to SM89 OOM in CI, UT doesn't do much on NV anyways @dtypes(*floating_types_and(torch.half)) + @precisionOverride({torch.float16: 1e-1}) # TunableOp may occasionally find less precise solution def test_matmul_small_brute_force_tunableop(self, device, dtype): # disable tunableop buffer rotation for all tests everywhere, it can be slow # We set the TunableOp numerical check environment variable here because it is