From c9ba49d261fb2e23ba9a01e61770a55369c8d552 Mon Sep 17 00:00:00 2001 From: "Nichols A. Romero" <165712832+naromero77amd@users.noreply.github.com> Date: Wed, 23 Jul 2025 11:32:26 -0500 Subject: [PATCH] =?UTF-8?q?[release/2.7][ROCm][tunableop]=20UT=20tolerance?= =?UTF-8?q?=20increase=20for=20matmul=5Fsmall=5Fbrute=5Fforce=5F=E2=80=A6?= =?UTF-8?q?=20(#2397)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TunableOp will sometimes find a less precise solution due to the small input vectors used in this UT. Bumping up tolerance to eliminate flakiness. Pull Request resolved: https://github.com/pytorch/pytorch/pull/158788 Approved by: https://github.com/jeffdaily (cherry picked from commit c917c63282c467ef942c99da3ce4fa57bceba603) --- test/test_linalg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_linalg.py b/test/test_linalg.py index b5ed3af02729f..1f5d4009cebba 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -4759,6 +4759,7 @@ def test_matmul_small_brute_force_3d_Nd(self, device, dtype): @onlyCUDA @skipCUDAIfNotRocm # Skipping due to SM89 OOM in CI, UT doesn't do much on NV anyways @dtypes(*floating_types_and(torch.half)) + @precisionOverride({torch.float16: 1e-1}) # TunableOp may occasionally find less precise solution def test_matmul_small_brute_force_tunableop(self, device, dtype): # disable tunableop buffer rotation for all tests everywhere, it can be slow # We set the TunableOp numerical check environment variable here because it is