From 353f4ffb313e0f91191f65c8350bea11ced2705e Mon Sep 17 00:00:00 2001
From: Ethan Wee <Ethan.Wee@amd.com>
Date: Tue, 1 Apr 2025 18:31:21 +0000
Subject: [PATCH] [ROCm] update test buffer fudge factor for hipblaslt
 (#150348)

The default workspace for hipblaslt is larger than for cublas/cublaslt, so the test's memory buffer (fudge factor) needs a slight increase on ROCm.

Forward fix for #150227, which broke ROCm distributed tests; the breakage was not surfaced by the initial CI signal.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/150348
Approved by: https://github.com/jeffdaily
---
 test/distributed/_composable/fsdp/test_fully_shard_memory.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/distributed/_composable/fsdp/test_fully_shard_memory.py b/test/distributed/_composable/fsdp/test_fully_shard_memory.py
index 340fe913c1eba..de6df77479c92 100644
--- a/test/distributed/_composable/fsdp/test_fully_shard_memory.py
+++ b/test/distributed/_composable/fsdp/test_fully_shard_memory.py
@@ -117,6 +117,9 @@ def _test_fully_shard_training_memory(
         # number is kept much smaller than the actual memory usage, which is on
         # the order of 100-200+ MB)
         buffer_mb = 16
+        # The default workspace for hipblaslt is larger than for cublas/cublaslt
+        # which requires a slight increase to this buffer value.
+        buffer_mb = 16 if torch.version.cuda else 18
         if reshard_after_forward:
             # 3x max unsharded block parameters (current all-gather + copy-out
             # and next all-gather), non-block parameters, and other