From 353f4ffb313e0f91191f65c8350bea11ced2705e Mon Sep 17 00:00:00 2001 From: Ethan Wee Date: Tue, 1 Apr 2025 18:31:21 +0000 Subject: [PATCH] [ROCm] update test buffer fudge factor for hipblaslt (#150348) The default workspace for hipblaslt is larger than for cublas/cublaslt, which requires a slight increase to the buffer the test needs. Forward-fix for #150227, which broke ROCm distributed tests but wasn't part of the initial CI signal. Pull Request resolved: https://github.com/pytorch/pytorch/pull/150348 Approved by: https://github.com/jeffdaily --- test/distributed/_composable/fsdp/test_fully_shard_memory.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/distributed/_composable/fsdp/test_fully_shard_memory.py b/test/distributed/_composable/fsdp/test_fully_shard_memory.py index 340fe913c1eba..de6df77479c92 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_memory.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_memory.py @@ -117,6 +117,9 @@ def _test_fully_shard_training_memory( # number is kept much smaller than the actual memory usage, which is on # the order of 100-200+ MB) buffer_mb = 16 + # The default workspace for hipblaslt is larger than for cublas/cublaslt + # which requires a slight increase to this buffer value. + buffer_mb = 16 if torch.version.cuda else 18 if reshard_after_forward: # 3x max unsharded block parameters (current all-gather + copy-out # and next all-gather), non-block parameters, and other