From acea51aab3ae9c443b19a072ff7fa8791afe58a6 Mon Sep 17 00:00:00 2001
From: rraminen
Date: Thu, 26 Jun 2025 07:14:16 +0000
Subject: [PATCH 1/3] Clean up CUDA state between tests

---
 test/test_cuda.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test/test_cuda.py b/test/test_cuda.py
index 9dd18eb12cfbb..999053250f8bb 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -442,6 +442,10 @@ def test_out_of_memory_retry(self):
         IS_JETSON, "oom reporting has issues on jetson igx due to partial nvml support"
     )
     def test_set_per_process_memory_fraction(self):
+        if torch.version.hip:
+            torch.cuda.empty_cache()
+            torch.cuda.reset_peak_memory_stats()
+
         orig = torch.cuda.get_per_process_memory_fraction(0)
         try:
             # test invalid fraction value.

From b00cb058f98e7a9c06f7c3f1a556e2ff2ff2cc7f Mon Sep 17 00:00:00 2001
From: rraminen
Date: Tue, 8 Jul 2025 21:26:50 +0000
Subject: [PATCH 2/3] Specific to gfx1101

---
 test/test_cuda.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_cuda.py b/test/test_cuda.py
index 999053250f8bb..d7d171eca31b6 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -442,7 +442,7 @@ def test_out_of_memory_retry(self):
         IS_JETSON, "oom reporting has issues on jetson igx due to partial nvml support"
     )
     def test_set_per_process_memory_fraction(self):
-        if torch.version.hip:
+        if torch.version.hip and ('gfx1101' in torch.cuda.get_device_properties(0).gcnArchName):
             torch.cuda.empty_cache()
             torch.cuda.reset_peak_memory_stats()
 

From 7bff9c3008b162b45b6a53b96fdd3d3d1c2405cf Mon Sep 17 00:00:00 2001
From: rraminen
Date: Wed, 9 Jul 2025 18:27:13 +0000
Subject: [PATCH 3/3] Remove extra line

---
 test/test_cuda.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/test_cuda.py b/test/test_cuda.py
index d7d171eca31b6..1f6decf765ea6 100644
--- a/test/test_cuda.py
+++ b/test/test_cuda.py
@@ -445,7 +445,6 @@ def test_set_per_process_memory_fraction(self):
         if torch.version.hip and ('gfx1101' in torch.cuda.get_device_properties(0).gcnArchName):
             torch.cuda.empty_cache()
             torch.cuda.reset_peak_memory_stats()
-
         orig = torch.cuda.get_per_process_memory_fraction(0)
         try:
             # test invalid fraction value.