diff --git a/monai/__init__.py b/monai/__init__.py index 7ab30bcae7..5043208b9c 100644 --- a/monai/__init__.py +++ b/monai/__init__.py @@ -26,7 +26,7 @@ __basedir__ = os.path.dirname(__file__) -if not (sys.version_info.major == PY_REQUIRED_MAJOR and sys.version_info.minor >= PY_REQUIRED_MINOR): +if sys.version_info.major != PY_REQUIRED_MAJOR or sys.version_info.minor < PY_REQUIRED_MINOR: raise RuntimeError( "MONAI requires Python {}.{} or higher. But the current Python is: {}".format( PY_REQUIRED_MAJOR, PY_REQUIRED_MINOR, sys.version diff --git a/tests/test_affine_grid.py b/tests/test_affine_grid.py index 972cf20a1f..9bf2bcf90e 100644 --- a/tests/test_affine_grid.py +++ b/tests/test_affine_grid.py @@ -16,7 +16,7 @@ from parameterized import parameterized from monai.transforms import AffineGrid -from tests.utils import TEST_NDARRAYS, assert_allclose +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env TESTS = [] for p in TEST_NDARRAYS: @@ -107,6 +107,8 @@ ] ) +_rtol = 5e-2 if is_tf32_env() else 1e-4 + class TestAffineGrid(unittest.TestCase): @parameterized.expand(TESTS) @@ -115,7 +117,7 @@ def test_affine_grid(self, input_param, input_data, expected_val): result, _ = g(**input_data) if "device" in input_data: self.assertEqual(result.device, input_data[device]) - assert_allclose(result, expected_val, type_test=False, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, type_test=False, rtol=_rtol) if __name__ == "__main__": diff --git a/tests/test_affine_transform.py b/tests/test_affine_transform.py index 42af58be73..ef39c297ce 100644 --- a/tests/test_affine_transform.py +++ b/tests/test_affine_transform.py @@ -17,6 +17,9 @@ from monai.networks import normalize_transform, to_norm_affine from monai.networks.layers import AffineTransform +from tests.utils import is_tf32_env + +_rtol = 1e-4 if not is_tf32_env() else 5e-3 TEST_NORM_CASES = [ [(4, 5), True, [[[0.666667, 0, -1], [0, 0.5, -1], [0, 0, 1]]]], @@ -95,7 +98,7 @@ def 
test_to_norm_affine(self, affine, src_size, dst_size, align_corners, expecte affine = torch.as_tensor(affine, device=torch.device("cuda:0"), dtype=torch.float32) new_affine = to_norm_affine(affine, src_size, dst_size, align_corners) new_affine = new_affine.detach().cpu().numpy() - np.testing.assert_allclose(new_affine, expected, atol=1e-4) + np.testing.assert_allclose(new_affine, expected, atol=1e-5, rtol=_rtol) @parameterized.expand(TEST_ILL_TO_NORM_AFFINE_CASES) def test_to_norm_affine_ill(self, affine, src_size, dst_size, align_corners): @@ -113,7 +116,7 @@ def test_affine_shift(self): out = AffineTransform()(image, affine) out = out.detach().cpu().numpy() expected = [[[[0, 4, 1, 3], [0, 7, 6, 8], [0, 3, 5, 3]]]] - np.testing.assert_allclose(out, expected, atol=1e-5) + np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol) def test_affine_shift_1(self): affine = torch.as_tensor([[1.0, 0.0, -1.0], [0.0, 1.0, -1.0]]) @@ -121,7 +124,7 @@ def test_affine_shift_1(self): out = AffineTransform()(image, affine) out = out.detach().cpu().numpy() expected = [[[[0, 0, 0, 0], [0, 4, 1, 3], [0, 7, 6, 8]]]] - np.testing.assert_allclose(out, expected, atol=1e-5) + np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol) def test_affine_shift_2(self): affine = torch.as_tensor([[1.0, 0.0, -1.0], [0.0, 1.0, 0.0]]) @@ -129,28 +132,28 @@ def test_affine_shift_2(self): out = AffineTransform()(image, affine) out = out.detach().cpu().numpy() expected = [[[[0, 0, 0, 0], [4, 1, 3, 2], [7, 6, 8, 5]]]] - np.testing.assert_allclose(out, expected, atol=1e-5) + np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol) def test_zoom(self): affine = torch.as_tensor([[1.0, 0.0, 0.0], [0.0, 2.0, 0.0]]) image = torch.arange(1.0, 13.0).view(1, 1, 3, 4).to(device=torch.device("cpu:0")) out = AffineTransform((3, 2))(image, affine) expected = [[[[1, 3], [5, 7], [9, 11]]]] - np.testing.assert_allclose(out, expected, atol=1e-5) + np.testing.assert_allclose(out, expected, 
atol=1e-5, rtol=_rtol) def test_zoom_1(self): affine = torch.as_tensor([[2.0, 0.0, 0.0], [0.0, 1.0, 0.0]]) image = torch.arange(1.0, 13.0).view(1, 1, 3, 4).to(device=torch.device("cpu:0")) out = AffineTransform()(image, affine, (1, 4)) expected = [[[[1, 2, 3, 4]]]] - np.testing.assert_allclose(out, expected, atol=1e-5) + np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol) def test_zoom_2(self): affine = torch.as_tensor([[2.0, 0.0, 0.0], [0.0, 2.0, 0.0]], dtype=torch.float32) image = torch.arange(1.0, 13.0).view(1, 1, 3, 4).to(device=torch.device("cpu:0")) out = AffineTransform((1, 2))(image, affine) expected = [[[[1, 3]]]] - np.testing.assert_allclose(out, expected, atol=1e-5) + np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol) def test_affine_transform_minimum(self): t = np.pi / 3 @@ -169,7 +172,7 @@ def test_affine_transform_minimum(self): ] ] ] - np.testing.assert_allclose(out, expected, atol=1e-5) + np.testing.assert_allclose(out, expected, atol=1e-3, rtol=_rtol) def test_affine_transform_2d(self): t = np.pi / 3 @@ -188,7 +191,7 @@ def test_affine_transform_2d(self): ] ] ] - np.testing.assert_allclose(out, expected, atol=1e-5) + np.testing.assert_allclose(out, expected, atol=1e-3, rtol=_rtol) if torch.cuda.is_available(): affine = torch.as_tensor(affine, device=torch.device("cuda:0"), dtype=torch.float32) @@ -205,7 +208,7 @@ def test_affine_transform_2d(self): ] ] ] - np.testing.assert_allclose(out, expected, atol=1e-4) + np.testing.assert_allclose(out, expected, atol=5e-3) def test_affine_transform_3d(self): t = np.pi / 3 @@ -231,7 +234,7 @@ def test_affine_transform_3d(self): ] ], ] - np.testing.assert_allclose(out, expected, atol=1e-4) + np.testing.assert_allclose(out, expected, atol=1e-4, rtol=_rtol) if torch.cuda.is_available(): affine = torch.as_tensor(affine, device=torch.device("cuda:0"), dtype=torch.float32) @@ -255,7 +258,7 @@ def test_affine_transform_3d(self): ] ], ] - np.testing.assert_allclose(out, expected, atol=1e-4) + 
np.testing.assert_allclose(out, expected, atol=5e-3) def test_ill_affine_transform(self): with self.assertRaises(ValueError): # image too small diff --git a/tests/test_create_grid_and_affine.py b/tests/test_create_grid_and_affine.py index b53eaa5b9d..cd8d75f63e 100644 --- a/tests/test_create_grid_and_affine.py +++ b/tests/test_create_grid_and_affine.py @@ -22,7 +22,7 @@ create_shear, create_translate, ) -from tests.utils import assert_allclose +from tests.utils import assert_allclose, is_tf32_env class TestCreateGrid(unittest.TestCase): @@ -162,7 +162,7 @@ def test_assert(func, params, expected): m = func(*params, device="cuda:0", backend="torch") else: m = func(*params, backend=b) - assert_allclose(m, expected, type_test=False, atol=1e-7) + assert_allclose(m, expected, type_test=False, rtol=1e-2 if is_tf32_env() else 1e-5, atol=1e-5) class TestCreateAffine(unittest.TestCase): diff --git a/tests/test_global_mutual_information_loss.py b/tests/test_global_mutual_information_loss.py index a688ea8394..6a658563bc 100644 --- a/tests/test_global_mutual_information_loss.py +++ b/tests/test_global_mutual_information_loss.py @@ -114,7 +114,7 @@ class TestGlobalMutualInformationLoss(unittest.TestCase): @SkipIfBeforePyTorchVersion((1, 9)) def test_shape(self, input_param, input_data, expected_val): result = GlobalMutualInformationLoss(**input_param).forward(**input_data) - np.testing.assert_allclose(result.detach().cpu().numpy(), expected_val, rtol=1e-4) + np.testing.assert_allclose(result.detach().cpu().numpy(), expected_val, rtol=1e-3, atol=1e-3) def test_ill_shape(self): loss = GlobalMutualInformationLoss() diff --git a/tests/test_lltm.py b/tests/test_lltm.py index f1311379bc..4186c91246 100644 --- a/tests/test_lltm.py +++ b/tests/test_lltm.py @@ -15,7 +15,9 @@ from parameterized import parameterized from monai.networks.layers import LLTM -from tests.utils import SkipIfNoModule +from tests.utils import SkipIfNoModule, is_tf32_env + +_rtol = 0.001 if is_tf32_env() else 
0.0001 TEST_CASE_1 = [ {"input_features": 32, "state_size": 2}, @@ -50,8 +52,8 @@ def test_value_cuda(self, input_param, expected_h, expected_c): new_h, new_c = lltm(x, (h, c)) (new_h.sum() + new_c.sum()).backward() - torch.testing.assert_allclose(new_h, expected_h.to(device), rtol=0.0001, atol=1e-04) - torch.testing.assert_allclose(new_c, expected_c.to(device), rtol=0.0001, atol=1e-04) + torch.testing.assert_allclose(new_h, expected_h.to(device), rtol=_rtol, atol=0.001) + torch.testing.assert_allclose(new_c, expected_c.to(device), rtol=_rtol, atol=0.001) if __name__ == "__main__": diff --git a/tests/test_rand_affine.py b/tests/test_rand_affine.py index c88aa538ed..96322813c9 100644 --- a/tests/test_rand_affine.py +++ b/tests/test_rand_affine.py @@ -17,7 +17,9 @@ from monai.transforms import RandAffine from monai.utils.type_conversion import convert_data_type -from tests.utils import TEST_NDARRAYS, assert_allclose +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env + +_rtol = 1e-3 if is_tf32_env() else 1e-4 TESTS = [] for p in TEST_NDARRAYS: @@ -141,7 +143,7 @@ def test_rand_affine(self, input_param, input_data, expected_val): result = g(**input_data) if input_param.get("cache_grid", False): self.assertTrue(g._cached_grid is not None) - assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, rtol=_rtol, atol=1e-4) def test_ill_cache(self): with self.assertWarns(UserWarning): diff --git a/tests/test_rand_affine_grid.py b/tests/test_rand_affine_grid.py index 4fb534aba1..ade615cd65 100644 --- a/tests/test_rand_affine_grid.py +++ b/tests/test_rand_affine_grid.py @@ -16,7 +16,9 @@ from parameterized import parameterized from monai.transforms import RandAffineGrid -from tests.utils import TEST_NDARRAYS, assert_allclose +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env + +_rtol = 1e-1 if is_tf32_env() else 1e-4 TESTS = [] for p in TEST_NDARRAYS: @@ -201,7 +203,7 @@ def 
test_rand_affine_grid(self, input_param, input_data, expected_val): result = g(**input_data) if "device" in input_data: self.assertEqual(result.device, input_data[device]) - assert_allclose(result, expected_val, type_test=False, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, type_test=False, rtol=_rtol, atol=1e-4) if __name__ == "__main__": diff --git a/tests/test_rand_affined.py b/tests/test_rand_affined.py index 0109175b16..651452ab07 100644 --- a/tests/test_rand_affined.py +++ b/tests/test_rand_affined.py @@ -17,7 +17,9 @@ from monai.transforms import RandAffined from monai.utils import GridSampleMode -from tests.utils import TEST_NDARRAYS, assert_allclose +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env + +_rtol = 1e-3 if is_tf32_env() else 1e-4 TESTS = [] for p in TEST_NDARRAYS: @@ -209,7 +211,7 @@ def test_rand_affined(self, input_param, input_data, expected_val): if "_transforms" in key: continue expected = expected_val[key] if isinstance(expected_val, dict) else expected_val - assert_allclose(result, expected, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected, rtol=_rtol, atol=1e-3) g.set_random_state(4) res = g(input_data) diff --git a/tests/test_rand_elastic_2d.py b/tests/test_rand_elastic_2d.py index c414eb1ffd..22920d0f35 100644 --- a/tests/test_rand_elastic_2d.py +++ b/tests/test_rand_elastic_2d.py @@ -16,7 +16,9 @@ from parameterized import parameterized from monai.transforms import Rand2DElastic -from tests.utils import TEST_NDARRAYS, assert_allclose +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env + +_rtol = 5e-3 if is_tf32_env() else 1e-4 TESTS = [] for p in TEST_NDARRAYS: @@ -110,7 +112,7 @@ def test_rand_2d_elastic(self, input_param, input_data, expected_val): g = Rand2DElastic(**input_param) g.set_random_state(123) result = g(**input_data) - assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, rtol=_rtol, atol=1e-4) if __name__ == 
"__main__": diff --git a/tests/test_rand_elasticd_2d.py b/tests/test_rand_elasticd_2d.py index 84f18120e1..77e6489d50 100644 --- a/tests/test_rand_elasticd_2d.py +++ b/tests/test_rand_elasticd_2d.py @@ -16,7 +16,9 @@ from parameterized import parameterized from monai.transforms import Rand2DElasticd -from tests.utils import TEST_NDARRAYS, assert_allclose +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env + +_rtol = 5e-3 if is_tf32_env() else 1e-4 TESTS = [] for p in TEST_NDARRAYS: @@ -164,7 +166,7 @@ def test_rand_2d_elasticd(self, input_param, input_data, expected_val): for key in res: result = res[key] expected = expected_val[key] if isinstance(expected_val, dict) else expected_val - assert_allclose(result, expected, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected, rtol=_rtol, atol=5e-3) if __name__ == "__main__": diff --git a/tests/utils.py b/tests/utils.py index b7e32068c3..a3d52ae2cb 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -25,7 +25,7 @@ from io import BytesIO from subprocess import PIPE, Popen from typing import Callable, Optional, Tuple -from urllib.error import ContentTooShortError, HTTPError, URLError +from urllib.error import HTTPError, URLError import numpy as np import torch @@ -43,6 +43,7 @@ nib, _ = optional_import("nibabel") quick_test_var = "QUICKTEST" +_tf32_enabled = None def clone(data: NdarrayTensor) -> NdarrayTensor: @@ -94,16 +95,43 @@ def assert_allclose( def test_pretrained_networks(network, input_param, device): try: - net = network(**input_param).to(device) - except (URLError, HTTPError, ContentTooShortError) as e: + return network(**input_param).to(device) + except (URLError, HTTPError) as e: raise unittest.SkipTest(e) from e - return net def test_is_quick(): return os.environ.get(quick_test_var, "").lower() == "true" +def is_tf32_env(): + """ + The environment variable NVIDIA_TF32_OVERRIDE=0 will override any defaults + or programmatic configuration of NVIDIA libraries, and consequently, + cuBLAS 
will not accelerate FP32 computations with TF32 tensor cores. + """ + global _tf32_enabled + if _tf32_enabled is None: + _tf32_enabled = False + if ( + torch.cuda.is_available() + and not version_leq(f"{torch.version.cuda}", "10.100") # at least 11.0 + and os.environ.get("NVIDIA_TF32_OVERRIDE", "1") != "0" + and torch.cuda.device_count() > 0 + ): + try: + # with TF32 enabled, the speed is ~8x faster, but the precision has ~2 digits less in the result + g_gpu = torch.Generator(device="cuda") + g_gpu.manual_seed(2147483647) + a_full = torch.randn(1024, 1024, dtype=torch.double, device="cuda", generator=g_gpu) + b_full = torch.randn(1024, 1024, dtype=torch.double, device="cuda", generator=g_gpu) + _tf32_enabled = (a_full.float() @ b_full.float() - a_full @ b_full).abs().max().item() > 0.001 # 0.1713 + except BaseException: + pass + print(f"tf32 enabled: {_tf32_enabled}") + return _tf32_enabled + + def skip_if_quick(obj): """ Skip the unit tests if environment variable `quick_test_var=true`.