diff --git a/.jenkins/pytorch/build.sh b/.jenkins/pytorch/build.sh index cc8d0991f7218..e280db56c4225 100755 --- a/.jenkins/pytorch/build.sh +++ b/.jenkins/pytorch/build.sh @@ -43,6 +43,11 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then # This environment variable enabled HCC Optimizations that speed up the linking stage. # https://github.com/RadeonOpenCompute/hcc#hcc-with-thinlto-linking export KMTHINLTO=1 + + # Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime + sudo apt-get install libc++1 + sudo apt-get install libc++abi1 + python tools/amd_build/build_pytorch_amd.py USE_ROCM=1 python setup.py install --user exit 0 diff --git a/.jenkins/pytorch/disabled-configs.txt b/.jenkins/pytorch/disabled-configs.txt index 7173b5fbb1063..cdd51d3fb54a5 100644 --- a/.jenkins/pytorch/disabled-configs.txt +++ b/.jenkins/pytorch/disabled-configs.txt @@ -3,5 +3,3 @@ # fail. You can use this to temporarily reserve a test name to # turn on CI side before PyTorch repository supports it. This # file has the same format as .jenkins/enabled-configs.txt - -py2-clang3.8-rocm1.7.1-ubuntu16.04-test diff --git a/.jenkins/pytorch/enabled-configs.txt b/.jenkins/pytorch/enabled-configs.txt index 6801323acd8e8..39456291d82d1 100644 --- a/.jenkins/pytorch/enabled-configs.txt +++ b/.jenkins/pytorch/enabled-configs.txt @@ -41,3 +41,4 @@ pytorch-docker-build-test short-perf-test-cpu short-perf-test-gpu py2-clang3.8-rocm1.7.1-ubuntu16.04-build +py2-clang3.8-rocm1.7.1-ubuntu16.04-test diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh index 7c8320b55e803..c7ac325e705d8 100755 --- a/.jenkins/pytorch/test.sh +++ b/.jenkins/pytorch/test.sh @@ -74,7 +74,7 @@ test_python_all_except_nn() { test_aten() { # Test ATen - if [[ "$BUILD_ENVIRONMENT" != *asan* ]]; then + if ([[ "$BUILD_ENVIRONMENT" != *asan* ]] && [[ "$BUILD_ENVIRONMENT" != *rocm* ]]); then echo "Running ATen tests with pytorch lib" TORCH_LIB_PATH=$(python -c "import site; print(site.getsitepackages()[0])")/torch/lib # NB: the ATen test binaries don't have RPATH set, so it's necessary to @@ -101,7 +101,7 @@ test_torchvision() { # this should be a transient requirement...) # See https://github.com/pytorch/pytorch/issues/7525 #time python setup.py install - pip install . + pip install --user . popd } diff --git a/test/common.py b/test/common.py index 4dbe3c56c47c9..bd43ef7ec6b0d 100644 --- a/test/common.py +++ b/test/common.py @@ -97,6 +97,14 @@ def _check_module_exists(name): if TEST_NUMPY: import numpy +def skipIfRocm(fn): + @wraps(fn) + def wrapper(*args, **kwargs): + if TEST_WITH_ROCM: + raise unittest.SkipTest("test doesn't currently work on the ROCm stack") + else: + fn(*args, **kwargs) + return wrapper def skipIfNoLapack(fn): @wraps(fn) @@ -104,7 +112,7 @@ def wrapper(*args, **kwargs): try: fn(*args, **kwargs) except Exception as e: - if 'Lapack library not found' in e.args[0]: + if 'Lapack library not found' in repr(e): raise unittest.SkipTest('Compiled without Lapack') raise return wrapper diff --git a/test/run_test.py b/test/run_test.py index 65aa1003e3aa5..954837b0b85a7 100644 --- a/test/run_test.py +++ b/test/run_test.py @@ -13,6 +13,7 @@ import torch from torch.utils import cpp_extension +from common import TEST_WITH_ROCM TESTS = [ 'autograd', @@ -38,6 +39,25 @@ 'distributed', ] +ROCM_BLACKLIST = [ + 'c10d', + 'cpp_extensions', + 'cuda', + 'dataloader', + 'distributed', + 'distributions', + 'indexing', + 'jit', + 'legacy_nn', + 'multiprocessing', + 'nccl', + 'nn', + 'optim', + 'sparse', + 'torch', + 'utils', +] + DISTRIBUTED_TESTS_CONFIG = { 'tcp': { 'WORLD_SIZE': '3' @@ -303,6 +323,9 @@ def get_selected_tests(options): selected_tests = exclude_tests(WINDOWS_BLACKLIST, selected_tests, 'on Windows') + elif TEST_WITH_ROCM: + selected_tests = exclude_tests(ROCM_BLACKLIST, selected_tests, 'on ROCm') + return selected_tests diff --git a/test/test_autograd.py b/test/test_autograd.py index 9d39043db9b56..ac5c74f550e78 100644 --- a/test/test_autograd.py +++ b/test/test_autograd.py @@ -15,7 +15,7 @@ from torch.autograd.function import once_differentiable from torch.autograd.profiler import profile from common import TEST_MKL, TestCase, run_tests, skipIfNoLapack, \ - suppress_warnings, TEST_WITH_ROCM + suppress_warnings, TEST_WITH_ROCM, skipIfRocm from torch.autograd import Variable, Function, detect_anomaly from torch.autograd.function import InplaceFunction from torch.testing import make_non_contiguous, randn_like @@ -975,6 +975,7 @@ def test_no_requires_grad_inplace(self): with self.assertRaises(RuntimeError): b.add_(5) + @unittest.skipIf(TEST_WITH_ROCM, "test doesn't currently work on the ROCm stack") def test_requires_grad_factory(self): x = torch.randn(2, 3) fns = [torch.ones_like, torch.testing.randn_like] @@ -1374,6 +1375,7 @@ def __del__(self): Variable(torch.randn(10, 10), _grad_fn=CollectOnDelete()) @unittest.skipIf(torch.cuda.device_count() < 2, "no multi-GPU") + @unittest.skipIf(TEST_WITH_ROCM, "test doesn't currently work on the ROCm stack") def test_unused_output_gpu(self): from torch.nn.parallel._functions import Broadcast x = Variable(torch.randn(5, 5).float().cuda(), requires_grad=True) @@ -1402,6 +1404,7 @@ def backward(ctx, grad_output): self.assertEqual(device[0], 1) @unittest.skipIf(torch.cuda.device_count() < 2, "no multi-GPU") + @unittest.skipIf(TEST_WITH_ROCM, "test doesn't currently work on the ROCm stack") def test_inputbuffer_add_multigpu(self): input = torch.randn(1).cuda(0).requires_grad_() output = input.cuda(1) + input.cuda(1) @@ -1451,6 +1454,7 @@ def test_detach_base(self): self.assertIsNotNone(view.grad_fn) self.assertIs(view._base, x) + @unittest.skipIf(TEST_WITH_ROCM, "test doesn't currently work on the ROCm stack") def _test_type_conversion_backward(self, t, ): fvar = Variable(t(torch.randn(5, 5).float()), requires_grad=True) fvar.double().sum().backward() @@ -1910,6 +1914,7 @@ def test_cat_empty(self): lambda a, b: torch.cat((a, b)), True, f_args_variable, f_args_tensor) + @unittest.skipIf(TEST_WITH_ROCM, "test doesn't currently work on the ROCm stack") def test_potrf(self): root = Variable(torch.tril(torch.rand(S, S)), requires_grad=True) @@ -2069,6 +2074,7 @@ def run_test(input_size, exponent): run_test((10, 10), torch.zeros(10, 10)) run_test((10,), 0) + @unittest.skipIf(TEST_WITH_ROCM, "test doesn't currently work on the ROCm stack") def test_pinverse(self): # Why is pinverse tested this way, and not ordinarily as other linear algebra methods? # 1. Pseudo-inverses are not generally continuous, which means that they are not differentiable @@ -2451,6 +2457,7 @@ def backward(ctx, gO): out.backward() self.assertIn('MyFunc.apply', str(w[0].message)) + @unittest.skipIf(TEST_WITH_ROCM, "test doesn't currently work on the ROCm stack") def test_symeig_no_eigenvectors(self): A = torch.tensor([[1., 2.], [2., 4.]], dtype=torch.float32, requires_grad=True) w, v = torch.symeig(A, eigenvectors=False) @@ -3080,10 +3087,10 @@ class dont_convert(tuple): ('svd', lambda: random_fullrank_matrix_distinct_singular_value(M), NO_ARGS, 'large', NO_ARGS, [skipIfNoLapack]), ('gesv', (S, S), ((S, S),), '', NO_ARGS, [skipIfNoLapack]), - ('gesv', (S, S, S), ((S, S, S),), 'batched', NO_ARGS, [skipIfNoLapack]), - ('gesv', (2, 3, S, S), ((2, 3, S, S),), 'batched_dims', NO_ARGS, [skipIfNoLapack]), - ('gesv', (2, 2, S, S), ((1, S, S),), 'batched_broadcast_A', NO_ARGS, [skipIfNoLapack]), - ('gesv', (1, S, S), ((2, 2, S, S),), 'batched_broadcast_b', NO_ARGS, [skipIfNoLapack]), + ('gesv', (S, S, S), ((S, S, S),), 'batched', NO_ARGS, [skipIfNoLapack, skipIfRocm]), + ('gesv', (2, 3, S, S), ((2, 3, S, S),), 'batched_dims', NO_ARGS, [skipIfNoLapack, skipIfRocm]), + ('gesv', (2, 2, S, S), ((1, S, S),), 'batched_broadcast_A', NO_ARGS, [skipIfNoLapack, skipIfRocm]), + ('gesv', (1, S, S), ((2, 2, S, S),), 'batched_broadcast_b', NO_ARGS, [skipIfNoLapack, skipIfRocm]), ('fill_', (S, S, S), (1,), 'number'), ('fill_', (), (1,), 'number_scalar'), # FIXME: we should compute the derivative w.r.t torch.tensor(1)