From 17c66260ed67d6d85466163f795bdd092783639d Mon Sep 17 00:00:00 2001 From: Chaitanya Sri Krishna Lolla Date: Fri, 5 Oct 2018 12:35:52 -0700 Subject: [PATCH 01/11] enabled cuda tests (#248) --- test/test_cuda.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/test/test_cuda.py b/test/test_cuda.py index cf21208c77aab..0ef834e995a8f 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -358,9 +358,9 @@ def tmp(t): ('max', small_3d_unique, lambda t: [1], 'dim'), ('max', small_3d_unique, lambda t: [-1], 'neg_dim'), ('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise'), - ('min', small_3d_unique, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"), - ('min', small_3d_unique, lambda t: [1], 'dim', types, False, skipIfRocm), - ('min', small_3d_unique, lambda t: [-1], 'neg_dim', types, False, skipIfRocm), + ('min', small_3d_unique, lambda t: []), + ('min', small_3d_unique, lambda t: [1], 'dim'), + ('min', small_3d_unique, lambda t: [-1], 'neg_dim'), ('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise'), ('mean', small_3d, lambda t: []), ('mean', small_3d, lambda t: [-1], 'neg_dim'), @@ -411,11 +411,11 @@ def tmp(t): ('size', new_t(1, 2, 3, 4), lambda t: [],), ('size', new_t(1, 2, 3, 4), lambda t: [1], 'dim'), ('size', new_t(1, 2, 3, 4), lambda t: [-2], 'neg_dim'), - ('sort', small_3d_unique, lambda t: [], '', types, False, skipIfRocm), - ('sort', small_3d_unique, lambda t: [1], 'dim', types, False, skipIfRocm), - ('sort', small_3d_unique, lambda t: [-1], 'neg_dim', types, False, skipIfRocm), - ('sort', small_3d_unique, lambda t: [1, True], 'dim_descending', types, False, skipIfRocm), - ('sort', small_3d_unique, lambda t: [-1, True], 'neg_dim_descending', types, False, skipIfRocm), + ('sort', small_3d_unique, lambda t: [], ''), + ('sort', small_3d_unique, lambda t: [1], 'dim'), + ('sort', small_3d_unique, lambda t: [-1], 'neg_dim'), + ('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'), + ('sort', small_3d_unique, lambda t: [-1, True], 'neg_dim_descending'), ('split', small_3d, lambda t: [2],), ('split', small_3d, lambda t: [2, 1], 'dim'), ('split', small_3d, lambda t: [2, -3], 'neg_dim'), @@ -427,10 +427,17 @@ def tmp(t): ('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2],), ('transpose', new_t(1, 2, 3, 4), lambda t: [-1, -2], 'neg_dim'), ('to_list', small_3d, lambda t: [],), +<<<<<<< HEAD ('topk', small_3d_unique, lambda t: [2, 1, False, True], 'dim_sort', types, False, skipIfRocm), ('topk', small_3d_unique, lambda t: [2, -1, False, True], 'neg_dim_sort', types, False, skipIfRocm), ('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort', types, False, skipIfRocm), ('trace', medium_2d, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"), +======= + ('topk', small_3d_unique, lambda t: [2, 1, False, True], 'dim_sort', types, False, "skipIfRocm:HalfTensor"), + ('topk', small_3d_unique, lambda t: [2, -1, False, True], 'neg_dim_sort', types, False, "skipIfRocm:HalfTensor"), + ('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort', types, False, "skipIfRocm:HalfTensor"), + ('trace', medium_2d, lambda t: []), +>>>>>>> 02262a240... 
enabled cuda tests (#248) ('tril', medium_2d, lambda t: [],), ('tril', medium_2d_expanded, lambda t: [], 'zero_stride', types, True), ('tril', medium_2d, lambda t: [2], 'positive'), @@ -1228,11 +1235,9 @@ def test_cat(self): z = torch.cat([x, y]) self.assertEqual(z.size(), (21, SIZE, SIZE)) - @skipIfRocm def test_cat_empty_legacy(self): TestTorch._test_cat_empty_legacy(self, use_cuda=True) - @skipIfRocm def test_cat_empty(self): TestTorch._test_cat_empty(self, use_cuda=True) @@ -1726,7 +1731,6 @@ def test_btrisolve(self): def test_dim_reduction(self): TestTorch._test_dim_reduction(self, lambda t: t.cuda()) - @skipIfRocm def test_tensor_gather(self): TestTorch._test_gather(self, lambda t: t.cuda(), False) From e26cc0e2256a057094db31ac54307134521986ab Mon Sep 17 00:00:00 2001 From: Chaitanya Sri Krishna Lolla Date: Fri, 5 Oct 2018 10:14:30 -0700 Subject: [PATCH 02/11] enabled tests in test_nn and test_torch (#247) --- test/test_nn.py | 43 ------------------------------------------- test/test_torch.py | 13 ------------- 2 files changed, 56 deletions(-) diff --git a/test/test_nn.py b/test/test_nn.py index 6f9004c76d04c..cbae395594884 100644 --- a/test/test_nn.py +++ b/test/test_nn.py @@ -1858,7 +1858,6 @@ def test_embedding_max_norm(self): @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") @repeat_test_for_types(ALL_TENSORTYPES) - @skipIfRocm def test_embedding_max_norm_cuda(self, dtype=torch.float): embedding = nn.Embedding(22, 5, max_norm=1.0).to("cuda", dtype=dtype) # nn.Embedding only takes LongTensor as input @@ -4558,7 +4557,6 @@ def test_inplace_thnn(self): @unittest.skipIf(not TEST_CUDA, 'CUDA not available') @repeat_test_for_types(ALL_TENSORTYPES) - @skipIfRocm def test_noncontig_conv_grad_cuda(self, dtype=torch.float): # FIXME: remove after adding non-contiguous grad tests for all modules module = nn.Conv2d(3, 5, kernel_size=3, padding=1).to("cuda", dtype) @@ -6010,7 +6008,6 @@ def test_conv_double_backward_stride(self): no_weight) @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") - @skipIfRocm def test_cudnn_noncontiguous_weight(self): # Noncontiguous weights must be contiguous() before being # passed to cuDNN @@ -7505,7 +7502,6 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, check_eval=True, skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='BatchNorm2d', @@ -7515,7 +7511,6 @@ def multimarginloss_weights_no_reduce_test(): check_eval=True, desc='2d_simple_average', skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='BatchNorm2d', @@ -7525,7 +7520,6 @@ def multimarginloss_weights_no_reduce_test(): check_eval=True, desc='momentum', skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='BatchNorm2d', @@ -7544,7 +7538,6 @@ def multimarginloss_weights_no_reduce_test(): check_eval=True, desc='not_tracking_stats', skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='BatchNorm3d', @@ -7552,7 +7545,6 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 4, 4, 4), cudnn=True, check_eval=True, - decorator=skipIfRocm, ), dict( module_name='BatchNorm3d', @@ -7561,7 +7553,6 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, check_eval=True, desc='3d_simple_average', - decorator=skipIfRocm, ), dict( module_name='BatchNorm3d', @@ -7570,7 +7561,6 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, check_eval=True, desc='momentum', - decorator=skipIfRocm, ), dict( module_name='BatchNorm3d', @@ -7587,7 +7577,6 @@ def 
multimarginloss_weights_no_reduce_test(): cudnn=True, check_eval=True, desc='not_tracking_stats', - decorator=skipIfRocm, ), dict( module_name='InstanceNorm1d', @@ -7732,7 +7721,6 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 4, 10), cudnn=True, skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv1d', @@ -7741,7 +7729,6 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, desc='stride', skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv1d', @@ -7750,7 +7737,6 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, desc='pad1', skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv1d', @@ -7759,7 +7745,6 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, desc='pad2', skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv1d', @@ -7768,7 +7753,6 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, desc='pad1size1', skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv1d', @@ -7777,14 +7761,12 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, desc='pad2size1', skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv1d_dilated', constructor=lambda: nn.Conv1d(4, 5, kernel_size=3, dilation=2), input_size=(2, 4, 10), skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv1d_groups', @@ -7792,14 +7774,12 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 4, 6), cudnn=True, skip_double=TEST_WITH_ROCM, - test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='ConvTranspose1d', constructor=lambda: nn.ConvTranspose1d(3, 4, kernel_size=3, stride=(3,), padding=1, output_padding=(1,)), cudnn=True, input_size=(1, 3, 7), - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose1d', @@ -7807,7 +7787,6 @@ def multimarginloss_weights_no_reduce_test(): input_size=(1, 3, 6), cudnn=True, desc='no_bias', - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose1d', @@ -7815,14 +7794,12 @@ def multimarginloss_weights_no_reduce_test(): input_size=(1, 3, 6), cudnn=True, desc='dilated', - test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='ConvTranspose1d_groups', constructor=lambda: nn.ConvTranspose1d(4, 6, 3, stride=(3,), padding=1, output_padding=(1,), groups=2), cudnn=True, input_size=(2, 4, 7), - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='MaxPool1d', @@ -7840,7 +7817,6 @@ def multimarginloss_weights_no_reduce_test(): constructor_args=(3, 4, (3, 2)), input_size=(2, 3, 7, 5), cudnn=True, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv2d', @@ -7848,7 +7824,6 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 6, 6), cudnn=True, desc='strided', - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv2d', @@ -7856,7 +7831,6 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 6, 6), cudnn=True, desc='padding', - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv2d', @@ -7864,7 +7838,6 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 8, 8), cudnn=True, desc='dilated', - decorator=skipIfRocm, ), dict( module_name='Conv2d', @@ -7872,27 +7845,23 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 6, 5), cudnn=True, desc='no_bias', - decorator=skipIfRocm, ), dict( fullname='Conv2d_groups', constructor=lambda: nn.Conv2d(4, 6, (3, 2), groups=2), input_size=(2, 4, 6, 5), cudnn=True, - decorator=skipIfRocm, ), 
dict( fullname='Conv2d_groups_thnn', constructor=lambda: nn.Conv2d(4, 6, (3, 2), groups=2), input_size=(2, 4, 6, 5), - decorator=skipIfRocm, ), dict( module_name='ConvTranspose2d', constructor_args=(3, 4, 3, (3, 2), 1, (1, 1)), cudnn=True, input_size=(1, 3, 7, 6), - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose2d', @@ -7900,7 +7869,6 @@ def multimarginloss_weights_no_reduce_test(): input_size=(1, 3, 6, 7), cudnn=True, desc='dilated', - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose2d', @@ -7908,14 +7876,12 @@ def multimarginloss_weights_no_reduce_test(): input_size=(1, 3, 6, 7), cudnn=True, desc='no_bias', - test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='ConvTranspose2d_groups', constructor=lambda: nn.ConvTranspose2d(2, 4, (2, 3), groups=2), input_size=(1, 2, 4, 5), cudnn=True, - test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv2d_depthwise', @@ -8072,7 +8038,6 @@ def multimarginloss_weights_no_reduce_test(): constructor_args=(3, 4, (2, 3, 4)), input_size=(2, 3, 3, 4, 5), cudnn=True, - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv3d', @@ -8080,7 +8045,6 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 3, 4, 5), cudnn=True, desc='no_bias', - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv3d', @@ -8088,7 +8052,6 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 5, 5, 5), cudnn=True, desc='stride', - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv3d', @@ -8096,33 +8059,28 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 5, 5, 5), cudnn=True, desc='stride_padding', - test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv3d_groups', constructor=lambda: nn.Conv3d(4, 6, kernel_size=3, groups=2), input_size=(2, 4, 4, 5, 4), cudnn=True, - test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv3d_dilated', constructor=lambda: nn.Conv3d(3, 4, kernel_size=2, dilation=2), input_size=(2, 3, 5, 5, 5), - test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv3d_dilated_strided', constructor=lambda: nn.Conv3d(3, 4, kernel_size=2, dilation=2, stride=2), input_size=(2, 3, 5, 5, 5), - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose3d', constructor_args=(2, 3, (2, 3, 2)), cudnn=True, input_size=(1, 2, 4, 5, 4), - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose3d', @@ -8130,7 +8088,6 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, input_size=(1, 2, 4, 5, 4), desc='dilated', - test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='MaxPool3d', diff --git a/test/test_torch.py b/test/test_torch.py index 8479929e30f20..2ee0200f630c8 100644 --- a/test/test_torch.py +++ b/test/test_torch.py @@ -1005,7 +1005,6 @@ def test_multidim(x, dim): expected = fn(y, 1, keepdim=False) self.assertEqual(x[:, 1], expected, '{} with out= kwarg'.format(fn_name)) - @skipIfRocm def test_dim_reduction(self): self._test_dim_reduction(self, lambda t: t) @@ -3458,7 +3457,6 @@ def check_order(a, b): seen.add(ixx[k][j]) self.assertEqual(len(seen), size) - @skipIfRocm def test_sort(self): SIZE = 4 x = torch.rand(SIZE, SIZE) @@ -3584,7 +3582,6 @@ def test_topk_arguments(self): self.assertRaises(TypeError, lambda: q.topk(4, True)) @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA') - @skipIfRocm def test_topk_noncontiguous_gpu(self): t = torch.randn(20, device="cuda")[::2] top1, idx1 = t.topk(5) @@ -3592,7 +3589,6 @@ def test_topk_noncontiguous_gpu(self): self.assertEqual(top1, top2) self.assertEqual(idx1, idx2) - @skipIfRocm def 
test_kthvalue(self): SIZE = 50 x = torch.rand(SIZE, SIZE, SIZE) @@ -3637,7 +3633,6 @@ def test_kthvalue(self): self.assertEqual(torch.kthvalue(y, 3)[0], 3, 0) self.assertEqual(torch.kthvalue(y, 2)[0], 1, 0) - @skipIfRocm def test_median(self): for size in (155, 156): x = torch.rand(size, size) @@ -3673,7 +3668,6 @@ def test_median(self): # input unchanged self.assertEqual(x, x0, 0) - @skipIfRocm def test_mode(self): x = torch.arange(1., SIZE * SIZE + 1).clone().resize_(SIZE, SIZE) x[:2] = 1 @@ -4003,7 +3997,6 @@ def test_is_signed_cuda(self): self.assertEqual(torch.cuda.HalfTensor(10).is_signed(), True) @skipIfNoLapack - @skipIfRocm def test_gesv(self): a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23), (-6.05, -3.30, 5.36, -4.44, 1.08), @@ -4135,7 +4128,6 @@ def test_gesv_batched_dims(self): self._test_gesv_batched_dims(self, lambda t: t) @skipIfNoLapack - @skipIfRocm def test_qr(self): # Since the QR decomposition is unique only up to the signs of the rows of @@ -4323,7 +4315,6 @@ def test_trtrs(self): self._test_trtrs(self, lambda t: t) @skipIfNoLapack - @skipIfRocm def test_gels(self): def _test_underdetermined(a, b, expectedNorm): m = a.size()[0] @@ -4439,7 +4430,6 @@ def check_norm(a, b, expected_norm, gels_result): self.assertEqual((torch.mm(a, tb) - b).norm(), expectedNorm, 1e-8) @skipIfNoLapack - @skipIfRocm def test_eig(self): a = torch.Tensor(((1.96, 0.00, 0.00, 0.00, 0.00), (-6.49, 3.80, 0.00, 0.00, 0.00), @@ -4521,7 +4511,6 @@ def _test_symeig(self, conv_fn): self.assertEqual(X, Xhat, 1e-8, 'VeV\' wrong') @skipIfNoLapack - @skipIfRocm def test_symeig(self): self._test_symeig(self, lambda x: x) @@ -5306,7 +5295,6 @@ def test_potri(self): self.assertLessEqual(inv0.dist(inv1), 1e-12) @skipIfNoLapack - @skipIfRocm def test_pstrf(self): def checkPsdCholesky(a, uplo, inplace): if inplace: @@ -8029,7 +8017,6 @@ def test_from_file(self): t2.fill_(rnum) self.assertEqual(t1, t2, 0) - @skipIfRocm def test_print(self): default_type = torch.Tensor().type() for t in torch._tensor_classes: From db13cc863fddfed23e45392eb9c948a4fe7fdbb2 Mon Sep 17 00:00:00 2001 From: jithunnair-amd Date: Tue, 18 Sep 2018 12:08:48 -0500 Subject: [PATCH 03/11] Print library versions for ROCm libraries --- cmake/public/LoadHIP.cmake | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake index 842cf349cbef5..a36c4b5f6cd9f 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -96,8 +96,13 @@ set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH}) # Disable Asserts In Code (Can't use asserts on HIP stack.) 
ADD_DEFINITIONS(-DNDEBUG) +macro(find_package_and_print_version PACKAGE_NAME) + find_package("${PACKAGE_NAME}" ${ARGN}) + message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}") +endmacro() + # Find the HIP Package -FIND_PACKAGE(HIP 1.0) +find_package_and_print_version(HIP 1.0) IF(HIP_FOUND) set(PYTORCH_FOUND_HIP TRUE) @@ -119,18 +124,18 @@ IF(HIP_FOUND) set(hiprand_DIR ${HIPRAND_PATH}/lib/cmake/hiprand) set(rocblas_DIR ${ROCBLAS_PATH}/lib/cmake/rocblas) set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen) - set(rocblas_DIR ${ROCBLAS_PATH}/lib/cmake/rocblas) set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft) set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse) set(rocsparse_DIR ${ROCSPARSE_PATH}/lib/cmake/rocsparse) - find_package(rocrand REQUIRED) - find_package(hiprand REQUIRED) - find_package(rocblas REQUIRED) - find_package(rocfft REQUIRED) - find_package(miopen REQUIRED) - #find_package(rocsparse REQUIRED) - #find_package(hipsparse REQUIRED) + find_package_and_print_version(rocrand REQUIRED) + find_package_and_print_version(hiprand REQUIRED) + find_package_and_print_version(rocblas REQUIRED) + find_package_and_print_version(miopen REQUIRED) + find_package_and_print_version(miopengemm) + find_package_and_print_version(rocfft REQUIRED) + find_package_and_print_version(hipsparse REQUIRED) + find_package_and_print_version(rocsparse REQUIRED) # TODO: hip_hcc has an interface include flag "-hc" which is only # recognizable by hcc, but not gcc and clang. Right now in our From c69ed3585a96a8d5c082d9d0f755c10f36fa147a Mon Sep 17 00:00:00 2001 From: jithunnair-amd Date: Tue, 18 Sep 2018 14:36:45 -0500 Subject: [PATCH 04/11] find_package for hipsparse results in link-time error for -lhipsparse-targets --- cmake/public/LoadHIP.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake index a36c4b5f6cd9f..d48bdefa4776e 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -134,7 +134,7 @@ IF(HIP_FOUND) find_package_and_print_version(miopen REQUIRED) find_package_and_print_version(miopengemm) find_package_and_print_version(rocfft REQUIRED) - find_package_and_print_version(hipsparse REQUIRED) + #find_package_and_print_version(hipsparse REQUIRED) find_package_and_print_version(rocsparse REQUIRED) # TODO: hip_hcc has an interface include flag "-hc" which is only From cc9bdfc4711a0b4047e8a599b7747f2cde4818a6 Mon Sep 17 00:00:00 2001 From: iotamudelta Date: Mon, 24 Sep 2018 11:16:28 -0500 Subject: [PATCH 05/11] Follow the white rabbit -- no AWS custom packages any longer. (#219) * Follow the white rabbit -- no AWS custom packages any longer. * The cmake change made it into the white rabbit drop, it seems. 
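For reference, the find_package_and_print_version macro introduced in PATCH 03 simply forwards its arguments to find_package and then prints the <PackageName>_VERSION variable that CMake config packages conventionally set. A minimal standalone sketch of the same idea, assuming a hypothetical config package Foo that defines Foo_VERSION:

    cmake_minimum_required(VERSION 3.5)
    project(version_print_demo)

    macro(find_package_and_print_version PACKAGE_NAME)
      # ${ARGN} forwards the remaining arguments (version, REQUIRED, ...) unchanged.
      find_package("${PACKAGE_NAME}" ${ARGN})
      # Config packages conventionally populate <name>_VERSION when found.
      message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}")
    endmacro()

    # Prints, e.g., "Foo VERSION: 1.0.0" once Foo's config file is located.
    find_package_and_print_version(Foo 1.0 REQUIRED)
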
--- docker/caffe2/jenkins/common/install_rocm.sh | 53 ++------------------ 1 file changed, 5 insertions(+), 48 deletions(-) diff --git a/docker/caffe2/jenkins/common/install_rocm.sh b/docker/caffe2/jenkins/common/install_rocm.sh index 82692d0acdb9b..90d80902e0178 100644 --- a/docker/caffe2/jenkins/common/install_rocm.sh +++ b/docker/caffe2/jenkins/common/install_rocm.sh @@ -7,7 +7,7 @@ install_ubuntu() { apt-get install -y wget apt-get install -y libopenblas-dev - DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian + DEB_ROCM_REPO=http://repo.radeon.com/rocm/misc/facebook/apt/.apt_1.9.white_rabbit/debian # Add rocm repository wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add - echo "deb [arch=amd64] $DEB_ROCM_REPO xenial main" > /etc/apt/sources.list.d/rocm.list @@ -22,15 +22,11 @@ install_ubuntu() { miopengemm \ rocblas \ rocm-profiler \ - cxlactivitylogger + cxlactivitylogger \ + rocsparse \ + hipsparse \ + rocrand - # hotfix a bug in hip's cmake files, this has been fixed in - # https://github.com/ROCm-Developer-Tools/HIP/pull/516 but for - # some reason it has not included in the latest rocm release - if [[ -f /opt/rocm/hip/cmake/FindHIP.cmake ]]; then - sudo sed -i 's/\ -I${dir}/\ $<$:-I${dir}>/' /opt/rocm/hip/cmake/FindHIP.cmake - fi - # HIP has a bug that drops DEBUG symbols in generated MakeFiles. # https://github.com/ROCm-Developer-Tools/HIP/pull/588 if [[ -f /opt/rocm/hip/cmake/FindHIP.cmake ]]; then @@ -50,42 +46,6 @@ install_hip_thrust() { git clone --recursive https://github.com/ROCmSoftwarePlatform/cub-hip.git /data/Thrust/thrust/system/cuda/detail/cub-hip } -# Install an updated version of rocRand that's PyTorch compatible. -install_rocrand() { - mkdir -p /opt/rocm/debians - curl https://s3.amazonaws.com/ossci-linux/rocrand-1.8.0-Linux.deb -o /opt/rocm/debians/rocrand.deb - dpkg -i /opt/rocm/debians/rocrand.deb -} - -# Install rocSPARSE/hipSPARSE that will be released soon - can co-exist w/ hcSPARSE which will be removed soon -install_hipsparse() { - mkdir -p /opt/rocm/debians - curl https://s3.amazonaws.com/ossci-linux/rocsparse-0.1.2.114-Linux.deb -o /opt/rocm/debians/rocsparse.deb - curl https://s3.amazonaws.com/ossci-linux/hipsparse-0.1.2.55-Linux.deb -o /opt/rocm/debians/hipsparse.deb - dpkg -i /opt/rocm/debians/rocsparse.deb - dpkg -i /opt/rocm/debians/hipsparse.deb -} - -# Install custom hcc containing two compiler fixes relevant to PyTorch -install_customhcc() { - HIP_VERSION="1.5.18354" - mkdir -p /opt/rocm/debians - curl https://s3.amazonaws.com/ossci-linux/hcc-1.2.18272-Linux.deb -o /opt/rocm/debians/hcc-Linux.deb - curl "https://s3.amazonaws.com/ossci-linux/hip_base-$HIP_VERSION.deb" -o /opt/rocm/debians/hip_base.deb - curl "https://s3.amazonaws.com/ossci-linux/hip_doc-$HIP_VERSION.deb" -o /opt/rocm/debians/hip_doc.deb - curl "https://s3.amazonaws.com/ossci-linux/hip_samples-$HIP_VERSION.deb" -o /opt/rocm/debians/hip_samples.deb - curl "https://s3.amazonaws.com/ossci-linux/hip_hcc-$HIP_VERSION.deb" -o /opt/rocm/debians/hip_hcc.deb - dpkg -i /opt/rocm/debians/hcc-Linux.deb - dpkg -i /opt/rocm/debians/hip_base.deb - dpkg -i /opt/rocm/debians/hip_doc.deb - dpkg -i /opt/rocm/debians/hip_samples.deb - dpkg -i /opt/rocm/debians/hip_hcc.deb - - if [[ -f /opt/rocm/hip/cmake/FindHIP.cmake ]]; then - sudo sed -i 's/\ -I${dir}/\ $<$:-I${dir}>/' /opt/rocm/hip/cmake/FindHIP.cmake - fi -} - # Install Python packages depending on the base OS if [ -f /etc/lsb-release ]; then install_ubuntu @@ -97,6 +57,3 @@ else fi install_hip_thrust -install_rocrand -install_hipsparse 
-install_customhcc From 6324c54498a679ee05f4cdb554d578be7e849306 Mon Sep 17 00:00:00 2001 From: jithunnair-amd <37884920+jithunnair-amd@users.noreply.github.com> Date: Mon, 24 Sep 2018 11:19:33 -0500 Subject: [PATCH 06/11] Print rocm-dev and other library versions (#220) --- cmake/public/LoadHIP.cmake | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake index d48bdefa4776e..7c035fe7bfa05 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -101,6 +101,16 @@ macro(find_package_and_print_version PACKAGE_NAME) message("${PACKAGE_NAME} VERSION: ${${PACKAGE_NAME}_VERSION}") endmacro() +message("\n***** Library versions from dpkg *****\n") +execute_process(COMMAND dpkg -l COMMAND grep rocm-dev COMMAND awk "{print $2 \" VERSION: \" $3}") +execute_process(COMMAND dpkg -l COMMAND grep rocm-libs COMMAND awk "{print $2 \" VERSION: \" $3}") +execute_process(COMMAND dpkg -l COMMAND grep hsakmt-roct COMMAND awk "{print $2 \" VERSION: \" $3}") +execute_process(COMMAND dpkg -l COMMAND grep rocr-dev COMMAND awk "{print $2 \" VERSION: \" $3}") +execute_process(COMMAND dpkg -l COMMAND grep -w hcc COMMAND awk "{print $2 \" VERSION: \" $3}") +execute_process(COMMAND dpkg -l COMMAND grep hip_base COMMAND awk "{print $2 \" VERSION: \" $3}") +execute_process(COMMAND dpkg -l COMMAND grep hip_hcc COMMAND awk "{print $2 \" VERSION: \" $3}") + +message("\n***** Library versions from cmake find_package *****\n") # Find the HIP Package find_package_and_print_version(HIP 1.0) From 859f8bf0174e889833b555366a66e16cfd489804 Mon Sep 17 00:00:00 2001 From: Johannes M Dieterich Date: Wed, 10 Oct 2018 11:30:32 -0500 Subject: [PATCH 07/11] Revert "enabled tests in test_nn and test_torch (#247)" This reverts commit e26cc0e2256a057094db31ac54307134521986ab. 
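For reference, the execute_process calls added in PATCH 06 work because CMake connects successive COMMAND clauses into a pipeline, piping each process's standard output into the next, so the dpkg/grep/awk chain needs no shell. A minimal sketch of that behavior, assuming a Unix host with echo and tr on the PATH:

    # Successive COMMANDs form a pipeline: stdout of one feeds stdin of the next.
    execute_process(
      COMMAND echo "rocm-dev 1.9.0"
      COMMAND tr a-z A-Z
      OUTPUT_VARIABLE out)
    message("${out}")  # prints: ROCM-DEV 1.9.0
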
--- test/test_nn.py | 43 +++++++++++++++++++++++++++++++++++++++++++ test/test_torch.py | 13 +++++++++++++ 2 files changed, 56 insertions(+) diff --git a/test/test_nn.py b/test/test_nn.py index cbae395594884..6f9004c76d04c 100644 --- a/test/test_nn.py +++ b/test/test_nn.py @@ -1858,6 +1858,7 @@ def test_embedding_max_norm(self): @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") @repeat_test_for_types(ALL_TENSORTYPES) + @skipIfRocm def test_embedding_max_norm_cuda(self, dtype=torch.float): embedding = nn.Embedding(22, 5, max_norm=1.0).to("cuda", dtype=dtype) # nn.Embedding only takes LongTensor as input @@ -4557,6 +4558,7 @@ def test_inplace_thnn(self): @unittest.skipIf(not TEST_CUDA, 'CUDA not available') @repeat_test_for_types(ALL_TENSORTYPES) + @skipIfRocm def test_noncontig_conv_grad_cuda(self, dtype=torch.float): # FIXME: remove after adding non-contiguous grad tests for all modules module = nn.Conv2d(3, 5, kernel_size=3, padding=1).to("cuda", dtype) @@ -6008,6 +6010,7 @@ def test_conv_double_backward_stride(self): no_weight) @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") + @skipIfRocm def test_cudnn_noncontiguous_weight(self): # Noncontiguous weights must be contiguous() before being # passed to cuDNN @@ -7502,6 +7505,7 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, check_eval=True, skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='BatchNorm2d', @@ -7511,6 +7515,7 @@ def multimarginloss_weights_no_reduce_test(): check_eval=True, desc='2d_simple_average', skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='BatchNorm2d', @@ -7520,6 +7525,7 @@ def multimarginloss_weights_no_reduce_test(): check_eval=True, desc='momentum', skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='BatchNorm2d', @@ -7538,6 +7544,7 @@ def multimarginloss_weights_no_reduce_test(): check_eval=True, desc='not_tracking_stats', skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='BatchNorm3d', @@ -7545,6 +7552,7 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 4, 4, 4), cudnn=True, check_eval=True, + decorator=skipIfRocm, ), dict( module_name='BatchNorm3d', @@ -7553,6 +7561,7 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, check_eval=True, desc='3d_simple_average', + decorator=skipIfRocm, ), dict( module_name='BatchNorm3d', @@ -7561,6 +7570,7 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, check_eval=True, desc='momentum', + decorator=skipIfRocm, ), dict( module_name='BatchNorm3d', @@ -7577,6 +7587,7 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, check_eval=True, desc='not_tracking_stats', + decorator=skipIfRocm, ), dict( module_name='InstanceNorm1d', @@ -7721,6 +7732,7 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 4, 10), cudnn=True, skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv1d', @@ -7729,6 +7741,7 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, desc='stride', skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv1d', @@ -7737,6 +7750,7 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, desc='pad1', skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv1d', @@ -7745,6 +7759,7 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, desc='pad2', skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv1d', @@ -7753,6 +7768,7 @@ 
def multimarginloss_weights_no_reduce_test(): cudnn=True, desc='pad1size1', skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv1d', @@ -7761,12 +7777,14 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, desc='pad2size1', skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv1d_dilated', constructor=lambda: nn.Conv1d(4, 5, kernel_size=3, dilation=2), input_size=(2, 4, 10), skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv1d_groups', @@ -7774,12 +7792,14 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 4, 6), cudnn=True, skip_double=TEST_WITH_ROCM, + test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='ConvTranspose1d', constructor=lambda: nn.ConvTranspose1d(3, 4, kernel_size=3, stride=(3,), padding=1, output_padding=(1,)), cudnn=True, input_size=(1, 3, 7), + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose1d', @@ -7787,6 +7807,7 @@ def multimarginloss_weights_no_reduce_test(): input_size=(1, 3, 6), cudnn=True, desc='no_bias', + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose1d', @@ -7794,12 +7815,14 @@ def multimarginloss_weights_no_reduce_test(): input_size=(1, 3, 6), cudnn=True, desc='dilated', + test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='ConvTranspose1d_groups', constructor=lambda: nn.ConvTranspose1d(4, 6, 3, stride=(3,), padding=1, output_padding=(1,), groups=2), cudnn=True, input_size=(2, 4, 7), + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='MaxPool1d', @@ -7817,6 +7840,7 @@ def multimarginloss_weights_no_reduce_test(): constructor_args=(3, 4, (3, 2)), input_size=(2, 3, 7, 5), cudnn=True, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv2d', @@ -7824,6 +7848,7 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 6, 6), cudnn=True, desc='strided', + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv2d', @@ -7831,6 +7856,7 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 6, 6), cudnn=True, desc='padding', + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv2d', @@ -7838,6 +7864,7 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 8, 8), cudnn=True, desc='dilated', + decorator=skipIfRocm, ), dict( module_name='Conv2d', @@ -7845,23 +7872,27 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 6, 5), cudnn=True, desc='no_bias', + decorator=skipIfRocm, ), dict( fullname='Conv2d_groups', constructor=lambda: nn.Conv2d(4, 6, (3, 2), groups=2), input_size=(2, 4, 6, 5), cudnn=True, + decorator=skipIfRocm, ), dict( fullname='Conv2d_groups_thnn', constructor=lambda: nn.Conv2d(4, 6, (3, 2), groups=2), input_size=(2, 4, 6, 5), + decorator=skipIfRocm, ), dict( module_name='ConvTranspose2d', constructor_args=(3, 4, 3, (3, 2), 1, (1, 1)), cudnn=True, input_size=(1, 3, 7, 6), + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose2d', @@ -7869,6 +7900,7 @@ def multimarginloss_weights_no_reduce_test(): input_size=(1, 3, 6, 7), cudnn=True, desc='dilated', + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose2d', @@ -7876,12 +7908,14 @@ def multimarginloss_weights_no_reduce_test(): input_size=(1, 3, 6, 7), cudnn=True, desc='no_bias', + test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='ConvTranspose2d_groups', constructor=lambda: nn.ConvTranspose2d(2, 4, (2, 3), groups=2), input_size=(1, 2, 4, 5), cudnn=True, + test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv2d_depthwise', @@ -8038,6 +8072,7 @@ def 
multimarginloss_weights_no_reduce_test(): constructor_args=(3, 4, (2, 3, 4)), input_size=(2, 3, 3, 4, 5), cudnn=True, + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv3d', @@ -8045,6 +8080,7 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 3, 4, 5), cudnn=True, desc='no_bias', + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv3d', @@ -8052,6 +8088,7 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 5, 5, 5), cudnn=True, desc='stride', + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='Conv3d', @@ -8059,28 +8096,33 @@ def multimarginloss_weights_no_reduce_test(): input_size=(2, 3, 5, 5, 5), cudnn=True, desc='stride_padding', + test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv3d_groups', constructor=lambda: nn.Conv3d(4, 6, kernel_size=3, groups=2), input_size=(2, 4, 4, 5, 4), cudnn=True, + test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv3d_dilated', constructor=lambda: nn.Conv3d(3, 4, kernel_size=2, dilation=2), input_size=(2, 3, 5, 5, 5), + test_cuda=(not TEST_WITH_ROCM), ), dict( fullname='Conv3d_dilated_strided', constructor=lambda: nn.Conv3d(3, 4, kernel_size=2, dilation=2, stride=2), input_size=(2, 3, 5, 5, 5), + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose3d', constructor_args=(2, 3, (2, 3, 2)), cudnn=True, input_size=(1, 2, 4, 5, 4), + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='ConvTranspose3d', @@ -8088,6 +8130,7 @@ def multimarginloss_weights_no_reduce_test(): cudnn=True, input_size=(1, 2, 4, 5, 4), desc='dilated', + test_cuda=(not TEST_WITH_ROCM), ), dict( module_name='MaxPool3d', diff --git a/test/test_torch.py b/test/test_torch.py index 2ee0200f630c8..8479929e30f20 100644 --- a/test/test_torch.py +++ b/test/test_torch.py @@ -1005,6 +1005,7 @@ def test_multidim(x, dim): expected = fn(y, 1, keepdim=False) self.assertEqual(x[:, 1], expected, '{} with out= kwarg'.format(fn_name)) + @skipIfRocm def test_dim_reduction(self): self._test_dim_reduction(self, lambda t: t) @@ -3457,6 +3458,7 @@ def check_order(a, b): seen.add(ixx[k][j]) self.assertEqual(len(seen), size) + @skipIfRocm def test_sort(self): SIZE = 4 x = torch.rand(SIZE, SIZE) @@ -3582,6 +3584,7 @@ def test_topk_arguments(self): self.assertRaises(TypeError, lambda: q.topk(4, True)) @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA') + @skipIfRocm def test_topk_noncontiguous_gpu(self): t = torch.randn(20, device="cuda")[::2] top1, idx1 = t.topk(5) @@ -3589,6 +3592,7 @@ def test_topk_noncontiguous_gpu(self): self.assertEqual(top1, top2) self.assertEqual(idx1, idx2) + @skipIfRocm def test_kthvalue(self): SIZE = 50 x = torch.rand(SIZE, SIZE, SIZE) @@ -3633,6 +3637,7 @@ def test_kthvalue(self): self.assertEqual(torch.kthvalue(y, 3)[0], 3, 0) self.assertEqual(torch.kthvalue(y, 2)[0], 1, 0) + @skipIfRocm def test_median(self): for size in (155, 156): x = torch.rand(size, size) @@ -3668,6 +3673,7 @@ def test_median(self): # input unchanged self.assertEqual(x, x0, 0) + @skipIfRocm def test_mode(self): x = torch.arange(1., SIZE * SIZE + 1).clone().resize_(SIZE, SIZE) x[:2] = 1 @@ -3997,6 +4003,7 @@ def test_is_signed_cuda(self): self.assertEqual(torch.cuda.HalfTensor(10).is_signed(), True) @skipIfNoLapack + @skipIfRocm def test_gesv(self): a = torch.Tensor(((6.80, -2.11, 5.66, 5.97, 8.23), (-6.05, -3.30, 5.36, -4.44, 1.08), @@ -4128,6 +4135,7 @@ def test_gesv_batched_dims(self): self._test_gesv_batched_dims(self, lambda t: t) @skipIfNoLapack + @skipIfRocm def test_qr(self): # Since the QR decomposition is unique only up to 
the signs of the rows of @@ -4315,6 +4323,7 @@ def test_trtrs(self): self._test_trtrs(self, lambda t: t) @skipIfNoLapack + @skipIfRocm def test_gels(self): def _test_underdetermined(a, b, expectedNorm): m = a.size()[0] @@ -4430,6 +4439,7 @@ def check_norm(a, b, expected_norm, gels_result): self.assertEqual((torch.mm(a, tb) - b).norm(), expectedNorm, 1e-8) @skipIfNoLapack + @skipIfRocm def test_eig(self): a = torch.Tensor(((1.96, 0.00, 0.00, 0.00, 0.00), (-6.49, 3.80, 0.00, 0.00, 0.00), @@ -4511,6 +4521,7 @@ def _test_symeig(self, conv_fn): self.assertEqual(X, Xhat, 1e-8, 'VeV\' wrong') @skipIfNoLapack + @skipIfRocm def test_symeig(self): self._test_symeig(self, lambda x: x) @@ -5295,6 +5306,7 @@ def test_potri(self): self.assertLessEqual(inv0.dist(inv1), 1e-12) @skipIfNoLapack + @skipIfRocm def test_pstrf(self): def checkPsdCholesky(a, uplo, inplace): if inplace: @@ -8017,6 +8029,7 @@ def test_from_file(self): t2.fill_(rnum) self.assertEqual(t1, t2, 0) + @skipIfRocm def test_print(self): default_type = torch.Tensor().type() for t in torch._tensor_classes: From 287284ad574f3c6cc478aa3b020a701dffbbb536 Mon Sep 17 00:00:00 2001 From: Johannes M Dieterich Date: Wed, 10 Oct 2018 11:30:35 -0500 Subject: [PATCH 08/11] Revert "enabled cuda tests (#248)" This reverts commit 17c66260ed67d6d85466163f795bdd092783639d. --- test/test_cuda.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/test/test_cuda.py b/test/test_cuda.py index 0ef834e995a8f..cf21208c77aab 100644 --- a/test/test_cuda.py +++ b/test/test_cuda.py @@ -358,9 +358,9 @@ def tmp(t): ('max', small_3d_unique, lambda t: [1], 'dim'), ('max', small_3d_unique, lambda t: [-1], 'neg_dim'), ('max', medium_2d, lambda t: [medium_2d(t)], 'elementwise'), - ('min', small_3d_unique, lambda t: []), - ('min', small_3d_unique, lambda t: [1], 'dim'), - ('min', small_3d_unique, lambda t: [-1], 'neg_dim'), + ('min', small_3d_unique, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"), + ('min', small_3d_unique, lambda t: [1], 'dim', types, False, skipIfRocm), + ('min', small_3d_unique, lambda t: [-1], 'neg_dim', types, False, skipIfRocm), ('min', medium_2d, lambda t: [medium_2d(t)], 'elementwise'), ('mean', small_3d, lambda t: []), ('mean', small_3d, lambda t: [-1], 'neg_dim'), @@ -411,11 +411,11 @@ def tmp(t): ('size', new_t(1, 2, 3, 4), lambda t: [],), ('size', new_t(1, 2, 3, 4), lambda t: [1], 'dim'), ('size', new_t(1, 2, 3, 4), lambda t: [-2], 'neg_dim'), - ('sort', small_3d_unique, lambda t: [], ''), - ('sort', small_3d_unique, lambda t: [1], 'dim'), - ('sort', small_3d_unique, lambda t: [-1], 'neg_dim'), - ('sort', small_3d_unique, lambda t: [1, True], 'dim_descending'), - ('sort', small_3d_unique, lambda t: [-1, True], 'neg_dim_descending'), + ('sort', small_3d_unique, lambda t: [], '', types, False, skipIfRocm), + ('sort', small_3d_unique, lambda t: [1], 'dim', types, False, skipIfRocm), + ('sort', small_3d_unique, lambda t: [-1], 'neg_dim', types, False, skipIfRocm), + ('sort', small_3d_unique, lambda t: [1, True], 'dim_descending', types, False, skipIfRocm), + ('sort', small_3d_unique, lambda t: [-1, True], 'neg_dim_descending', types, False, skipIfRocm), ('split', small_3d, lambda t: [2],), ('split', small_3d, lambda t: [2, 1], 'dim'), ('split', small_3d, lambda t: [2, -3], 'neg_dim'), @@ -427,17 +427,10 @@ def tmp(t): ('transpose', new_t(1, 2, 3, 4), lambda t: [1, 2],), ('transpose', new_t(1, 2, 3, 4), lambda t: [-1, -2], 'neg_dim'), ('to_list', small_3d, lambda t: [],), -<<<<<<< HEAD ('topk', 
small_3d_unique, lambda t: [2, 1, False, True], 'dim_sort', types, False, skipIfRocm), ('topk', small_3d_unique, lambda t: [2, -1, False, True], 'neg_dim_sort', types, False, skipIfRocm), ('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort', types, False, skipIfRocm), ('trace', medium_2d, lambda t: [], '', types, False, "skipIfRocm:HalfTensor"), -======= - ('topk', small_3d_unique, lambda t: [2, 1, False, True], 'dim_sort', types, False, "skipIfRocm:HalfTensor"), - ('topk', small_3d_unique, lambda t: [2, -1, False, True], 'neg_dim_sort', types, False, "skipIfRocm:HalfTensor"), - ('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort', types, False, "skipIfRocm:HalfTensor"), - ('trace', medium_2d, lambda t: []), ->>>>>>> 02262a240... enabled cuda tests (#248) ('tril', medium_2d, lambda t: [],), ('tril', medium_2d_expanded, lambda t: [], 'zero_stride', types, True), ('tril', medium_2d, lambda t: [2], 'positive'), @@ -1235,9 +1228,11 @@ def test_cat(self): z = torch.cat([x, y]) self.assertEqual(z.size(), (21, SIZE, SIZE)) + @skipIfRocm def test_cat_empty_legacy(self): TestTorch._test_cat_empty_legacy(self, use_cuda=True) + @skipIfRocm def test_cat_empty(self): TestTorch._test_cat_empty(self, use_cuda=True) @@ -1731,6 +1726,7 @@ def test_btrisolve(self): def test_dim_reduction(self): TestTorch._test_dim_reduction(self, lambda t: t.cuda()) + @skipIfRocm def test_tensor_gather(self): TestTorch._test_gather(self, lambda t: t.cuda(), False) From 842997aeec8cf4dfdcd7277ef570d711f2487af6 Mon Sep 17 00:00:00 2001 From: Yaxun Sam Liu Date: Wed, 12 Sep 2018 16:14:34 -0400 Subject: [PATCH 09/11] Fix HIP_HIPCC_FLAGS HIP_HIPCC_FLAGS is defined as a list with generator expression in FindHIP.cmake. It should be treated as a list in caffe cmake files. Also target_compile_options needs to be used since set_target_properties does not support generator expression. --- caffe2/CMakeLists.txt | 4 ++-- cmake/Dependencies.cmake | 24 ++++++++++++------------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 07f69d9f7bab9..373110982d0aa 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -365,7 +365,7 @@ if(USE_ROCM) hip_add_library(caffe2_hip ${Caffe2_HIP_SRCS}) # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added. 
- set_target_properties(caffe2_hip PROPERTIES COMPILE_FLAGS ${HIP_HIPCC_FLAGS}) + target_compile_options(caffe2_hip PRIVATE ${HIP_HIPCC_FLAGS}) target_link_libraries(caffe2_hip PUBLIC caffe2) target_link_libraries(caffe2_hip PUBLIC ${Caffe2_HIP_DEPENDENCY_LIBS}) @@ -550,7 +550,7 @@ if (BUILD_PYTHON) if(USE_ROCM) hip_add_library(caffe2_pybind11_state_hip MODULE ${Caffe2_HIP_PYTHON_SRCS}) set_target_properties(caffe2_pybind11_state_hip PROPERTIES LINKER_LANGUAGE HIP) - set_target_properties(caffe2_pybind11_state_hip PROPERTIES COMPILE_FLAGS "${HIP_HIPCC_FLAGS} -fvisibility=hidden") + target_compile_options(caffe2_pybind11_state_hip PRIVATE ${HIP_HIPCC_FLAGS} -fvisibility=hidden) set_target_properties(caffe2_pybind11_state_hip PROPERTIES PREFIX "") set_target_properties(caffe2_pybind11_state_hip PROPERTIES SUFFIX ${PY_EXT_SUFFIX}) if (APPLE) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 98685e961e1bf..d927d62d0be4d 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -523,18 +523,18 @@ if(NOT BUILD_ATEN_MOBILE) message(INFO "Compiling with HIP for AMD.") caffe2_update_option(USE_ROCM ON) - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -fPIC") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -D__HIP_PLATFORM_HCC__=1") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DCUDA_HAS_FP16=1") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -D__HIP_NO_HALF_OPERATORS__=1") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -D__HIP_NO_HALF_CONVERSIONS__=1") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -Wno-macro-redefined") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -Wno-inconsistent-missing-override") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -Wno-exceptions") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -Wno-shift-count-negative") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -Wno-shift-count-overflow") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -Wno-unused-command-line-argument") - set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -Wno-duplicate-decl-specifier") + list(APPEND HIP_HIPCC_FLAGS -fPIC) + list(APPEND HIP_HIPCC_FLAGS -D__HIP_PLATFORM_HCC__=1) + list(APPEND HIP_HIPCC_FLAGS -DCUDA_HAS_FP16=1) + list(APPEND HIP_HIPCC_FLAGS -D__HIP_NO_HALF_OPERATORS__=1) + list(APPEND HIP_HIPCC_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1) + list(APPEND HIP_HIPCC_FLAGS -Wno-macro-redefined) + list(APPEND HIP_HIPCC_FLAGS -Wno-inconsistent-missing-override) + list(APPEND HIP_HIPCC_FLAGS -Wno-exceptions) + list(APPEND HIP_HIPCC_FLAGS -Wno-shift-count-negative) + list(APPEND HIP_HIPCC_FLAGS -Wno-shift-count-overflow) + list(APPEND HIP_HIPCC_FLAGS -Wno-unused-command-line-argument) + list(APPEND HIP_HIPCC_FLAGS -Wno-duplicate-decl-specifier) set(Caffe2_HIP_INCLUDES ${hip_INCLUDE_DIRS} ${hcc_INCLUDE_DIRS} ${hsa_INCLUDE_DIRS} ${rocrand_INCLUDE_DIRS} ${hiprand_INCLUDE_DIRS} ${rocblas_INCLUDE_DIRS} ${miopen_INCLUDE_DIRS} ${thrust_INCLUDE_DIRS} $ ${Caffe2_HIP_INCLUDES}) From ddc3bbfd9bdb6aed2e74062e8cbf845571cec420 Mon Sep 17 00:00:00 2001 From: iotamudelta Date: Wed, 10 Oct 2018 11:51:45 -0500 Subject: [PATCH 10/11] RHEL enablement (#259) * Add miopengemm as a proper, required dependency to LoadHIP. 
* Always install hip-thrust --- cmake/public/LoadHIP.cmake | 10 +++++++++- docker/caffe2/jenkins/common/install_rocm.sh | 5 ++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/cmake/public/LoadHIP.cmake b/cmake/public/LoadHIP.cmake index 7c035fe7bfa05..1468cc9473b0a 100644 --- a/cmake/public/LoadHIP.cmake +++ b/cmake/public/LoadHIP.cmake @@ -83,6 +83,13 @@ ELSE() SET(ROCRAND_PATH $ENV{ROCRAND_PATH}) ENDIF() +# MIOPENGEMM +IF(NOT DEFINED ENV{MIOPENGEMM_PATH}) + SET(MIOPENGEMM_PATH ${ROCM_PATH}/miopengemm) +ELSE() + SET(MIOPENGEMM_PATH $ENV{MIOPENGEMM_PATH}) +ENDIF() + # MIOPEN_PATH IF(NOT DEFINED ENV{MIOPEN_PATH}) SET(MIOPEN_PATH ${ROCM_PATH}/miopen) @@ -133,6 +140,7 @@ IF(HIP_FOUND) set(rocrand_DIR ${ROCRAND_PATH}/lib/cmake/rocrand) set(hiprand_DIR ${HIPRAND_PATH}/lib/cmake/hiprand) set(rocblas_DIR ${ROCBLAS_PATH}/lib/cmake/rocblas) + set(miopengemm_DIR ${MIOPENGEMM_PATH}/lib/cmake/miopengemm) set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen) set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft) set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse) @@ -142,7 +150,7 @@ IF(HIP_FOUND) find_package_and_print_version(hiprand REQUIRED) find_package_and_print_version(rocblas REQUIRED) find_package_and_print_version(miopen REQUIRED) - find_package_and_print_version(miopengemm) + find_package_and_print_version(miopengemm REQUIRED) find_package_and_print_version(rocfft REQUIRED) #find_package_and_print_version(hipsparse REQUIRED) find_package_and_print_version(rocsparse REQUIRED) diff --git a/docker/caffe2/jenkins/common/install_rocm.sh b/docker/caffe2/jenkins/common/install_rocm.sh index 90d80902e0178..25d2f1da44663 100644 --- a/docker/caffe2/jenkins/common/install_rocm.sh +++ b/docker/caffe2/jenkins/common/install_rocm.sh @@ -25,7 +25,8 @@ install_ubuntu() { cxlactivitylogger \ rocsparse \ hipsparse \ - rocrand + rocrand \ + hip-thrust # HIP has a bug that drops DEBUG symbols in generated MakeFiles. # https://github.com/ROCm-Developer-Tools/HIP/pull/588 @@ -41,6 +42,8 @@ install_centos() { install_hip_thrust() { # Needed for now, will be replaced soon + # We are now (redundantly) installing the Thrust package into another location (/opt/rocm/include/thrust) which we will + # switch over to git clone --recursive https://github.com/ROCmSoftwarePlatform/Thrust.git /data/Thrust rm -rf /data/Thrust/thrust/system/cuda/detail/cub-hip git clone --recursive https://github.com/ROCmSoftwarePlatform/cub-hip.git /data/Thrust/thrust/system/cuda/detail/cub-hip From 5c6c4424b4afe0cbc9eb4f1c68fe5cb89740d3a5 Mon Sep 17 00:00:00 2001 From: Johannes M Dieterich Date: Wed, 10 Oct 2018 14:53:24 -0500 Subject: [PATCH 11/11] Skip regressions with white rabbit. 
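The @skipIfRocm decorator this patch applies (and the earlier patches toggle back and forth) follows the standard unittest skip pattern. A rough sketch of such a helper, not necessarily the repository's exact implementation, assuming TEST_WITH_ROCM is derived from an environment variable set by CI:

    import os
    import unittest
    from functools import wraps

    # Assumed convention: ROCm CI runs export PYTORCH_TEST_WITH_ROCM=1.
    TEST_WITH_ROCM = os.environ.get('PYTORCH_TEST_WITH_ROCM', '0') == '1'

    def skipIfRocm(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            if TEST_WITH_ROCM:
                raise unittest.SkipTest("test doesn't currently work on ROCm")
            return fn(*args, **kwargs)
        return wrapper
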
--- test/test_dataloader.py | 4 ++++ test/test_torch.py | 1 + 2 files changed, 5 insertions(+) diff --git a/test/test_dataloader.py b/test/test_dataloader.py index fa8db87cdc74a..02f83e80a4af1 100644 --- a/test/test_dataloader.py +++ b/test/test_dataloader.py @@ -417,6 +417,7 @@ def test_growing_dataset(self): self.assertEqual(len(dataloader_shuffle), 5) @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") + @skipIfRocm def test_sequential_pin_memory(self): loader = DataLoader(self.dataset, batch_size=2, pin_memory=True) for input, target in loader: @@ -575,6 +576,7 @@ def test_batch_sampler(self): self._test_batch_sampler(num_workers=4) @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") + @skipIfRocm def test_shuffle_pin_memory(self): loader = DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4, pin_memory=True) for input, target in loader: @@ -809,6 +811,7 @@ def setUp(self): self.dataset = StringDataset() @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") + @skipIfRocm def test_shuffle_pin_memory(self): loader = DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4, pin_memory=True) for batch_ndx, (s, n) in enumerate(loader): @@ -852,6 +855,7 @@ def test_sequential_batch(self): self.assertEqual(n[1], idx + 1) @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") + @skipIfRocm def test_pin_memory(self): loader = DataLoader(self.dataset, batch_size=2, pin_memory=True) for batch_ndx, sample in enumerate(loader): diff --git a/test/test_torch.py b/test/test_torch.py index 8479929e30f20..725d8804e668d 100644 --- a/test/test_torch.py +++ b/test/test_torch.py @@ -8201,6 +8201,7 @@ def test_empty_like(self): self.assertEqual(torch.empty_like(a).type(), a.type()) @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA') + @skipIfRocm def test_pin_memory(self): x = torch.randn(3, 5) self.assertFalse(x.is_pinned())
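For reference, PATCH 09 hinges on a CMake subtlety: target_compile_options appends to the COMPILE_OPTIONS list, whose items are kept separate and whose generator expressions ($<...>) are evaluated, whereas set_target_properties writes COMPILE_FLAGS as one flat string, which (per that patch's rationale) does not support generator expressions and also mangles a multi-element flags list. A minimal sketch of the working form, assuming a hypothetical target demo built from demo.cpp:

    add_library(demo demo.cpp)
    # HIP_HIPCC_FLAGS is a CMake list and may contain generator expressions;
    # target_compile_options preserves list items and evaluates $<...>.
    list(APPEND HIP_HIPCC_FLAGS -fPIC $<$<COMPILE_LANGUAGE:CXX>:-Wall>)
    target_compile_options(demo PRIVATE ${HIP_HIPCC_FLAGS})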