Merge pull request tensorflow#48 from ROCmSoftwarePlatform/fix_rocblas_known_limitation

Skip tests to cope with known limitations in rocBLAS
Cerebras · Jun 22, 2018 · bea55d4 · bea55d4
2 parents bfcad95 + 2e3147a
commit bea55d4
Show file tree

Hide file tree

Showing 12 changed files with 66 additions and 35 deletions.
diff --git a/tensorflow/python/kernel_tests/batch_matmul_op_test.py b/tensorflow/python/kernel_tests/batch_matmul_op_test.py
@@ -196,9 +196,13 @@ def Test(self):
 
 
 if __name__ == "__main__":
-  for dtype_ in [
-      np.float16, np.float32, np.float64, np.complex64, np.complex128, np.int32
-  ]:
+  dtypes_to_test = [np.float16, np.float32, np.float64, np.complex64,
+                    np.complex128, np.int32]
+  if test.is_built_with_rocm():
+    # rocBLAS in ROCm stack does not support GEMM for complex types
+    #dtypes_to_test = [np.float16, np.float32, np.float64, np.int32]
+    dtypes_to_test = [np.float32, np.float64, np.int32]
+  for dtype_ in dtypes_to_test:
     for adjoint_a_ in False, True:
       for adjoint_b_ in False, True:
         name = "%s_%s_%s" % (dtype_.__name__, adjoint_a_, adjoint_b_)

diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py
@@ -110,7 +110,8 @@ def _verifyCholeskyBase(self, sess, x, chol, verification):
 
   def _verifyCholesky(self, x):
     # Verify that LL^T == x.
-    with self.test_session(use_gpu=True) as sess:
+    # rocBLAS on ROCm stack does not support TRSM yet
+    with self.test_session(use_gpu=True and not test.is_built_with_rocm()) as sess:
       chol = linalg_ops.cholesky(x)
       verification = math_ops.matmul(chol, chol, adjoint_b=True)
       self._verifyCholeskyBase(sess, x, chol, verification)
@@ -255,7 +256,8 @@ def runFiniteDifferences(self,
                            dtypes=(dtypes_lib.float32, dtypes_lib.float64,
                                    dtypes_lib.complex64, dtypes_lib.complex128),
                            scalarTest=False):
-    with self.test_session(use_gpu=True):
+    # rocBLAS on ROCm stack does not support TRSM yet
+    with self.test_session(use_gpu=True and not test.is_built_with_rocm()):
       for shape in shapes:
         for batch in False, True:
           for dtype in dtypes:

diff --git a/tensorflow/python/kernel_tests/linalg_grad_test.py b/tensorflow/python/kernel_tests/linalg_grad_test.py
@@ -97,9 +97,13 @@ def _GetMatrixBinaryFunctorGradientTest(functor_,
                                         **kwargs_):
 
   def Test(self):
+    # rocBLAS on ROCm stack does not support TRSM yet
+    if test_lib.is_built_with_rocm():
+      use_gpu = False
+    else:
     # TODO(rmlarsen): Debug illegal address bug on CUDA and re-enable
     # GPU test for matrix_solve.
-    use_gpu = False if functor_ == linalg_ops.matrix_solve else True
+      use_gpu = False if functor_ == linalg_ops.matrix_solve else True
 
     with self.test_session(use_gpu=use_gpu):
       np.random.seed(1)

diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py
@@ -52,7 +52,8 @@ def setUp(self):
   def test_works_with_five_different_random_pos_def_matrices(self):
     for n in range(1, 6):
       for np_type, atol in [(np.float32, 0.05), (np.float64, 1e-5)]:
-        with self.test_session(use_gpu=True):
+        # rocBLAS on ROCm stack does not support TRSM for fp32/fp64 yet
+        with self.test_session(use_gpu=True and not test.is_built_with_rocm()):
           # Create 2 x n x n matrix
           array = np.array(
               [_RandomPDMatrix(n, self.rng),
@@ -180,11 +181,12 @@ def Test(self):
 
 
 if __name__ == "__main__":
+  dtypes_to_test = [dtypes.int32, dtypes.int64, dtypes.float32,
+                    dtypes.float64, dtypes.complex64, dtypes.complex128]
   for _num_rows in 0, 1, 2, 5:
     for _num_columns in None, 0, 1, 2, 5:
       for _batch_shape in None, [], [2], [2, 3]:
-        for _dtype in (dtypes.int32, dtypes.int64, dtypes.float32,
-                       dtypes.float64, dtypes.complex64, dtypes.complex128):
+        for _dtype in dtypes_to_test:
           name = "dtype_%s_num_rows_%s_num_column_%s_batch_shape_%s_" % (
               _dtype.name, _num_rows, _num_columns, _batch_shape)
           _AddTest(EyeTest, "EyeTest", name,

diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py
@@ -213,9 +213,14 @@ def testInfixMatmulDoesDotProduct(self):
 if __name__ == "__main__":
   sizes = [1, 3, 5]
   trans_options = [[False, False], [True, False], [False, True]]
+  dtypes_to_test = [np.int32, np.float16, np.float32, np.float64, np.complex64,
+                  np.complex128]
+  if test_lib.is_built_with_rocm():
+    # rocBLAS on ROCm stack does not support GEMV for complex types
+    # rocBLAS on ROCm stack does not support SGEMM for fp16 types
+    dtypes_to_test = [np.int32, np.float32, np.float64]
   for use_static_shape in [False, True]:
-    for dtype in (np.int32, np.float16, np.float32, np.float64, np.complex64,
-                  np.complex128):
+    for dtype in dtypes_to_test:
       if not use_static_shape and dtype == np.int32:
         # TODO(rmlarsen): Re-enable this test when we have fixed the underlying
         # bug in Windows (b/35935459).

diff --git a/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py b/tensorflow/python/kernel_tests/matrix_solve_ls_op_test.py
@@ -344,7 +344,12 @@ def benchmarkMatrixSolveLsOp(self):
 
 
 if __name__ == "__main__":
-  for dtype_ in [np.float32, np.float64, np.complex64, np.complex128]:
+  dtypes_to_test = [np.float32, np.float64, np.complex64, np.complex128]
+  if test_lib.is_built_with_rocm():
+    # rocBLAS on ROCm stack does not support batched GEMM for complex types
+    # rocBLAS on ROCm stack does not support TRSM for fp64
+    dtypes_to_test = [np.float32]
+  for dtype_ in dtypes_to_test:
     for use_placeholder_ in [True, False]:
       for fast_ in [True, False]:
         l2_regularizers = [0] if dtype_ == np.complex128 else [0, 0.1]
@@ -358,7 +363,7 @@ def benchmarkMatrixSolveLsOp(self):
                                                              l2_regularizer_)
             _AddTest(MatrixSolveLsOpTest, "MatrixSolveLsOpTest", name,
                      test_case)
-  for dtype_ in [np.float32, np.float64, np.complex64, np.complex128]:
+  for dtype_ in dtypes_to_test:
     for test_case in _GetLargeMatrixSolveLsOpTests(dtype_, False, True, 0.0):
       name = "%s_%s" % (test_case.__name__, dtype_.__name__)
       _AddTest(MatrixSolveLsOpTest, "MatrixSolveLsOpTest", name, test_case)

diff --git a/tensorflow/python/kernel_tests/qr_op_test.py b/tensorflow/python/kernel_tests/qr_op_test.py
@@ -121,7 +121,8 @@ def Test(self):
           low=-1.0, high=1.0,
           size=np.prod(shape_)).reshape(shape_).astype(dtype_)
 
-    with self.test_session(use_gpu=True) as sess:
+    # rocBLAS on ROCm stack does not support TRSM yet
+    with self.test_session(use_gpu=True and not test.is_built_with_rocm()) as sess:
       if use_static_shape_:
         x_tf = constant_op.constant(x_np)
       else:
@@ -173,7 +174,8 @@ def Test(self):
       tol = 3e-2
     else:
       tol = 1e-6
-    with self.test_session(use_gpu=True):
+    # rocBLAS on ROCm stack does not support TRSM yet
+    with self.test_session(use_gpu=True and not test.is_built_with_rocm()):
       tf_a = constant_op.constant(a)
       tf_b = linalg_ops.qr(tf_a, full_matrices=full_matrices_)
       for b in tf_b:

diff --git a/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py b/tensorflow/python/kernel_tests/self_adjoint_eig_op_test.py
@@ -240,8 +240,13 @@ def Test(self):
 
 if __name__ == "__main__":
   for compute_v in True, False:
-    for dtype in (dtypes_lib.float32, dtypes_lib.float64, dtypes_lib.complex64,
-                  dtypes_lib.complex128):
+    dtypes_to_test = (dtypes_lib.float32, dtypes_lib.float64,
+                      dtypes_lib.complex64, dtypes_lib.complex128)
+    if test.is_built_with_rocm():
+      # rocBLAS library on ROCm stack doesn't properly support fp16 and
+      # complex numbers yet
+      dtypes_to_test = (dtypes_lib.float32, dtypes_lib.float64)
+    for dtype in dtypes_to_test:
       for size in 1, 2, 5, 10:
         for batch_dims in [(), (3,)] + [(3, 2)] * (max(size, size) < 10):
           shape = batch_dims + (size, size)

diff --git a/tensorflow/python/kernel_tests/svd_op_test.py b/tensorflow/python/kernel_tests/svd_op_test.py
@@ -258,7 +258,10 @@ def Test(self):
 if __name__ == "__main__":
   for compute_uv in False, True:
     for full_matrices in False, True:
-      for dtype in np.float32, np.float64, np.complex64, np.complex128:
+      # rocBLAS on ROCm stack doesn't support complex64 and complex128 types
+      dtypes = ([np.float32, np.float64] +
+                [np.complex64, np.complex128] * (not test.is_built_with_rocm()))
+      for dtype in dtypes:
         for rows in 1, 2, 5, 10, 32, 100:
           for cols in 1, 2, 5, 10, 32, 100:
             for batch_dims in [(), (3,)] + [(3, 2)] * (max(rows, cols) < 10):
@@ -272,8 +275,10 @@ def Test(self):
                                        compute_uv, full_matrices))
   for compute_uv in False, True:
     for full_matrices in False, True:
-      dtypes = ([np.float32, np.float64]
-                + [np.complex64, np.complex128] * (not compute_uv))
+      # rocBLAS on ROCm stack doesn't support complex64 and complex128 types
+      dtypes = ([np.float32, np.float64] +
+                [np.complex64, np.complex128] * (not compute_uv)
+                                              * (not test.is_built_with_rocm()))
       for dtype in dtypes:
         mat_shapes = [(10, 11), (11, 10), (11, 11)]
         if not full_matrices or not compute_uv:

diff --git a/tensorflow/python/kernel_tests/tensordot_op_test.py b/tensorflow/python/kernel_tests/tensordot_op_test.py
@@ -216,7 +216,13 @@ def test_tensordot_scalar_axes(self):
 
 
 if __name__ == "__main__":
-  for dtype in np.float16, np.float32, np.float64, np.complex64, np.complex128:
+  dtypes_to_test = [np.float16, np.float32, np.float64, np.complex64,
+                    np.complex128]
+  if test_lib.is_built_with_rocm():
+    # rocBLAS library on ROCm stack doesn't properly support fp16 and
+    # complex numbers yet
+    dtypes_to_test = [np.float32, np.float64]
+  for dtype in dtypes_to_test:
     for rank_a in 1, 2, 4, 5:
       for rank_b in 1, 2, 4, 5:
         for num_dims in range(0, min(rank_a, rank_b) + 1):

diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py
@@ -229,7 +229,11 @@ def testSparseNesterovMomentum(self):
 
   @test_util.run_in_graph_and_eager_modes(reset_test=True)
   def testMinimizeSparseResourceVariable(self):
-    for dtype in [dtypes.half, dtypes.float32, dtypes.float64]:
+    dtypes_to_test = [dtypes.half, dtypes.float32, dtypes.float64]
+    if test.is_built_with_rocm():
+      # rocBLAS on ROCm stack doesn't support fp16 sgemm yet
+      dtypes_to_test = [dtypes.float32, dtypes.float64]
+    for dtype in dtypes_to_test:
       # This test invokes the ResourceSparseApplyMomentum operation, which
       # did not have a registered GPU kernel as of April 2018. With graph
       # execution, the placement algorithm notices this and automatically

diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
@@ -44,10 +44,8 @@ bazel test --test_sharding_strategy=disabled --config=rocm --test_tag_filters=-n
     -//tensorflow/python/keras:normalization_test \
     -//tensorflow/python/keras:pooling_test \
     -//tensorflow/python/kernel_tests:atrous_conv2d_test \
-    -//tensorflow/python/kernel_tests:batch_matmul_op_test \
     -//tensorflow/python/kernel_tests:bias_op_test \
     -//tensorflow/python/kernel_tests:bincount_op_test \
-    -//tensorflow/python/kernel_tests:cholesky_op_test \
     -//tensorflow/python/kernel_tests:concat_op_test \
     -//tensorflow/python/kernel_tests:control_flow_ops_py_test \
     -//tensorflow/python/kernel_tests:conv_ops_3d_test \
@@ -60,27 +58,19 @@ bazel test --test_sharding_strategy=disabled --config=rocm --test_tag_filters=-n
     -//tensorflow/python/kernel_tests:fft_ops_test \
     -//tensorflow/python/kernel_tests:functional_ops_test \
     -//tensorflow/python/kernel_tests:init_ops_test \
-    -//tensorflow/python/kernel_tests:linalg_grad_test \
-    -//tensorflow/python/kernel_tests:linalg_ops_test \
     -//tensorflow/python/kernel_tests:losses_test \
     -//tensorflow/python/kernel_tests:lrn_op_test \
-    -//tensorflow/python/kernel_tests:matmul_op_test \
     -//tensorflow/python/kernel_tests:matrix_inverse_op_test \
-    -//tensorflow/python/kernel_tests:matrix_solve_ls_op_test \
     -//tensorflow/python/kernel_tests:matrix_triangular_solve_op_test \
     -//tensorflow/python/kernel_tests:metrics_test \
     -//tensorflow/python/kernel_tests:neon_depthwise_conv_op_test \
     -//tensorflow/python/kernel_tests:pool_test \
     -//tensorflow/python/kernel_tests:pooling_ops_3d_test \
     -//tensorflow/python/kernel_tests:pooling_ops_test \
-    -//tensorflow/python/kernel_tests:qr_op_test \
     -//tensorflow/python/kernel_tests:reduction_ops_test   \
     -//tensorflow/python/kernel_tests:scatter_nd_ops_test \
     -//tensorflow/python/kernel_tests:scatter_ops_test \
     -//tensorflow/python/kernel_tests:segment_reduction_ops_test \
-    -//tensorflow/python/kernel_tests:self_adjoint_eig_op_test \
-    -//tensorflow/python/kernel_tests:svd_op_test \
-    -//tensorflow/python/kernel_tests:tensordot_op_test \
     -//tensorflow/python/profiler/internal:run_metadata_test \
     -//tensorflow/python/profiler:profile_context_test \
     -//tensorflow/python/profiler:profiler_test \
@@ -98,7 +88,6 @@ bazel test --test_sharding_strategy=disabled --config=rocm --test_tag_filters=-n
     -//tensorflow/python:timeline_test \
     -//tensorflow/python:virtual_gpu_test \
     -//tensorflow/python:function_def_to_graph_test \
-    -//tensorflow/python:momentum_test \
     -//tensorflow/python/keras:models_test \
     -//tensorflow/python/keras:training_test \
     -//tensorflow/python/keras:cudnn_recurrent_test \
@@ -109,5 +98,3 @@ bazel test --test_sharding_strategy=disabled --config=rocm --test_tag_filters=-n
     -//tensorflow/python/estimator:dnn_test \
     -//tensorflow/python/estimator:estimator_test \
     -//tensorflow/python/estimator:linear_test
-
-# Note: temp. disabling 87 unit tests in order to esablish a CI baseline (2018/06/13)