Skip to content

Commit

Permalink
Merge pull request tensorflow#54 from ROCmSoftwarePlatform/deven_unit…
Browse files Browse the repository at this point in the history
…_test_fixes_180626

Special casing GpuAtomicMin / GpuAtomicMax for ROCm
  • Loading branch information
whchung committed Jun 27, 2018
2 parents e703ec9 + b2c2cd3 commit 640398b
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 2 deletions.
58 changes: 58 additions & 0 deletions tensorflow/core/util/gpu_device_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,34 @@ template <typename T, typename U>
__device__ detail::ToTypeIfConvertible<U, T> GpuAtomicMax(T* ptr, U value) {
return atomicMax(ptr, value);
}

#if TENSORFLOW_USE_ROCM

/*
 * CUDA runtime headers provide the following overloads:
 * __device__ int max(int, int)
 * __device__ float max(float, float)
 * __device__ double max(double, double)
 *
 * and many others, whereas HIP runtime headers only provide the "int" version.
 *
 * Therefore the ROCm build must be special-cased to call the correct
 * underlying routines for the float and double types.
 *
 */

// Atomically stores max(*ptr, value) into *ptr via a CAS loop and returns the
// previous value. HIP headers lack a float overload of max(), so fmaxf is
// called explicitly here.
__device__ inline float GpuAtomicMax(float* ptr, float value) {
  auto take_max = [value](float current) { return fmaxf(current, value); };
  return detail::GpuAtomicCasHelper(ptr, take_max);
}

// Atomically stores max(*ptr, value) into *ptr via a CAS loop and returns the
// previous value. HIP headers lack a double overload of max(), so fmax is
// called explicitly here.
__device__ inline double GpuAtomicMax(double* ptr, double value) {
  auto take_max = [value](double current) { return fmax(current, value); };
  return detail::GpuAtomicCasHelper(ptr, take_max);
}

#else

__device__ inline float GpuAtomicMax(float* ptr, float value) {
return detail::GpuAtomicCasHelper(
ptr, [value](float a) { return max(a, value); });
Expand All @@ -659,6 +686,8 @@ __device__ inline double GpuAtomicMax(double* ptr, double value) {
ptr, [value](double a) { return max(a, value); });
}

#endif

__device__ inline Eigen::half GpuAtomicMax(Eigen::half* ptr,
Eigen::half value) {
return detail::GpuAtomicCasHelper(
Expand All @@ -678,7 +707,34 @@ template <typename T, typename U>
__device__ detail::ToTypeIfConvertible<U, T> GpuAtomicMin(T* ptr, U value) {
return atomicMin(ptr, value);
}

#if TENSORFLOW_USE_ROCM

/*
 * CUDA runtime headers provide the following overloads:
 * __device__ int min(int, int)
 * __device__ float min(float, float)
 * __device__ double min(double, double)
 *
 * and many others, whereas HIP runtime headers only provide the "int" version.
 *
 * Therefore the ROCm build must be special-cased to call the correct
 * underlying routines for the float and double types.
 *
 */

// Atomically stores min(*ptr, value) into *ptr via a CAS loop and returns the
// previous value. HIP headers lack a float overload of min(), so fminf is
// called explicitly here.
__device__ inline float GpuAtomicMin(float* ptr, float value) {
  auto take_min = [value](float current) { return fminf(current, value); };
  return detail::GpuAtomicCasHelper(ptr, take_min);
}

// Atomically stores min(*ptr, value) into *ptr via a CAS loop and returns the
// previous value. HIP headers lack a double overload of min(), so fmin is
// called explicitly here.
__device__ inline double GpuAtomicMin(double* ptr, double value) {
  auto take_min = [value](double current) { return fmin(current, value); };
  return detail::GpuAtomicCasHelper(ptr, take_min);
}

#else

__device__ inline float GpuAtomicMin(float* ptr, float value) {
return detail::GpuAtomicCasHelper(
ptr, [value](float a) { return min(a, value); });
Expand All @@ -689,6 +745,8 @@ __device__ inline double GpuAtomicMin(double* ptr, double value) {
ptr, [value](double a) { return min(a, value); });
}

#endif

__device__ inline Eigen::half GpuAtomicMin(Eigen::half* ptr,
Eigen::half value) {
return detail::GpuAtomicCasHelper(
Expand Down
2 changes: 0 additions & 2 deletions tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,6 @@ bazel test --test_sharding_strategy=disabled --config=rocm --test_tag_filters=-n
-//tensorflow/python/kernel_tests:pool_test \
-//tensorflow/python/kernel_tests:pooling_ops_3d_test \
-//tensorflow/python/kernel_tests:pooling_ops_test \
-//tensorflow/python/kernel_tests:reduction_ops_test \
-//tensorflow/python/kernel_tests:scatter_ops_test \
-//tensorflow/python/profiler/internal:run_metadata_test \
-//tensorflow/python/profiler:profile_context_test \
-//tensorflow/python/profiler:profiler_test \
Expand Down

0 comments on commit 640398b

Please sign in to comment.