Block local memory tests on pascal (#329)

* Block local memory tests on pascal * Hide atomic lockfree macro test behind check for NVRTC
NVIDIA · Nov 4, 2022 · 3a5a94d · 3a5a94d
1 parent 37cc516
commit 3a5a94d
Show file tree

Hide file tree

Showing 7 changed files with 73 additions and 50 deletions.
diff --git a/.upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp b/.upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp
@@ -17,38 +17,54 @@
 // https://github.com/NVIDIA/libcudacxx/issues/160
 
 #include <cuda/atomic>
+#include "cuda_space_selector.h"
 
 template <typename T>
 __host__ __device__
 constexpr bool unused(T &&) {return true;}
 
-int main(int argc, char ** argv)
-{
-  // Test default aligned user type
-  {
-    struct key {
-      int32_t a;
-      int32_t b;
-    };
-    static_assert(alignof(key) == 4, "");
-    cuda::atomic<key> k(key{});
-    auto r = k.load();
-    k.store(r);
-    (void)k.exchange(r);
-    unused(r);
-  }
-  // Test forcibly aligned user type
-  {
-    struct alignas(8) key {
-      int32_t a;
-      int32_t b;
-    };
-    static_assert(alignof(key) == 8, "");
-    cuda::atomic<key> k(key{});
-    auto r = k.load();
-    k.store(r);
-    (void)k.exchange(r);
-    unused(r);
+template <template<typename, typename> typename Selector>
+struct TestFn {
+  __host__ __device__
+  void operator()() const {
+    {
+        struct key {
+          int32_t a;
+          int32_t b;
+        };
+        typedef cuda::std::atomic<key> A;
+        Selector<A, constructor_initializer> sel;
+        A & t = *sel.construct();
+        cuda::std::atomic_init(&t, key{1,2});
+        auto r = t.load();
+        t.store(r);
+        (void)t.exchange(r);
+    }
+    {
+        struct alignas(8) key {
+          int32_t a;
+          int32_t b;
+        };
+        typedef cuda::std::atomic<key> A;
+        Selector<A, constructor_initializer> sel;
+        A & t = *sel.construct();
+        cuda::std::atomic_init(&t, key{1,2});
+        auto r = t.load();
+        t.store(r);
+        (void)t.exchange(r);
+    }
   }
+};
+
+int main(int, char**)
+{
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
+    TestFn<local_memory_selector>()();
+#endif
+#ifdef __CUDA_ARCH__
+    TestFn<shared_memory_selector>()();
+    TestFn<global_memory_selector>()();
+#endif
+
   return 0;
 }
diff --git a/.upstream-tests/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp b/.upstream-tests/test/std/atomics/atomics.lockfree/isalwayslockfree.pass.cpp
@@ -18,9 +18,13 @@
 
 #include "test_macros.h"
 
+// NVRTC doesn't include host atomic making this feature test invalid
+// TODO: Should we define __cpp_lib_atomic_is_always_lock_free for NVRTC?
+#if !defined(__CUDACC_RTC__)
 #if !defined(__cpp_lib_atomic_is_always_lock_free)
 # error Feature test macro missing.
 #endif
+#endif
 
 template <typename T> __host__ __device__ void checkAlwaysLockFree() {
   if (cuda::std::atomic<T>::is_always_lock_free)

diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/atomic_copyable.pass.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/atomic_copyable.pass.cpp
@@ -22,6 +22,7 @@
 // #include <cuda/std/chrono> // for nanoseconds
 
 #include "test_macros.h"
+#include "cuda_space_selector.h"
 
 template <class T>
 __host__ __device__
@@ -44,7 +45,8 @@ void test_copy_constructible() {
 template <class T, class A>
 __host__ __device__
 void test_atomic_ref_copy_ctor() {
-  A val = 0;
+  SHARED A val;
+  val = 0;
 
   T t0(val);
   T t1(t0);
@@ -58,7 +60,8 @@ void test_atomic_ref_copy_ctor() {
 template <class T, class A>
 __host__ __device__
 void test_atomic_ref_move_ctor() {
-  A val = 0;
+  SHARED A val;
+  val = 0;
 
   T t0(val);
   t0++;

diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/floating_point_ref.pass.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/floating_point_ref.pass.cpp
@@ -170,15 +170,15 @@ int main(int, char**)
     // confidence that this all actually works
 
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
-    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
+    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_block, local_memory_selector>();
 #endif
 #ifdef __CUDA_ARCH__
-    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
+    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_block, shared_memory_selector>();
 
-    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
+    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_device, global_memory_selector>();
 #endif
 
   return 0;

diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/floating_point_ref_constness.pass.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/floating_point_ref_constness.pass.cpp
@@ -111,14 +111,14 @@ int main(int, char**)
 
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
     test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_system, local_memory_selector>();
 #endif
 #ifdef __CUDA_ARCH__
-    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
+    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_block, shared_memory_selector>();
 
-    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
+    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_device, global_memory_selector>();
 #endif
 
   return 0;

diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/integral_ref.pass.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/integral_ref.pass.cpp
@@ -202,14 +202,14 @@ int main(int, char**)
 
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
     test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_system, local_memory_selector>();
 #endif
 #ifdef __CUDA_ARCH__
-    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
+    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_block, shared_memory_selector>();
 
-    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
+    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_device, global_memory_selector>();
 #endif
 
   return 0;

diff --git a/.upstream-tests/test/std/atomics/atomics.types.generic/integral_ref_constness.pass.cpp b/.upstream-tests/test/std/atomics/atomics.types.generic/integral_ref_constness.pass.cpp
@@ -202,14 +202,14 @@ int main(int, char**)
 
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
     test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_system, local_memory_selector>();
 #endif
 #ifdef __CUDA_ARCH__
-    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
+    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_block, shared_memory_selector>();
 
-    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
-    test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
+    test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
+    test_for_all_types<cuda_atomic_ref    , cuda::thread_scope_device, global_memory_selector>();
 #endif
 
   return 0;