Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Block local memory tests on pascal (#329)
Browse files Browse the repository at this point in the history
* Block local memory tests on pascal
* Hide atomic lockfree macro test behind check for NVRTC
  • Loading branch information
wmaxey committed Nov 4, 2022
1 parent 37cc516 commit 3a5a94d
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 50 deletions.
70 changes: 43 additions & 27 deletions .upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,54 @@
// https://github.com/NVIDIA/libcudacxx/issues/160

#include <cuda/atomic>
#include "cuda_space_selector.h"

template <typename T>
__host__ __device__
constexpr bool unused(T &&) {return true;}

int main(int argc, char ** argv)
{
// Test default aligned user type
{
struct key {
int32_t a;
int32_t b;
};
static_assert(alignof(key) == 4, "");
cuda::atomic<key> k(key{});
auto r = k.load();
k.store(r);
(void)k.exchange(r);
unused(r);
}
// Test forcibly aligned user type
{
struct alignas(8) key {
int32_t a;
int32_t b;
};
static_assert(alignof(key) == 8, "");
cuda::atomic<key> k(key{});
auto r = k.load();
k.store(r);
(void)k.exchange(r);
unused(r);
template <template<typename, typename> typename Selector>
struct TestFn {
__host__ __device__
void operator()() const {
{
struct key {
int32_t a;
int32_t b;
};
typedef cuda::std::atomic<key> A;
Selector<A, constructor_initializer> sel;
A & t = *sel.construct();
cuda::std::atomic_init(&t, key{1,2});
auto r = t.load();
t.store(r);
(void)t.exchange(r);
}
{
struct alignas(8) key {
int32_t a;
int32_t b;
};
typedef cuda::std::atomic<key> A;
Selector<A, constructor_initializer> sel;
A & t = *sel.construct();
cuda::std::atomic_init(&t, key{1,2});
auto r = t.load();
t.store(r);
(void)t.exchange(r);
}
}
};

int main(int, char**)
{
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
TestFn<local_memory_selector>()();
#endif
#ifdef __CUDA_ARCH__
TestFn<shared_memory_selector>()();
TestFn<global_memory_selector>()();
#endif

return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@

#include "test_macros.h"

// NVRTC doesn't include host atomic making this feature test invalid
// TODO: Should we define __cpp_lib_atomic_is_always_lock_free for NVRTC?
#if !defined(__CUDACC_RTC__)
#if !defined(__cpp_lib_atomic_is_always_lock_free)
# error Feature test macro missing.
#endif
#endif

template <typename T> __host__ __device__ void checkAlwaysLockFree() {
if (cuda::std::atomic<T>::is_always_lock_free)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
// #include <cuda/std/chrono> // for nanoseconds

#include "test_macros.h"
#include "cuda_space_selector.h"

template <class T>
__host__ __device__
Expand All @@ -44,7 +45,8 @@ void test_copy_constructible() {
template <class T, class A>
__host__ __device__
void test_atomic_ref_copy_ctor() {
A val = 0;
SHARED A val;
val = 0;

T t0(val);
T t1(t0);
Expand All @@ -58,7 +60,8 @@ void test_atomic_ref_copy_ctor() {
template <class T, class A>
__host__ __device__
void test_atomic_ref_move_ctor() {
A val = 0;
SHARED A val;
val = 0;

T t0(val);
t0++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,15 +170,15 @@ int main(int, char**)
// confidence that this all actually works

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_block, local_memory_selector>();
#endif
#ifdef __CUDA_ARCH__
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_block, shared_memory_selector>();

test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_device, global_memory_selector>();
#endif

return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,14 @@ int main(int, char**)

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_system, local_memory_selector>();
#endif
#ifdef __CUDA_ARCH__
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_block, shared_memory_selector>();

test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_device, global_memory_selector>();
#endif

return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,14 @@ int main(int, char**)

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_system, local_memory_selector>();
#endif
#ifdef __CUDA_ARCH__
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_block, shared_memory_selector>();

test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_device, global_memory_selector>();
#endif

return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,14 @@ int main(int, char**)

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_system, local_memory_selector>();
#endif
#ifdef __CUDA_ARCH__
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_block, shared_memory_selector>();

test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_device, global_memory_selector>();
#endif

return 0;
Expand Down

0 comments on commit 3a5a94d

Please sign in to comment.