Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Block local memory tests on pascal #329

Merged
merged 3 commits into from
Nov 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 43 additions & 27 deletions .upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,38 +17,54 @@
// https://github.com/NVIDIA/libcudacxx/issues/160

#include <cuda/atomic>
#include "cuda_space_selector.h"

template <typename T>
__host__ __device__
constexpr bool unused(T &&) {return true;}

int main(int argc, char ** argv)
{
// Test default aligned user type
{
struct key {
int32_t a;
int32_t b;
};
static_assert(alignof(key) == 4, "");
cuda::atomic<key> k(key{});
auto r = k.load();
k.store(r);
(void)k.exchange(r);
unused(r);
}
// Test forcibly aligned user type
{
struct alignas(8) key {
int32_t a;
int32_t b;
};
static_assert(alignof(key) == 8, "");
cuda::atomic<key> k(key{});
auto r = k.load();
k.store(r);
(void)k.exchange(r);
unused(r);
template <template<typename, typename> typename Selector>
struct TestFn {
__host__ __device__
void operator()() const {
{
struct key {
int32_t a;
int32_t b;
};
typedef cuda::std::atomic<key> A;
Selector<A, constructor_initializer> sel;
A & t = *sel.construct();
cuda::std::atomic_init(&t, key{1,2});
auto r = t.load();
t.store(r);
(void)t.exchange(r);
}
{
struct alignas(8) key {
int32_t a;
int32_t b;
};
typedef cuda::std::atomic<key> A;
Selector<A, constructor_initializer> sel;
A & t = *sel.construct();
cuda::std::atomic_init(&t, key{1,2});
auto r = t.load();
t.store(r);
(void)t.exchange(r);
}
}
};

int main(int, char**)
{
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
TestFn<local_memory_selector>()();
#endif
#ifdef __CUDA_ARCH__
TestFn<shared_memory_selector>()();
TestFn<global_memory_selector>()();
#endif

return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@

#include "test_macros.h"

// NVRTC doesn't include host atomic making this feature test invalid
// TODO: Should we define __cpp_lib_atomic_is_always_lock_free for NVRTC?
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@griwes Food for thought regarding macro. We don't do this now for anything so it's going to have to be another PR.

#if !defined(__CUDACC_RTC__)
#if !defined(__cpp_lib_atomic_is_always_lock_free)
# error Feature test macro missing.
#endif
#endif

template <typename T> __host__ __device__ void checkAlwaysLockFree() {
if (cuda::std::atomic<T>::is_always_lock_free)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
// #include <cuda/std/chrono> // for nanoseconds

#include "test_macros.h"
#include "cuda_space_selector.h"

template <class T>
__host__ __device__
Expand All @@ -44,7 +45,8 @@ void test_copy_constructible() {
template <class T, class A>
__host__ __device__
void test_atomic_ref_copy_ctor() {
A val = 0;
SHARED A val;
val = 0;

T t0(val);
T t1(t0);
Expand All @@ -58,7 +60,8 @@ void test_atomic_ref_copy_ctor() {
template <class T, class A>
__host__ __device__
void test_atomic_ref_move_ctor() {
A val = 0;
SHARED A val;
val = 0;

T t0(val);
t0++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,15 +170,15 @@ int main(int, char**)
// confidence that this all actually works

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_block, local_memory_selector>();
#endif
#ifdef __CUDA_ARCH__
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_block, shared_memory_selector>();

test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_device, global_memory_selector>();
#endif

return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,14 @@ int main(int, char**)

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_system, local_memory_selector>();
#endif
#ifdef __CUDA_ARCH__
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_block, shared_memory_selector>();

test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_device, global_memory_selector>();
#endif

return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,14 @@ int main(int, char**)

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_system, local_memory_selector>();
#endif
#ifdef __CUDA_ARCH__
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_block, shared_memory_selector>();

test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_device, global_memory_selector>();
#endif

return 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,14 +202,14 @@ int main(int, char**)

#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 700
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_system, local_memory_selector>();
#endif
#ifdef __CUDA_ARCH__
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_block, local_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_block, shared_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_block, shared_memory_selector>();

test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_system, local_memory_selector>();
test_for_all_types<cuda_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_std_atomic_ref, cuda::thread_scope_device, global_memory_selector>();
test_for_all_types<cuda_atomic_ref , cuda::thread_scope_device, global_memory_selector>();
#endif

return 0;
Expand Down