Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/dev' into feature/static_multiset
Browse files (browse the repository at this point in the history)
  • Loading branch information
PointKernel committed May 28, 2024
2 parents 662dc7d + 97d2404 commit 8f0f332
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 26 deletions.
46 changes: 27 additions & 19 deletions include/cuco/detail/open_addressing/kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ template <int32_t CGSize,
typename Predicate,
typename AtomicT,
typename Ref>
CUCO_KERNEL void insert_if_n(InputIt first,
cuco::detail::index_type n,
StencilIt stencil,
Predicate pred,
AtomicT* num_successes,
Ref ref)
CUCO_KERNEL __launch_bounds__(BlockSize) void insert_if_n(InputIt first,
cuco::detail::index_type n,
StencilIt stencil,
Predicate pred,
AtomicT* num_successes,
Ref ref)
{
using BlockReduce = cub::BlockReduce<typename Ref::size_type, BlockSize>;
__shared__ typename BlockReduce::TempStorage temp_storage;
Expand Down Expand Up @@ -127,7 +127,7 @@ template <int32_t CGSize,
typename StencilIt,
typename Predicate,
typename Ref>
CUCO_KERNEL void insert_if_n(
CUCO_KERNEL __launch_bounds__(BlockSize) void insert_if_n(
InputIt first, cuco::detail::index_type n, StencilIt stencil, Predicate pred, Ref ref)
{
auto const loop_stride = cuco::detail::grid_stride() / CGSize;
Expand Down Expand Up @@ -162,7 +162,9 @@ CUCO_KERNEL void insert_if_n(
* @param ref Non-owning container device ref used to access the slot storage
*/
template <int32_t CGSize, int32_t BlockSize, typename InputIt, typename Ref>
CUCO_KERNEL void erase(InputIt first, cuco::detail::index_type n, Ref ref)
CUCO_KERNEL __launch_bounds__(BlockSize) void erase(InputIt first,
cuco::detail::index_type n,
Ref ref)
{
auto const loop_stride = cuco::detail::grid_stride() / CGSize;
auto idx = cuco::detail::global_thread_id() / CGSize;
Expand Down Expand Up @@ -212,12 +214,12 @@ template <int32_t CGSize,
typename Predicate,
typename OutputIt,
typename Ref>
CUCO_KERNEL void contains_if_n(InputIt first,
cuco::detail::index_type n,
StencilIt stencil,
Predicate pred,
OutputIt output_begin,
Ref ref)
CUCO_KERNEL __launch_bounds__(BlockSize) void contains_if_n(InputIt first,
cuco::detail::index_type n,
StencilIt stencil,
Predicate pred,
OutputIt output_begin,
Ref ref)
{
namespace cg = cooperative_groups;

Expand Down Expand Up @@ -274,7 +276,10 @@ CUCO_KERNEL void contains_if_n(InputIt first,
* @param ref Non-owning container device ref used to access the slot storage
*/
template <int32_t CGSize, int32_t BlockSize, typename InputIt, typename OutputIt, typename Ref>
CUCO_KERNEL void find(InputIt first, cuco::detail::index_type n, OutputIt output_begin, Ref ref)
CUCO_KERNEL __launch_bounds__(BlockSize) void find(InputIt first,
cuco::detail::index_type n,
OutputIt output_begin,
Ref ref)
{
namespace cg = cooperative_groups;

Expand Down Expand Up @@ -382,7 +387,9 @@ CUCO_KERNEL void count(InputIt first, cuco::detail::index_type n, AtomicT* count
* @param count Number of filled slots
*/
template <int32_t BlockSize, typename StorageRef, typename Predicate, typename AtomicT>
CUCO_KERNEL void size(StorageRef storage, Predicate is_filled, AtomicT* count)
CUCO_KERNEL __launch_bounds__(BlockSize) void size(StorageRef storage,
Predicate is_filled,
AtomicT* count)
{
using size_type = typename StorageRef::size_type;

Expand All @@ -408,9 +415,10 @@ CUCO_KERNEL void size(StorageRef storage, Predicate is_filled, AtomicT* count)
}

template <int32_t BlockSize, typename ContainerRef, typename Predicate>
CUCO_KERNEL void rehash(typename ContainerRef::storage_ref_type storage_ref,
ContainerRef container_ref,
Predicate is_filled)
CUCO_KERNEL __launch_bounds__(BlockSize) void rehash(
typename ContainerRef::storage_ref_type storage_ref,
ContainerRef container_ref,
Predicate is_filled)
{
namespace cg = cooperative_groups;

Expand Down
4 changes: 3 additions & 1 deletion include/cuco/detail/static_map/kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ CUCO_SUPPRESS_KERNEL_WARNINGS
* @param ref Non-owning container device ref used to access the slot storage
*/
template <int32_t CGSize, int32_t BlockSize, typename InputIt, typename Ref>
CUCO_KERNEL void insert_or_assign(InputIt first, cuco::detail::index_type n, Ref ref)
CUCO_KERNEL __launch_bounds__(BlockSize) void insert_or_assign(InputIt first,
cuco::detail::index_type n,
Ref ref)
{
auto const loop_stride = cuco::detail::grid_stride() / CGSize;
auto idx = cuco::detail::global_thread_id() / CGSize;
Expand Down
12 changes: 6 additions & 6 deletions include/cuco/detail/static_set/kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -267,12 +267,12 @@ template <int32_t BlockSize,
typename OutputIt2,
typename AtomicT,
typename Ref>
CUCO_KERNEL void retrieve(InputIt first,
cuco::detail::index_type n,
OutputIt1 output_probe,
OutputIt2 output_match,
AtomicT* counter,
Ref ref)
CUCO_KERNEL __launch_bounds__(BlockSize) void retrieve(InputIt first,
cuco::detail::index_type n,
OutputIt1 output_probe,
OutputIt2 output_match,
AtomicT* counter,
Ref ref)
{
// Scalar retrieve without using CG
if constexpr (Ref::cg_size == 1) {
Expand Down

0 comments on commit 8f0f332

Please sign in to comment.