Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion cub/cub/agent/agent_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@

CUB_NAMESPACE_BEGIN

namespace detail
{
template <int ThreadsPerBlock,
int ItemsPerThread = 1,
cub::BlockLoadAlgorithm LoadAlgorithm = cub::BLOCK_LOAD_DIRECT,
cub::CacheLoadModifier LoadModifier = cub::LOAD_LDG,
cub::BlockStoreAlgorithm StoreAlgorithm = cub::BLOCK_STORE_DIRECT>
struct AgentAdjacentDifferencePolicy
struct agent_adjacent_difference_policy
{
static constexpr int BLOCK_THREADS = ThreadsPerBlock;
static constexpr int ITEMS_PER_THREAD = ItemsPerThread;
Expand All @@ -39,6 +41,15 @@ struct AgentAdjacentDifferencePolicy
static constexpr cub::CacheLoadModifier LOAD_MODIFIER = LoadModifier;
static constexpr cub::BlockStoreAlgorithm STORE_ALGORITHM = StoreAlgorithm;
};
} // namespace detail

//! Deprecated spelling of the adjacent-difference agent policy.
//!
//! Alias template that forwards its five template arguments, unchanged and in the same order, to
//! detail::agent_adjacent_difference_policy. Defaults declared here: ItemsPerThread = 1,
//! LoadAlgorithm = cub::BLOCK_LOAD_DIRECT, LoadModifier = cub::LOAD_LDG,
//! StoreAlgorithm = cub::BLOCK_STORE_DIRECT.
//!
//! The alias is annotated with CCCL_DEPRECATED_BECAUSE; the attached message points users to the
//! tuning API for DeviceAdjacentDifference.
template <int ThreadsPerBlock,
int ItemsPerThread = 1,
cub::BlockLoadAlgorithm LoadAlgorithm = cub::BLOCK_LOAD_DIRECT,
cub::CacheLoadModifier LoadModifier = cub::LOAD_LDG,
cub::BlockStoreAlgorithm StoreAlgorithm = cub::BLOCK_STORE_DIRECT>
using AgentAdjacentDifferencePolicy CCCL_DEPRECATED_BECAUSE("Use the tuning API for DeviceAdjacentDifference") =
detail::agent_adjacent_difference_policy<ThreadsPerBlock, ItemsPerThread, LoadAlgorithm, LoadModifier, StoreAlgorithm>;

namespace detail::adjacent_difference
{
Expand Down
10 changes: 5 additions & 5 deletions cub/cub/device/dispatch/dispatch_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ _CCCL_KERNEL_ATTRIBUTES void DeviceAdjacentDifferenceDifferenceKernel(
static_assert(::cuda::std::is_empty_v<PolicySelector>);
static constexpr AdjacentDifferencePolicy policy = current_policy<PolicySelector>();
using AdjacentDifferencePolicyT =
AgentAdjacentDifferencePolicy<policy.threads_per_block,
policy.items_per_thread,
policy.load_algorithm,
policy.load_modifier,
policy.store_algorithm>;
agent_adjacent_difference_policy<policy.threads_per_block,
policy.items_per_thread,
policy.load_algorithm,
policy.load_modifier,
policy.store_algorithm>;

// It is OK to introspect the return type or parameter types of the
// `operator()` function of `__device__` extended lambda within device code.
Expand Down
10 changes: 5 additions & 5 deletions cub/cub/device/dispatch/tuning/tuning_adjacent_difference.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,11 @@ struct policy_hub
// Adjacent-difference tuning entry. Template arguments, in declaration order of the agent policy:
//   threads per block : 128
//   items per thread  : Nominal8BItemsToItems<ValueT>(7)
//   load algorithm    : BLOCK_LOAD_WARP_TRANSPOSE
//   load modifier     : LOAD_CA when MayAlias is true, otherwise LOAD_LDG
//   store algorithm   : BLOCK_STORE_WARP_TRANSPOSE
// NOTE(review): the 500 passed to ChainedPolicy presumably denotes the minimum architecture this
// entry applies to, and naming itself as the previous policy presumably ends the chain -- confirm
// against ChainedPolicy's definition.
struct Policy500 : ChainedPolicy<500, Policy500, Policy500>
{
using AdjacentDifferencePolicy =
AgentAdjacentDifferencePolicy<128,
Nominal8BItemsToItems<ValueT>(7),
BLOCK_LOAD_WARP_TRANSPOSE,
MayAlias ? LOAD_CA : LOAD_LDG,
BLOCK_STORE_WARP_TRANSPOSE>;
agent_adjacent_difference_policy<128,
Nominal8BItemsToItems<ValueT>(7),
BLOCK_LOAD_WARP_TRANSPOSE,
MayAlias ? LOAD_CA : LOAD_LDG,
BLOCK_STORE_WARP_TRANSPOSE>;
};

using MaxPolicy = Policy500;
Expand Down
43 changes: 27 additions & 16 deletions cub/cub/device/dispatch/tuning/tuning_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@

CUB_NAMESPACE_BEGIN

namespace detail
{
template <int ThreadsPerBlock,
int ItemsPerThread = 1,
cub::BlockLoadAlgorithm LoadAlgorithm = cub::BLOCK_LOAD_DIRECT,
cub::CacheLoadModifier LoadModifier = cub::LOAD_LDG,
cub::BlockStoreAlgorithm StoreAlgorithm = cub::BLOCK_STORE_DIRECT>
struct AgentMergeSortPolicy
struct agent_merge_sort_policy
{
static constexpr int BLOCK_THREADS = ThreadsPerBlock;
static constexpr int ITEMS_PER_THREAD = ItemsPerThread;
Expand All @@ -40,6 +42,15 @@ struct AgentMergeSortPolicy
static constexpr cub::CacheLoadModifier LOAD_MODIFIER = LoadModifier;
static constexpr cub::BlockStoreAlgorithm STORE_ALGORITHM = StoreAlgorithm;
};
} // namespace detail

//! Deprecated spelling of the merge-sort agent policy.
//!
//! Alias template that forwards its five template arguments, unchanged and in the same order, to
//! detail::agent_merge_sort_policy. Defaults declared here: ItemsPerThread = 1,
//! LoadAlgorithm = cub::BLOCK_LOAD_DIRECT, LoadModifier = cub::LOAD_LDG,
//! StoreAlgorithm = cub::BLOCK_STORE_DIRECT.
//!
//! The alias is annotated with CCCL_DEPRECATED_BECAUSE; the attached message points users to the
//! tuning API for DeviceMergeSort.
template <int ThreadsPerBlock,
int ItemsPerThread = 1,
cub::BlockLoadAlgorithm LoadAlgorithm = cub::BLOCK_LOAD_DIRECT,
cub::CacheLoadModifier LoadModifier = cub::LOAD_LDG,
cub::BlockStoreAlgorithm StoreAlgorithm = cub::BLOCK_STORE_DIRECT>
using AgentMergeSortPolicy CCCL_DEPRECATED_BECAUSE("Use the tuning API for DeviceMergeSort") =
detail::agent_merge_sort_policy<ThreadsPerBlock, ItemsPerThread, LoadAlgorithm, LoadModifier, StoreAlgorithm>;

//! The tuning policy for all algorithms in @ref DeviceMergeSort.
struct MergeSortPolicy
Expand Down Expand Up @@ -85,11 +96,11 @@ struct policy_hub
// Merge-sort tuning entry. Template arguments, in declaration order of the agent policy:
//   threads per block : 256
//   items per thread  : Nominal4BItemsToItems<KeyT>(11)
//   load algorithm    : BLOCK_LOAD_WARP_TRANSPOSE
//   load modifier     : LOAD_LDG
//   store algorithm   : BLOCK_STORE_WARP_TRANSPOSE
// NOTE(review): the 500 passed to ChainedPolicy presumably denotes the minimum architecture this
// entry applies to, and naming itself as the previous policy presumably ends the chain -- confirm
// against ChainedPolicy's definition.
struct Policy500 : ChainedPolicy<500, Policy500, Policy500>
{
using MergeSortPolicy =
AgentMergeSortPolicy<256,
Nominal4BItemsToItems<KeyT>(11),
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_LDG,
BLOCK_STORE_WARP_TRANSPOSE>;
agent_merge_sort_policy<256,
Nominal4BItemsToItems<KeyT>(11),
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_LDG,
BLOCK_STORE_WARP_TRANSPOSE>;
};

// NVBug 3384810
Expand All @@ -99,22 +110,22 @@ struct policy_hub
// Merge-sort tuning entry chained after Policy500. Template arguments, in declaration order of the
// agent policy:
//   threads per block : 512
//   items per thread  : Nominal4BItemsToItems<KeyT>(15)
//   load algorithm    : BLOCK_LOAD_WARP_TRANSPOSE
//   load modifier     : LOAD_LDG
//   store algorithm   : BLOCK_STORE_WARP_TRANSPOSE
// NOTE(review): this struct is followed by an #endif, so it is conditionally compiled; the
// controlling condition is not visible here -- confirm before relying on Policy520 existing.
struct Policy520 : ChainedPolicy<520, Policy520, Policy500>
{
using MergeSortPolicy =
AgentMergeSortPolicy<512,
Nominal4BItemsToItems<KeyT>(15),
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_LDG,
BLOCK_STORE_WARP_TRANSPOSE>;
agent_merge_sort_policy<512,
Nominal4BItemsToItems<KeyT>(15),
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_LDG,
BLOCK_STORE_WARP_TRANSPOSE>;
};
#endif

// Merge-sort tuning entry chained after Policy520. Template arguments, in declaration order of the
// agent policy:
//   threads per block : 256
//   items per thread  : Nominal4BItemsToItems<KeyT>(17)
//   load algorithm    : BLOCK_LOAD_WARP_TRANSPOSE
//   load modifier     : LOAD_DEFAULT (the other visible merge-sort entries use LOAD_LDG)
//   store algorithm   : BLOCK_STORE_WARP_TRANSPOSE
struct Policy600 : ChainedPolicy<600, Policy600, Policy520>
{
using MergeSortPolicy =
AgentMergeSortPolicy<256,
Nominal4BItemsToItems<KeyT>(17),
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_DEFAULT,
BLOCK_STORE_WARP_TRANSPOSE>;
agent_merge_sort_policy<256,
Nominal4BItemsToItems<KeyT>(17),
BLOCK_LOAD_WARP_TRANSPOSE,
LOAD_DEFAULT,
BLOCK_STORE_WARP_TRANSPOSE>;
};

using MaxPolicy = Policy600;
Expand Down
Loading