-
Notifications
You must be signed in to change notification settings - Fork 400
Allow public tuning of cub::DeviceAdjacentDifference
#9218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,43 +22,44 @@ | |
|
|
||
| CUB_NAMESPACE_BEGIN | ||
|
|
||
| namespace detail::adjacent_difference | ||
| { | ||
| struct adjacent_difference_policy | ||
| //! The tuning policy for all algorithms in @ref DeviceAdjacentDifference. | ||
| struct AdjacentDifferencePolicy | ||
| { | ||
| int threads_per_block; | ||
| int items_per_thread; | ||
| BlockLoadAlgorithm load_algorithm; | ||
| CacheLoadModifier load_modifier; | ||
| BlockStoreAlgorithm store_algorithm; | ||
|
|
||
| _CCCL_HOST_DEVICE_API constexpr friend bool | ||
| operator==(const adjacent_difference_policy& lhs, const adjacent_difference_policy& rhs) | ||
| int threads_per_block; //!< Number of threads in a CUDA block | ||
| int items_per_thread; //!< Number of items processed per thread | ||
| BlockLoadAlgorithm load_algorithm; //!< The @ref BlockLoadAlgorithm used for loading items from global memory | ||
| CacheLoadModifier load_modifier; //!< The @ref CacheLoadModifier used for loading items from global memory | ||
| BlockStoreAlgorithm store_algorithm; //!< The @ref BlockStoreAlgorithm used for storing items to global memory | ||
|
|
||
| [[nodiscard]] _CCCL_HOST_DEVICE_API constexpr friend bool | ||
| operator==(const AdjacentDifferencePolicy& lhs, const AdjacentDifferencePolicy& rhs) | ||
| { | ||
| return lhs.threads_per_block == rhs.threads_per_block && lhs.items_per_thread == rhs.items_per_thread | ||
| && lhs.load_algorithm == rhs.load_algorithm && lhs.load_modifier == rhs.load_modifier | ||
| && lhs.store_algorithm == rhs.store_algorithm; | ||
| } | ||
|
|
||
| _CCCL_HOST_DEVICE_API constexpr friend bool | ||
| operator!=(const adjacent_difference_policy& lhs, const adjacent_difference_policy& rhs) | ||
| [[nodiscard]] _CCCL_HOST_DEVICE_API constexpr friend bool | ||
| operator!=(const AdjacentDifferencePolicy& lhs, const AdjacentDifferencePolicy& rhs) | ||
| { | ||
| return !(lhs == rhs); | ||
| } | ||
|
|
||
| #if _CCCL_HOSTED() | ||
| friend ::std::ostream& operator<<(::std::ostream& os, const adjacent_difference_policy& p) | ||
| friend ::std::ostream& operator<<(::std::ostream& os, const AdjacentDifferencePolicy& p) | ||
| { | ||
| return os << "adjacent_difference_policy { .threads_per_block = " << p.threads_per_block | ||
| return os << "AdjacentDifferencePolicy { .threads_per_block = " << p.threads_per_block | ||
| << ", .items_per_thread = " << p.items_per_thread << ", .load_algorithm = " << p.load_algorithm | ||
| << ", .load_modifier = " << p.load_modifier << ", .store_algorithm = " << p.store_algorithm << " }"; | ||
| } | ||
| #endif // _CCCL_HOSTED() | ||
| }; | ||
|
|
||
| namespace detail::adjacent_difference | ||
| { | ||
| #if _CCCL_HAS_CONCEPTS() | ||
| template <typename T> | ||
| concept adjacent_difference_policy_selector = policy_selector<T, adjacent_difference_policy>; | ||
| concept adjacent_difference_policy_selector = policy_selector<T, AdjacentDifferencePolicy>; | ||
| #endif // _CCCL_HAS_CONCEPTS() | ||
|
Comment on lines
61
to
63
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. important: Constrain the public selector concept to stateless selectors.
|
||
|
|
||
| struct policy_selector | ||
|
|
@@ -67,9 +68,9 @@ struct policy_selector | |
| bool may_alias; | ||
|
|
||
| [[nodiscard]] _CCCL_HOST_DEVICE_API constexpr auto operator()(::cuda::compute_capability) const | ||
| -> adjacent_difference_policy | ||
| -> AdjacentDifferencePolicy | ||
| { | ||
| return adjacent_difference_policy{ | ||
| return AdjacentDifferencePolicy{ | ||
| 128, | ||
| nominal_8B_items_to_items(7, value_type_size), | ||
| BLOCK_LOAD_WARP_TRANSPOSE, | ||
|
|
@@ -87,7 +88,7 @@ template <typename InputIteratorT, bool MayAlias> | |
| struct policy_selector_from_types | ||
| { | ||
| [[nodiscard]] _CCCL_HOST_DEVICE_API constexpr auto operator()(::cuda::compute_capability cc) const | ||
| -> adjacent_difference_policy | ||
| -> AdjacentDifferencePolicy | ||
| { | ||
| constexpr auto policies = policy_selector{static_cast<int>(sizeof(it_value_t<InputIteratorT>)), MayAlias}; | ||
| return policies(cc); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🌐 Web query:
Does C++17 support designated initializers like `T{ .field = value }`, or is that a C++20-only feature? 💡 Result:
Designated initializers, using the syntax T{.field = value }, are a C++20-only feature [1][2][3][4]. They are not part of the C++17 standard [1]. While some compilers (such as GCC and Clang) provided designated initializers as a non-standard language extension long before C++20 [1][5][6], they were not officially supported in C++17 [1]. If you use this syntax in C++17 mode, you are relying on compiler-specific extensions rather than standard C++ [1]. Compilers often allow this code to compile by default, but it may trigger errors or warnings if you compile with strict standard compliance flags (such as -Wpedantic or -pedantic) [1][2]. In contrast, C++20 formally introduced designated initializers for aggregate types [2][3][4]. There are specific differences between the C++20 implementation and C99 designated initializers: for instance, C++20 requires that members be initialized in their declaration order, whereas C99 allows arbitrary order [3][4].
Citations:
important: The `@par Tuning` docs in `cub/cub/device/device_adjacent_difference.cuh` include the `subtract-left-copy-policy-selector` example from `cub/test/catch2_test_device_adjacent_difference_env_api.cu`, which uses C++20 designated initializers (`return {.threads_per_block = ...}`). Copy-pasting this into a C++17 project will fail under standard-conforming builds; update the snippet to use C++17-compatible aggregate initialization (e.g., positional initialization) or mirror the existing `_CCCL_STD_VER >= 2020` fallback.