diff --git a/cub/detail/temporary_storage.cuh b/cub/detail/temporary_storage.cuh index 70c67f342b..005a3763ef 100644 --- a/cub/detail/temporary_storage.cuh +++ b/cub/detail/temporary_storage.cuh @@ -132,7 +132,7 @@ class alias : m_slot(slot) , m_elements(elements) { - update_slot(); + this->update_slot(); } __host__ __device__ void update_slot() @@ -156,7 +156,7 @@ public: __host__ __device__ void grow(std::size_t new_elements) { m_elements = new_elements; - update_slot(); + this->update_slot(); } /** @@ -250,7 +250,7 @@ class layout slot m_slots[SlotsCount]; std::size_t m_sizes[SlotsCount]; void *m_pointers[SlotsCount]; - bool layout_was_mapped {}; + bool m_layout_was_mapped {}; public: layout() = default; @@ -270,7 +270,7 @@ public: */ __host__ __device__ std::size_t get_size() { - prepare_interface(); + this->prepare_interface(); // AliasTemporaries can return error only in mapping stage, // so it's safe to ignore it here. @@ -300,12 +300,12 @@ public: __host__ __device__ cudaError_t map_to_buffer(void *d_temp_storage, std::size_t temp_storage_bytes) { - if (layout_was_mapped) + if (m_layout_was_mapped) { return cudaErrorAlreadyMapped; } - prepare_interface(); + this->prepare_interface(); cudaError_t error = cudaSuccess; if ((error = AliasTemporaries(d_temp_storage, @@ -321,14 +321,14 @@ public: m_slots[slot_id].set_storage(m_pointers[slot_id]); } - layout_was_mapped = true; + m_layout_was_mapped = true; return error; } private: __host__ __device__ void prepare_interface() { - if (layout_was_mapped) + if (m_layout_was_mapped) { return; } diff --git a/cub/device/device_segmented_sort.cuh b/cub/device/device_segmented_sort.cuh index 7a3ded6ed3..bc80275fd0 100644 --- a/cub/device/device_segmented_sort.cuh +++ b/cub/device/device_segmented_sort.cuh @@ -171,9 +171,6 @@ struct DeviceSegmentedSort * @tparam KeyT * [inferred] Key type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -225,7 +222,6 @@ struct DeviceSegmentedSort * be printed to the console. Default is `false`. */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -233,8 +229,8 @@ struct DeviceSegmentedSort std::size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -245,7 +241,7 @@ struct DeviceSegmentedSort using DispatchT = DispatchSegmentedSort; @@ -319,9 +315,6 @@ struct DeviceSegmentedSort * @tparam KeyT * [inferred] Key type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -373,7 +366,6 @@ struct DeviceSegmentedSort * to be printed to the console. Default is @p false. */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -381,8 +373,8 @@ struct DeviceSegmentedSort std::size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -393,7 +385,7 @@ struct DeviceSegmentedSort using DispatchT = DispatchSegmentedSort; @@ -477,9 +469,6 @@ struct DeviceSegmentedSort * @tparam KeyT * [inferred] Key type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -530,15 +519,14 @@ struct DeviceSegmentedSort * be printed to the console. Default is @p false. */ template CUB_RUNTIME_FUNCTION static cudaError_t SortKeys(void *d_temp_storage, std::size_t &temp_storage_bytes, DoubleBuffer &d_keys, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -550,7 +538,7 @@ struct DeviceSegmentedSort using DispatchT = DispatchSegmentedSort; @@ -633,9 +621,6 @@ struct DeviceSegmentedSort * @tparam KeyT * [inferred] Key type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -686,15 +671,14 @@ struct DeviceSegmentedSort * to be printed to the console. Default is @p false. */ template CUB_RUNTIME_FUNCTION static cudaError_t SortKeysDescending(void *d_temp_storage, std::size_t &temp_storage_bytes, DoubleBuffer &d_keys, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -706,7 +690,7 @@ struct DeviceSegmentedSort using DispatchT = DispatchSegmentedSort; @@ -780,9 +764,6 @@ struct DeviceSegmentedSort * @tparam KeyT * [inferred] Key type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -834,7 +815,6 @@ struct DeviceSegmentedSort * to be printed to the console. Default is @p false. */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -842,14 +822,14 @@ struct DeviceSegmentedSort std::size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, bool debug_synchronous = false) { - return SortKeys( + return SortKeys( d_temp_storage, temp_storage_bytes, d_keys_in, @@ -917,9 +897,6 @@ struct DeviceSegmentedSort * @tparam KeyT * [inferred] Key type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -971,7 +948,6 @@ struct DeviceSegmentedSort * to be printed to the console. Default is @p false. */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -979,15 +955,14 @@ struct DeviceSegmentedSort std::size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, bool debug_synchronous = false) { return SortKeysDescending(d_temp_storage, temp_storage_bytes, @@ -1066,9 +1041,6 @@ struct DeviceSegmentedSort * @tparam KeyT * [inferred] Key type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -1119,21 +1091,20 @@ struct DeviceSegmentedSort * to be printed to the console. Default is @p false. */ template CUB_RUNTIME_FUNCTION static cudaError_t StableSortKeys(void *d_temp_storage, std::size_t &temp_storage_bytes, DoubleBuffer &d_keys, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, bool debug_synchronous = false) { - return SortKeys( + return SortKeys( d_temp_storage, temp_storage_bytes, d_keys, @@ -1210,9 +1181,6 @@ struct DeviceSegmentedSort * @tparam KeyT * [inferred] Key type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -1263,22 +1231,20 @@ struct DeviceSegmentedSort * to be printed to the console. Default is @p false. */ template CUB_RUNTIME_FUNCTION static cudaError_t StableSortKeysDescending(void *d_temp_storage, std::size_t &temp_storage_bytes, DoubleBuffer &d_keys, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, bool debug_synchronous = false) { return SortKeysDescending(d_temp_storage, temp_storage_bytes, @@ -1361,9 +1327,6 @@ struct DeviceSegmentedSort * @tparam ValueT * [inferred] Value type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -1424,7 +1387,6 @@ struct DeviceSegmentedSort */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -1434,8 +1396,8 @@ struct DeviceSegmentedSort KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -1446,7 +1408,7 @@ struct DeviceSegmentedSort using DispatchT = DispatchSegmentedSort; @@ -1529,9 +1491,6 @@ struct DeviceSegmentedSort * @tparam ValueT * [inferred] Value type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -1592,7 +1551,6 @@ struct DeviceSegmentedSort */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -1602,8 +1560,8 @@ struct DeviceSegmentedSort KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -1614,7 +1572,7 @@ struct DeviceSegmentedSort using DispatchT = DispatchSegmentedSort; @@ -1709,9 +1667,6 @@ struct DeviceSegmentedSort * @tparam ValueT * [inferred] Value type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -1768,7 +1723,6 @@ struct DeviceSegmentedSort */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -1776,8 +1730,8 @@ struct DeviceSegmentedSort std::size_t &temp_storage_bytes, DoubleBuffer &d_keys, DoubleBuffer &d_values, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -1788,7 +1742,7 @@ struct DeviceSegmentedSort using DispatchT = DispatchSegmentedSort; @@ -1880,9 +1834,6 @@ struct DeviceSegmentedSort * @tparam ValueT * [inferred] Value type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -1939,7 +1890,6 @@ struct DeviceSegmentedSort */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -1947,8 +1897,8 @@ struct DeviceSegmentedSort std::size_t &temp_storage_bytes, DoubleBuffer &d_keys, DoubleBuffer &d_values, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -1959,7 +1909,7 @@ struct DeviceSegmentedSort using DispatchT = DispatchSegmentedSort; @@ -2040,9 +1990,6 @@ struct DeviceSegmentedSort * @tparam ValueT * [inferred] Value type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -2102,7 +2049,6 @@ struct DeviceSegmentedSort */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -2112,8 +2058,8 @@ struct DeviceSegmentedSort KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -2121,7 +2067,6 @@ struct DeviceSegmentedSort { return SortPairs(d_temp_storage, temp_storage_bytes, @@ -2202,9 +2147,6 @@ struct DeviceSegmentedSort * @tparam ValueT * [inferred] Value type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -2265,7 +2207,6 @@ struct DeviceSegmentedSort */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -2275,8 +2216,8 @@ struct DeviceSegmentedSort KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -2284,7 +2225,6 @@ struct DeviceSegmentedSort { return SortPairsDescending(d_temp_storage, temp_storage_bytes, @@ -2376,9 +2316,6 @@ struct DeviceSegmentedSort * @tparam ValueT * [inferred] Value type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -2435,7 +2372,6 @@ struct DeviceSegmentedSort */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -2443,8 +2379,8 @@ struct DeviceSegmentedSort std::size_t &temp_storage_bytes, DoubleBuffer &d_keys, DoubleBuffer &d_values, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -2452,7 +2388,6 @@ struct DeviceSegmentedSort { return SortPairs(d_temp_storage, temp_storage_bytes, @@ -2541,9 +2476,6 @@ struct DeviceSegmentedSort * @tparam ValueT * [inferred] Value type * - * @tparam OffsetT - * [inferred] Integer type for global offsets - * * @tparam BeginOffsetIteratorT * [inferred] Random-access input iterator type for reading segment * beginning offsets \iterator @@ -2600,7 +2532,6 @@ struct DeviceSegmentedSort */ template CUB_RUNTIME_FUNCTION static cudaError_t @@ -2608,8 +2539,8 @@ struct DeviceSegmentedSort std::size_t &temp_storage_bytes, DoubleBuffer &d_keys, DoubleBuffer &d_values, - OffsetT num_items, - unsigned int num_segments, + int num_items, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, cudaStream_t stream = 0, @@ -2617,7 +2548,6 @@ struct DeviceSegmentedSort { return SortPairsDescending(d_temp_storage, temp_storage_bytes, diff --git a/cub/device/dispatch/dispatch_segmented_sort.cuh b/cub/device/dispatch/dispatch_segmented_sort.cuh index 113fe7128a..69ddf1771f 100644 --- a/cub/device/dispatch/dispatch_segmented_sort.cuh +++ b/cub/device/dispatch/dispatch_segmented_sort.cuh @@ -985,7 +985,7 @@ struct DispatchSegmentedSort : SelectedPolicy OffsetT num_items; /// The number of segments that comprise the sorting data - unsigned int num_segments; + int num_segments; /** * Random-access input iterator to the sequence of beginning offsets of length @@ -1021,7 +1021,7 @@ struct DispatchSegmentedSort : SelectedPolicy DoubleBuffer &d_keys, DoubleBuffer &d_values, OffsetT num_items, - unsigned int num_segments, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, bool is_overwrite_okay, @@ -1132,7 +1132,7 @@ struct DispatchSegmentedSort : SelectedPolicy small_segments_indices.get(), medium_indices_iterator, group_sizes.get(), - static_cast(num_segments), + num_segments, large_segments_selector, small_segments_selector, stream, @@ -1264,7 +1264,7 @@ struct DispatchSegmentedSort : SelectedPolicy DoubleBuffer &d_keys, DoubleBuffer &d_values, OffsetT num_items, - unsigned int num_segments, + int num_segments, BeginOffsetIteratorT d_begin_offsets, EndOffsetIteratorT d_end_offsets, bool is_overwrite_okay, @@ -1366,7 +1366,7 @@ private: small_segments_indices.get(), medium_indices_iterator, group_sizes.get(), - static_cast(num_segments), + num_segments, large_segments_selector, small_segments_selector, stream, @@ -1453,8 +1453,9 @@ private: } const unsigned int small_segments = h_group_sizes[1]; - const unsigned int medium_segments = num_segments - - (large_segments + small_segments); + const unsigned int medium_segments = + static_cast(num_segments) - + (large_segments + small_segments); const unsigned int small_blocks = DivideAndRoundUp(small_segments, @@ -1530,8 +1531,9 @@ private: { cudaError_t error = cudaSuccess; - const unsigned int blocks_in_grid = num_segments; - const unsigned int threads_in_block = LargeSegmentPolicyT::BLOCK_THREADS; + const auto blocks_in_grid = static_cast(num_segments); + const auto threads_in_block = + static_cast(LargeSegmentPolicyT::BLOCK_THREADS); // Log kernel configuration if (debug_synchronous) diff --git a/test/test_device_segmented_sort.cu b/test/test_device_segmented_sort.cu index 20ddc4545a..aa24ccf53b 100644 --- a/test/test_device_segmented_sort.cu +++ b/test/test_device_segmented_sort.cu @@ -93,15 +93,14 @@ public: int segment_size {}; }; -template +template struct SegmentChecker { const KeyT *sorted_keys {}; - const OffsetT *offsets {}; + const int *offsets {}; SegmentChecker(const KeyT *sorted_keys, - const OffsetT *offsets) + const int *offsets) : sorted_keys(sorted_keys) , offsets(offsets) {} @@ -124,15 +123,14 @@ struct SegmentChecker } }; -template +template struct DescendingSegmentChecker { const KeyT *sorted_keys{}; - const OffsetT *offsets{}; + const int *offsets{}; DescendingSegmentChecker(const KeyT *sorted_keys, - const OffsetT *offsets) + const int *offsets) : sorted_keys(sorted_keys) , offsets(offsets) {} @@ -155,15 +153,14 @@ struct DescendingSegmentChecker } }; -template +template struct ReversedIota { KeyT *data {}; - const OffsetT *offsets {}; + const int *offsets {}; ReversedIota(KeyT *data, - const OffsetT *offsets) + const int *offsets) : data(data) , offsets(offsets) {} @@ -183,14 +180,13 @@ struct ReversedIota }; -template +template struct Iota { KeyT *data{}; - const OffsetT *offsets{}; + const int *offsets{}; - Iota(KeyT *data, const OffsetT *offsets) + Iota(KeyT *data, const int *offsets) : data(data) , offsets(offsets) {} @@ -210,14 +206,13 @@ struct Iota template class Input { thrust::default_random_engine random_engine; - thrust::device_vector d_segment_sizes; - thrust::device_vector d_offsets; - thrust::host_vector h_offsets; + thrust::device_vector d_segment_sizes; + thrust::device_vector d_offsets; + thrust::host_vector h_offsets; using MaskedValueT = typename std::conditional< std::is_same::value, @@ -227,17 +222,17 @@ class Input using UnwrappedKeyT = typename UnwrapHalfAndBfloat16::Type; bool reverse {}; - OffsetT num_items {}; + int num_items {}; thrust::device_vector d_keys; thrust::device_vector d_values; public: - Input(bool reverse, const thrust::host_vector &h_segment_sizes) + Input(bool reverse, const thrust::host_vector &h_segment_sizes) : d_segment_sizes(h_segment_sizes) , d_offsets(d_segment_sizes.size() + 1) , h_offsets(d_segment_sizes.size() + 1) , reverse(reverse) - , num_items(static_cast( + , num_items(static_cast( thrust::reduce(d_segment_sizes.begin(), d_segment_sizes.end()))) , d_keys(num_items) , d_values(num_items) @@ -245,7 +240,7 @@ public: update(); } - Input(thrust::host_vector &h_offsets) + Input(thrust::host_vector &h_offsets) : d_offsets(h_offsets) , h_offsets(h_offsets) , reverse(false) @@ -262,7 +257,7 @@ public: update(); } - OffsetT get_num_items() const + int get_num_items() const { return num_items; } @@ -292,7 +287,7 @@ public: return thrust::raw_pointer_cast(d_keys.data()); } - const thrust::host_vector& get_h_offsets() + const thrust::host_vector& get_h_offsets() { return h_offsets; } @@ -302,7 +297,7 @@ public: return thrust::raw_pointer_cast(d_values.data()); } - const OffsetT *get_d_offsets() const + const int *get_d_offsets() const { return thrust::raw_pointer_cast(d_offsets.data()); } @@ -319,8 +314,8 @@ public: thrust::counting_iterator( static_cast(get_num_segments())), is_segment_sorted.begin(), - DescendingSegmentChecker{keys_output, - get_d_offsets()}); + DescendingSegmentChecker{keys_output, + get_d_offsets()}); } else { @@ -329,8 +324,7 @@ public: thrust::counting_iterator( static_cast(get_num_segments())), is_segment_sorted.begin(), - SegmentChecker{keys_output, - get_d_offsets()}); + SegmentChecker{keys_output, get_d_offsets()}); } return thrust::reduce(is_segment_sorted.begin(), @@ -375,7 +369,7 @@ private: { thrust::for_each(thrust::counting_iterator(0), thrust::counting_iterator(total_segments), - Iota{ + Iota{ reinterpret_cast(get_d_keys()), get_d_offsets()}); } @@ -383,7 +377,7 @@ private: { thrust::for_each(thrust::counting_iterator(0), thrust::counting_iterator(total_segments), - ReversedIota{ + ReversedIota{ reinterpret_cast(get_d_keys()), get_d_offsets()}); } @@ -392,10 +386,10 @@ private: } }; -template +template class InputDescription { - thrust::host_vector segment_sizes; + thrust::host_vector segment_sizes; public: InputDescription& add(const SizeGroupDescription &group) @@ -413,16 +407,16 @@ public: } template - Input gen(bool reverse) + Input gen(bool reverse) { - return Input(reverse, segment_sizes); + return Input(reverse, segment_sizes); } }; -template -class InputDescription +template <> +class InputDescription { - thrust::host_vector segment_sizes; + thrust::host_vector segment_sizes; public: InputDescription& add(const SizeGroupDescription &group) @@ -436,16 +430,15 @@ public: } template - Input gen(bool reverse) + Input gen(bool reverse) { - return Input(reverse, segment_sizes); + return Input(reverse, segment_sizes); } }; template + typename ValueT> void Sort(bool pairs, bool descending, bool double_buffer, @@ -460,9 +453,9 @@ void Sort(bool pairs, ValueT *input_values, ValueT *output_values, - OffsetT num_items, - unsigned int num_segments, - const OffsetT *d_offsets, + int num_items, + int num_segments, + const int *d_offsets, int *keys_selector = nullptr, int *values_selector = nullptr) @@ -797,8 +790,7 @@ void Sort(bool pairs, } template + typename ValueT> std::size_t Sort(bool pairs, bool descending, bool double_buffer, @@ -810,49 +802,49 @@ std::size_t Sort(bool pairs, ValueT *input_values, ValueT *output_values, - OffsetT num_items, - unsigned int num_segments, - const OffsetT *d_offsets, + int num_items, + int num_segments, + const int *d_offsets, int *keys_selector = nullptr, int *values_selector = nullptr) { std::size_t temp_storage_bytes = 42ul; - Sort(pairs, - descending, - double_buffer, - stable_sort, - nullptr, - temp_storage_bytes, - input_keys, - output_keys, - input_values, - output_values, - num_items, - num_segments, - d_offsets, - keys_selector, - values_selector); + Sort(pairs, + descending, + double_buffer, + stable_sort, + nullptr, + temp_storage_bytes, + input_keys, + output_keys, + input_values, + output_values, + num_items, + num_segments, + d_offsets, + keys_selector, + values_selector); thrust::device_vector temp_storage(temp_storage_bytes); std::uint8_t *d_temp_storage = thrust::raw_pointer_cast(temp_storage.data()); - Sort(pairs, - descending, - double_buffer, - stable_sort, - d_temp_storage, - temp_storage_bytes, - input_keys, - output_keys, - input_values, - output_values, - num_items, - num_segments, - d_offsets, - keys_selector, - values_selector); + Sort(pairs, + descending, + double_buffer, + stable_sort, + d_temp_storage, + temp_storage_bytes, + input_keys, + output_keys, + input_values, + output_values, + num_items, + num_segments, + d_offsets, + keys_selector, + values_selector); return temp_storage_bytes; } @@ -878,7 +870,6 @@ void TestZeroSegments() using KeyT = std::uint8_t; using ValueT = std::uint64_t; - using OffsetT = std::uint32_t; for (bool stable_sort: { unstable, stable }) { @@ -892,19 +883,19 @@ void TestZeroSegments() cub::DoubleBuffer values_buffer(nullptr, nullptr); values_buffer.selector = 1; - Sort(sort_pairs, - sort_descending, - sort_buffer, - stable_sort, - nullptr, - nullptr, - nullptr, - nullptr, - OffsetT{}, - OffsetT{}, - nullptr, - &keys_buffer.selector, - &values_buffer.selector); + Sort(sort_pairs, + sort_descending, + sort_buffer, + stable_sort, + nullptr, + nullptr, + nullptr, + nullptr, + int{}, + int{}, + nullptr, + &keys_buffer.selector, + &values_buffer.selector); AssertEquals(keys_buffer.selector, 0); AssertEquals(values_buffer.selector, 1); @@ -915,17 +906,16 @@ void TestZeroSegments() } -void TestEmptySegments(unsigned int segments) +void TestEmptySegments(int segments) { // Type doesn't affect the escape logic, so it should be fine // to test only one set of types here. using KeyT = std::uint8_t; using ValueT = std::uint64_t; - using OffsetT = std::uint32_t; - thrust::device_vector offsets(segments + 1, OffsetT{}); - const OffsetT *d_offsets = thrust::raw_pointer_cast(offsets.data()); + thrust::device_vector offsets(segments + 1, int{}); + const int *d_offsets = thrust::raw_pointer_cast(offsets.data()); for (bool sort_stable: { unstable, stable }) { @@ -939,19 +929,19 @@ void TestEmptySegments(unsigned int segments) cub::DoubleBuffer values_buffer(nullptr, nullptr); values_buffer.selector = 1; - Sort(sort_pairs, - sort_descending, - sort_buffer, - sort_stable, - nullptr, - nullptr, - nullptr, - nullptr, - OffsetT{}, - segments, - d_offsets, - &keys_buffer.selector, - &values_buffer.selector); + Sort(sort_pairs, + sort_descending, + sort_buffer, + sort_stable, + nullptr, + nullptr, + nullptr, + nullptr, + int{}, + segments, + d_offsets, + &keys_buffer.selector, + &values_buffer.selector); AssertEquals(keys_buffer.selector, 0); AssertEquals(values_buffer.selector, 1); @@ -963,22 +953,21 @@ void TestEmptySegments(unsigned int segments) template -void TestSameSizeSegments(OffsetT segment_size, - unsigned int segments, + typename ValueT> +void TestSameSizeSegments(int segment_size, + int segments, bool skip_values = false) { - const OffsetT num_items = segment_size * segments; + const int num_items = segment_size * segments; using UnwrappedKeyT = typename UnwrapHalfAndBfloat16::Type; - thrust::device_vector offsets(segments + 1); + thrust::device_vector offsets(segments + 1); thrust::sequence(offsets.begin(), offsets.end(), - OffsetT{}, - OffsetT{segment_size}); + int{}, + segment_size); - const OffsetT *d_offsets = thrust::raw_pointer_cast(offsets.data()); + const int *d_offsets = thrust::raw_pointer_cast(offsets.data()); const KeyT target_key {42}; const ValueT target_value {42}; @@ -1033,19 +1022,19 @@ void TestSameSizeSegments(OffsetT segment_size, } const std::size_t temp_storage_bytes = - Sort(sort_pairs, - sort_descending, - sort_buffers, - stable_sort, - d_keys_input, - d_keys_output, - d_values_input, - d_values_output, - num_items, - segments, - d_offsets, - &keys_buffer.selector, - &values_buffer.selector); + Sort(sort_pairs, + sort_descending, + sort_buffers, + stable_sort, + d_keys_input, + d_keys_output, + d_values_input, + d_values_output, + num_items, + segments, + d_offsets, + &keys_buffer.selector, + &values_buffer.selector); // If temporary storage size is defined by extra keys storage if (sort_buffers) @@ -1088,7 +1077,7 @@ void TestSameSizeSegments(OffsetT segment_size, reinterpret_cast(d_keys_input + num_items), static_cast(target_key)); - AssertEquals(items_selected, num_items); + AssertEquals(static_cast(items_selected), num_items); } if (sort_pairs) @@ -1110,7 +1099,7 @@ void TestSameSizeSegments(OffsetT segment_size, target_value); } (); - AssertEquals(items_selected, num_items); + AssertEquals(static_cast(items_selected), num_items); } } } @@ -1120,10 +1109,9 @@ void TestSameSizeSegments(OffsetT segment_size, template + typename ValueT> void InputTest(bool sort_descending, - Input &input) + Input &input) { thrust::device_vector keys_output(input.get_num_items()); KeyT *d_keys_output = thrust::raw_pointer_cast(keys_output.data()); @@ -1147,19 +1135,19 @@ void InputTest(bool sort_descending, cub::DoubleBuffer values_buffer(input.get_d_values(), d_values_output); - Sort(sort_pairs, - sort_descending, - sort_buffers, - stable_sort, - input.get_d_keys(), - d_keys_output, - input.get_d_values(), - d_values_output, - input.get_num_items(), - input.get_num_segments(), - input.get_d_offsets(), - &keys_buffer.selector, - &values_buffer.selector); + Sort(sort_pairs, + sort_descending, + sort_buffers, + stable_sort, + input.get_d_keys(), + d_keys_output, + input.get_d_values(), + d_values_output, + input.get_num_items(), + input.get_num_segments(), + input.get_d_offsets(), + &keys_buffer.selector, + &values_buffer.selector); if (sort_buffers) { @@ -1206,9 +1194,8 @@ struct ComparisonPredicate } }; -template -bool compare_two_outputs(const thrust::host_vector &offsets, +template +bool compare_two_outputs(const thrust::host_vector &offsets, const thrust::host_vector &lhs, const thrust::host_vector &rhs) { @@ -1281,12 +1268,11 @@ void RandomizeInput(thrust::host_vector &h_keys, template + typename ValueT> void HostReferenceSort(bool sort_pairs, bool sort_descending, unsigned int num_segments, - const thrust::host_vector &h_offsets, + const thrust::host_vector &h_offsets, thrust::host_vector &h_keys, thrust::host_vector &h_values) { @@ -1294,8 +1280,8 @@ void HostReferenceSort(bool sort_pairs, segment_i < num_segments; segment_i++) { - const OffsetT segment_begin = h_offsets[segment_i]; - const OffsetT segment_end = h_offsets[segment_i + 1]; + const int segment_begin = h_offsets[segment_i]; + const int segment_end = h_offsets[segment_i + 1]; if (sort_pairs) { @@ -1333,16 +1319,15 @@ void HostReferenceSort(bool sort_pairs, #if STORE_ON_FAILURE template + typename ValueT> void DumpInput(bool sort_pairs, bool sort_descending, bool sort_buffers, - Input &input, + Input &input, thrust::host_vector &h_keys, thrust::host_vector &h_values) { - const thrust::host_vector &h_offsets = input.get_h_offsets(); + const thrust::host_vector &h_offsets = input.get_h_offsets(); std::cout << "sort pairs: " << sort_pairs << "\n"; std::cout << "sort descending: " << sort_descending << "\n"; @@ -1356,7 +1341,7 @@ void DumpInput(bool sort_pairs, std::ofstream offsets_dump("offsets", std::ios::binary); offsets_dump.write(reinterpret_cast( thrust::raw_pointer_cast(h_offsets.data())), - sizeof(OffsetT) * h_offsets.size()); + sizeof(int) * h_offsets.size()); std::ofstream keys_dump("keys", std::ios::binary); keys_dump.write(reinterpret_cast( @@ -1372,9 +1357,8 @@ void DumpInput(bool sort_pairs, template -void InputTestRandom(Input &input) + typename ValueT> +void InputTestRandom(Input &input) { thrust::host_vector h_keys_output(input.get_num_items()); thrust::device_vector keys_output(input.get_num_items()); @@ -1388,7 +1372,7 @@ void InputTestRandom(Input &input) thrust::host_vector h_keys(input.get_num_items()); thrust::host_vector h_values(input.get_num_items()); - const thrust::host_vector &h_offsets = input.get_h_offsets(); + const thrust::host_vector &h_offsets = input.get_h_offsets(); for (bool stable_sort: { unstable, stable }) { @@ -1413,20 +1397,19 @@ void InputTestRandom(Input &input) cub::DoubleBuffer keys_buffer(input.get_d_keys(), d_keys_output); cub::DoubleBuffer values_buffer(input.get_d_values(), d_values_output); - Sort( - sort_pairs, - sort_descending, - sort_buffers, - stable_sort, - input.get_d_keys(), - d_keys_output, - input.get_d_values(), - d_values_output, - input.get_num_items(), - input.get_num_segments(), - input.get_d_offsets(), - &keys_buffer.selector, - &values_buffer.selector); + Sort(sort_pairs, + sort_descending, + sort_buffers, + stable_sort, + input.get_d_keys(), + d_keys_output, + input.get_d_values(), + d_values_output, + input.get_num_items(), + input.get_num_segments(), + input.get_d_offsets(), + &keys_buffer.selector, + &values_buffer.selector); HostReferenceSort(sort_pairs, sort_descending, @@ -1472,12 +1455,12 @@ void InputTestRandom(Input &input) #if STORE_ON_FAILURE if (!keys_ok || !values_ok) { - DumpInput(sort_pairs, - sort_descending, - sort_buffers, - input, - h_keys_backup, - h_values_backup); + DumpInput(sort_pairs, + sort_descending, + sort_buffers, + input, + h_keys_backup, + h_values_backup); } #endif @@ -1493,8 +1476,7 @@ AssertTrue(keys_ok); } template + typename ValueT> struct EdgeTestDispatch { // Edge cases that needs to be tested @@ -1530,8 +1512,8 @@ struct EdgeTestDispatch for (bool sort_descending : {ascending, descending}) { - Input edge_cases = - InputDescription() + Input edge_cases = + InputDescription() .add({a_lot_of, empty_short_circuit_segment_size}) .add({a_lot_of, copy_short_circuit_segment_size}) .add({a_lot_of, swap_short_circuit_segment_size}) @@ -1557,7 +1539,7 @@ struct EdgeTestDispatch .add({a_few, large_cached_segment_max_segment_size * 5}) .template gen(sort_descending); - InputTest(sort_descending, edge_cases); + InputTest(sort_descending, edge_cases); } return cudaSuccess; @@ -1565,8 +1547,7 @@ struct EdgeTestDispatch }; template + typename ValueT> void EdgePatternsTest() { int ptx_version = 0; @@ -1577,7 +1558,7 @@ void EdgePatternsTest() using MaxPolicyT = typename cub::DeviceSegmentedSortPolicy::MaxPolicy; - using EdgeTestDispatchT = EdgeTestDispatch; + using EdgeTestDispatchT = EdgeTestDispatch; EdgeTestDispatchT dispatch; MaxPolicyT::Invoke(ptx_version, dispatch); @@ -1585,26 +1566,24 @@ void EdgePatternsTest() } template -Input GenRandomInput(OffsetT max_items, - OffsetT min_segments, - OffsetT max_segments, - bool descending) + typename ValueT> +Input GenRandomInput(int max_items, + int min_segments, + int max_segments, + bool descending) { - std::size_t items_generated {}; - const std::size_t segments_num = RandomValue(max_segments) + min_segments; + int items_generated {}; + const int segments_num = RandomValue(max_segments) + min_segments; - thrust::host_vector segment_sizes; + thrust::host_vector segment_sizes; segment_sizes.reserve(segments_num); - const OffsetT max_segment_size = 6000; + const int max_segment_size = 6000; - for (std::size_t segment_id = 0; segment_id < segments_num; segment_id++) + for (int segment_id = 0; segment_id < segments_num; segment_id++) { - const OffsetT segment_size_raw = RandomValue(max_segment_size); - const OffsetT segment_size = segment_size_raw > OffsetT{0} ? segment_size_raw - : OffsetT{0}; + const int segment_size_raw = RandomValue(max_segment_size); + const int segment_size = segment_size_raw > 0 ? segment_size_raw : 0; if (segment_size + items_generated > max_items) { @@ -1615,72 +1594,65 @@ Input GenRandomInput(OffsetT max_items, segment_sizes.push_back(segment_size); } - return Input{descending, segment_sizes}; + return Input{descending, segment_sizes}; } template -void RandomTest(OffsetT min_segments, - OffsetT max_segments) + typename ValueT> +void RandomTest(int min_segments, + int max_segments) { - const OffsetT max_items = 10000000; + const int max_items = 10000000; for (int iteration = 0; iteration < 10 * MAX_ITERATIONS; iteration++) { - Input edge_cases = - GenRandomInput(max_items, - min_segments, - max_segments, - descending); + Input edge_cases = GenRandomInput(max_items, + min_segments, + max_segments, + descending); InputTestRandom(edge_cases); } } -template +template void TestKeys() { const bool skip_values = true; - for (OffsetT segment_size : {1, 1024, 24 * 1024}) + for (int segment_size : {1, 1024, 24 * 1024}) { for (int segments : {1, 1024}) { - TestSameSizeSegments(segment_size, - segments, - skip_values); + TestSameSizeSegments(segment_size, segments, skip_values); } } } template + typename ValueT> void TestPairs() { - for (OffsetT segment_size: { 1, 1024, 24 * 1024 }) + for (int segment_size: { 1, 1024, 24 * 1024 }) { for (int segments: { 1, 1024 }) { - TestSameSizeSegments(segment_size, segments); + TestSameSizeSegments(segment_size, segments); } } - RandomTest(1 << 2, 1 << 8); - RandomTest(1 << 9, 1 << 19); - EdgePatternsTest(); + RandomTest(1 << 2, 1 << 8); + RandomTest(1 << 9, 1 << 19); + EdgePatternsTest(); } -template +template void TestKeysAndPairs() { - TestKeys(); - TestPairs(); + TestKeys(); + TestPairs(); } @@ -1695,23 +1667,22 @@ int main(int argc, char** argv) TestEmptySegments(1 << 2); TestEmptySegments(1 << 22); - TestPairs(); + TestPairs(); #if TEST_HALF_T - TestPairs(); + TestPairs(); #endif #if TEST_BF_T - TestPairs(); + TestPairs(); #endif - TestKeysAndPairs(); - TestKeysAndPairs(); - TestKeysAndPairs(); - TestKeysAndPairs(); - TestKeysAndPairs(); - TestPairs(); - TestPairs(); + TestKeysAndPairs(); + TestKeysAndPairs(); + TestKeysAndPairs(); + TestKeysAndPairs(); + TestPairs(); + TestPairs(); return 0; } diff --git a/test/test_thread_sort.cu b/test/test_thread_sort.cu index 5b7290304e..98853232c7 100644 --- a/test/test_thread_sort.cu +++ b/test/test_thread_sort.cu @@ -145,4 +145,4 @@ int main() Test(); return 0; -} \ No newline at end of file +}