Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Merge pull request #694 from senior-zero/enh-main/github/histogram_tu…
Browse files Browse the repository at this point in the history
…ning

Add policy parameter to allow tuning
  • Loading branch information
gevtushenko committed May 27, 2023
2 parents 562cf94 + 3567ba0 commit e6eb419
Show file tree
Hide file tree
Showing 38 changed files with 5,202 additions and 2,180 deletions.
23 changes: 11 additions & 12 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,28 @@
- reduce
- sum
- max
- by_key
- scan
- sum
- max
- by key
- select
- flagged
- if
- partition
- flagged
- if
- scan
- by key

### TODO

- adjacent difference
- left
- right
- histogram : needs policy
- histogram
- even
- range
- rle : needs policy
- multi even
- multi range
- rle
- encode
- non trivial runs
- reduce : needs policy
- by key
- adjacent difference
- left

### TODO

- segmented
121 changes: 121 additions & 0 deletions benchmarks/bench/adjacent_difference/subtract_left.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/******************************************************************************
* Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#include <cub/device/device_adjacent_difference.cuh>

#include <nvbench_helper.cuh>

// %RANGE% TUNE_ITEMS_PER_THREAD ipt 7:24:1
// %RANGE% TUNE_THREADS_PER_BLOCK tpb 128:1024:32

#if !TUNE_BASE
struct policy_hub_t
{
struct Policy350 : cub::ChainedPolicy<350, Policy350, Policy350>
{
using AdjacentDifferencePolicy =
cub::AgentAdjacentDifferencePolicy<TUNE_THREADS_PER_BLOCK,
TUNE_ITEMS_PER_THREAD,
cub::BLOCK_LOAD_WARP_TRANSPOSE,
cub::LOAD_CA,
cub::BLOCK_STORE_WARP_TRANSPOSE>;
};

using MaxPolicy = Policy350;
};
#endif // !TUNE_BASE

template <class T, class OffsetT>
void adjacent_difference(nvbench::state& state, nvbench::type_list<T, OffsetT>)
{
constexpr bool may_alias = false;
constexpr bool read_left = true;

using input_it_t = const T*;
using output_it_t = T*;
using difference_op_t = cub::Difference;
using offset_t = typename cub::detail::ChooseOffsetT<OffsetT>::Type;

#if !TUNE_BASE
using dispatch_t = cub::DispatchAdjacentDifference<input_it_t,
output_it_t,
difference_op_t,
offset_t,
may_alias,
read_left,
policy_hub_t>;
#else
using dispatch_t = cub::DispatchAdjacentDifference<input_it_t,
output_it_t,
difference_op_t,
offset_t,
may_alias,
read_left>;
#endif // TUNE_BASE

const auto elements = static_cast<std::size_t>(state.get_int64("Elements{io}"));
thrust::device_vector<T> in(elements);
thrust::device_vector<T> out(elements);
gen(seed_t{}, in);

input_it_t d_in = thrust::raw_pointer_cast(in.data());
output_it_t d_out = thrust::raw_pointer_cast(out.data());

state.add_element_count(elements);
state.add_global_memory_reads<T>(elements);
state.add_global_memory_writes<T>(elements);

std::size_t temp_storage_bytes{};
dispatch_t::Dispatch(nullptr,
temp_storage_bytes,
d_in,
d_out,
static_cast<offset_t>(elements),
difference_op_t{},
0);

thrust::device_vector<std::uint8_t> temp_storage(temp_storage_bytes);
std::uint8_t* d_temp_storage = thrust::raw_pointer_cast(temp_storage.data());

state.exec([&](nvbench::launch &launch) {
dispatch_t::Dispatch(d_temp_storage,
temp_storage_bytes,
d_in,
d_out,
static_cast<offset_t>(elements),
difference_op_t{},
launch.get_stream());
});
}


using types = nvbench::type_list<int32_t>;

NVBENCH_BENCH_TYPES(adjacent_difference, NVBENCH_TYPE_AXES(types, offset_types))
.set_name("cub::DeviceAdjacentDifference::SubtractLeftCopy")
.set_type_axes_names({"T{ct}", "OffsetT{ct}"})
.add_int64_power_of_two_axis("Elements{io}", nvbench::range(16, 28, 4));
142 changes: 142 additions & 0 deletions benchmarks/bench/histogram/even.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/******************************************************************************
* Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#include "histogram_common.cuh"
#include <nvbench_helper.cuh>

// %RANGE% TUNE_ITEMS ipt 7:24:1
// %RANGE% TUNE_THREADS tpb 128:1024:32
// %RANGE% TUNE_RLE_COMPRESS rle 0:1:1
// %RANGE% TUNE_WORK_STEALING ws 0:1:1
// %RANGE% TUNE_MEM_PREFERENCE mem 0:2:1
// %RANGE% TUNE_LOAD ld 0:2:1

template <typename SampleT, typename CounterT, typename OffsetT>
static void histogram(nvbench::state &state, nvbench::type_list<SampleT, CounterT, OffsetT>)
{
constexpr int num_channels = 1;
constexpr int num_active_channels = 1;

using sample_iterator_t = SampleT *;

#if !TUNE_BASE
using policy_t = policy_hub_t<key_t, num_active_channels>;
using dispatch_t = cub::DispatchHistogram<num_channels, //
num_active_channels,
sample_iterator_t,
CounterT,
SampleT,
OffsetT,
policy_t>;
#else // TUNE_BASE
using dispatch_t = cub::DispatchHistogram<num_channels, //
num_active_channels,
sample_iterator_t,
CounterT,
SampleT,
OffsetT>;
#endif // TUNE_BASE

const auto entropy = str_to_entropy(state.get_string("Entropy"));
const auto elements = state.get_int64("Elements{io}");
const auto num_bins = state.get_int64("Bins");
const int num_levels = static_cast<int>(num_bins) + 1;

const SampleT lower_level = 0;
const SampleT upper_level = get_upper_level<SampleT>(num_bins, elements);

thrust::device_vector<SampleT> input(elements);
thrust::device_vector<CounterT> hist(num_bins);
gen(seed_t{}, input, entropy, lower_level, upper_level);

SampleT *d_input = thrust::raw_pointer_cast(input.data());
CounterT *d_histogram = thrust::raw_pointer_cast(hist.data());

CounterT *d_histogram1[1] = {d_histogram};
int num_levels1[1] = {num_levels};
SampleT lower_level1[1] = {lower_level};
SampleT upper_level1[1] = {upper_level};

std::uint8_t *d_temp_storage = nullptr;
std::size_t temp_storage_bytes{};

cub::Int2Type<sizeof(SampleT) == 1> is_byte_sample;
OffsetT num_row_pixels = static_cast<OffsetT>(elements);
OffsetT num_rows = 1;
OffsetT row_stride_samples = num_row_pixels;

state.add_element_count(elements);
state.add_global_memory_reads<SampleT>(elements);
state.add_global_memory_writes<CounterT>(num_bins);

dispatch_t::DispatchEven(d_temp_storage,
temp_storage_bytes,
d_input,
d_histogram1,
num_levels1,
lower_level1,
upper_level1,
num_row_pixels,
num_rows,
row_stride_samples,
0,
is_byte_sample);

thrust::device_vector<nvbench::uint8_t> tmp(temp_storage_bytes);
d_temp_storage = thrust::raw_pointer_cast(tmp.data());

state.exec([&](nvbench::launch &launch) {
dispatch_t::DispatchEven(d_temp_storage,
temp_storage_bytes,
d_input,
d_histogram1,
num_levels1,
lower_level1,
upper_level1,
num_row_pixels,
num_rows,
row_stride_samples,
launch.get_stream(),
is_byte_sample);
});
}

using bin_types = nvbench::type_list<int32_t>;
using some_offset_types = nvbench::type_list<int32_t>;

#ifdef TUNE_SampleT
using sample_types = nvbench::type_list<TUNE_SampleT>;
#else // !defined(TUNE_SampleT)
using sample_types = nvbench::type_list<int8_t, int16_t, int32_t, int64_t, float, double>;
#endif // TUNE_SampleT

NVBENCH_BENCH_TYPES(histogram, NVBENCH_TYPE_AXES(sample_types, bin_types, some_offset_types))
.set_name("cub::cub::DeviceHistogram::HistogramEven")
.set_type_axes_names({"SampleT{ct}", "BinT{ct}", "OffsetT{ct}"})
.add_int64_power_of_two_axis("Elements{io}", nvbench::range(16, 28, 4))
.add_int64_axis("Bins", {128, 2048, 2097152})
.add_string_axis("Entropy", {"1.000", "0.544", "0.000"});
Loading

0 comments on commit e6eb419

Please sign in to comment.