-
Notifications
You must be signed in to change notification settings - Fork 106
Use robust centrality dispersion measures #348
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ea353ed
976c112
2333a2d
6e45072
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -173,6 +173,56 @@ void measure_cpu_only_base::generate_summaries() | |
| summ.set_float64("value", cpu_noise); | ||
| } | ||
|
|
||
| const auto [cpu_first_quartile, cpu_median, cpu_third_quartile] = | ||
| nvbench::detail::statistics::compute_percentiles(m_cpu_times.cbegin(), | ||
| m_cpu_times.cend(), | ||
| {25, 50, 75}); | ||
| { | ||
| auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/q1"); | ||
| summ.set_string("name", "Q1"); | ||
| summ.set_string("hint", "duration"); | ||
| summ.set_string("description", "First quartile of CPU times of isolated kernel executions"); | ||
| summ.set_float64("value", cpu_first_quartile); | ||
| summ.set_string("hide", "Hidden by default."); | ||
| } | ||
| { | ||
| auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/median"); | ||
| summ.set_string("name", "Median"); | ||
| summ.set_string("hint", "duration"); | ||
| summ.set_string("description", "Median of CPU times of isolated kernel executions"); | ||
| summ.set_float64("value", cpu_median); | ||
| summ.set_string("hide", "Hidden by default."); | ||
| } | ||
| { | ||
| auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/q3"); | ||
| summ.set_string("name", "Q3"); | ||
| summ.set_string("hint", "duration"); | ||
| summ.set_string("description", "Third quartile of CPU times of isolated kernel executions"); | ||
| summ.set_string("hide", "Hidden by default."); | ||
| summ.set_float64("value", cpu_third_quartile); | ||
| } | ||
| { | ||
| auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/ir/absolute"); | ||
| summ.set_string("name", "IR"); | ||
| summ.set_string("hint", "duration"); | ||
| summ.set_string("description", | ||
| "Interquartile range of CPU times of isolated kernel executions"); | ||
| summ.set_string("hide", "Hidden by default."); | ||
| const auto cpu_ir = cpu_third_quartile - cpu_first_quartile; | ||
| summ.set_float64("value", cpu_ir); | ||
| } | ||
| { | ||
| auto &summ = m_state.add_summary("nv/cpu_only/time/cpu/ir/relative"); | ||
| summ.set_string("name", "IR"); | ||
| summ.set_string("hint", "percentage"); | ||
| summ.set_string("description", | ||
| "Relative interquartile range of CPU times of isolated kernel executions"); | ||
| summ.set_string("hide", "Hidden by default."); | ||
| const auto cpu_ir = cpu_third_quartile - cpu_first_quartile; | ||
| const auto cpu_robust_noise = cpu_ir / cpu_median; | ||
| summ.set_float64("value", cpu_robust_noise); | ||
|
Comment on lines
+215
to
+223
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. important: The relative IR metric at Line 222 divides by `cpu_median` with no zero/finite guard. As per coding guidelines, "nvbench/**/*: Focus on benchmark correctness, ... measurement semantics, statistical summaries, and test coverage." |
||
| } | ||
|
|
||
| if (const auto items = m_state.get_element_count(); items != 0) | ||
| { | ||
| auto &summ = m_state.add_summary("nv/cpu_only/bw/item_rate"); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,12 +31,15 @@ | |
| #include <nvbench/detail/transform_reduce.cuh> | ||
| #include <nvbench/types.cuh> | ||
|
|
||
| #include <algorithm> | ||
| #include <array> | ||
| #include <cmath> | ||
| #include <functional> | ||
| #include <iterator> | ||
| #include <limits> | ||
| #include <numeric> | ||
| #include <type_traits> | ||
| #include <vector> | ||
|
|
||
| #ifndef M_PI | ||
| #define M_PI 3.14159265358979323846 | ||
|
|
@@ -93,6 +96,56 @@ nvbench::float64_t compute_mean(It first, It last) | |
| return std::accumulate(first, last, 0.0) / static_cast<nvbench::float64_t>(num); | ||
| } | ||
|
|
||
| /** | ||
| * Computes nearest-rank percentile values: for a percentile p over S samples, the result is the sorted sample at index round(p / 100 * (S - 1)). | ||
| * | ||
| * The input range is copied before sorting, so const iterators are supported. | ||
| * If the input range is empty, all percentiles are returned as std::numeric_limits<ValueType>::quiet_NaN(). | ||
| */ | ||
|
Comment on lines
+99
to
+104
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. suggestion: Line 103 says empty input returns infinity, but Lines 115-116 return `quiet_NaN()`. Also applies to: 115-116 |
||
| template <typename Iter, | ||
| std::size_t N, | ||
| typename ValueType = typename std::iterator_traits<Iter>::value_type> | ||
| std::array<ValueType, N> compute_percentiles(Iter first, Iter last, std::array<int, N> percentiles) /* result[i] is the sample selected for percentiles[i] */ | ||
| { | ||
| std::array<ValueType, N> result{}; | ||
|
|
||
| const auto num = std::distance(first, last); | ||
| if (num < 1) /* empty range: there is nothing to rank */ | ||
| { | ||
| result.fill(std::numeric_limits<ValueType>::quiet_NaN()); /* NOTE(review): quiet_NaN() is only meaningful for floating-point ValueType; confirm callers never pass integral samples */ | ||
| return result; | ||
| } | ||
|
|
||
| std::vector<ValueType> sorted(first, last); /* work on a private copy so the caller's range is left unmodified */ | ||
| std::sort(sorted.begin(), sorted.end()); | ||
|
|
||
| const auto max_rank = static_cast<nvbench::float64_t>(sorted.size() - 1); /* largest valid index into sorted */ | ||
| for (std::size_t i = 0; i < N; ++i) | ||
| { | ||
| const auto clamped_percentile = std::clamp(percentiles[i], 0, 100); /* out-of-range requests are clamped to [0, 100] */ | ||
|
|
||
| const auto quantile = static_cast<nvbench::float64_t>(clamped_percentile) / 100.0; | ||
| const auto rank = static_cast<std::size_t>(std::round(quantile * max_rank)); /* rounds to one existing index; no interpolation between samples */ | ||
|
|
||
| result[i] = sorted[rank]; | ||
| } | ||
|
|
||
| return result; | ||
| } | ||
|
|
||
| /** | ||
| * Overload that supports calls like `compute_percentiles(first, last, {25, 50, 75})`. | ||
| */ | ||
| template <typename Iter, | ||
| std::size_t N, | ||
| typename ValueType = typename std::iterator_traits<Iter>::value_type> | ||
| std::array<ValueType, N> compute_percentiles(Iter first, Iter last, const int (&percentiles)[N]) /* N is deduced from the length of the built-in array argument */ | ||
| { | ||
| std::array<int, N> percentile_array{}; | ||
| std::copy(std::begin(percentiles), std::end(percentiles), percentile_array.begin()); /* repack the built-in array into a std::array of the same length */ | ||
| return compute_percentiles(first, last, percentile_array); /* forwards to the std::array overload, which does the actual ranking */ | ||
| } | ||
|
|
||
| /** | ||
| * Computes linear regression and returns the slope and intercept | ||
| * | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,15 +41,20 @@ void stdrel_criterion::do_add_measurement(nvbench::float64_t measurement) | |
| m_total_cuda_time += measurement; | ||
| m_cuda_times.push_back(measurement); | ||
|
|
||
| // Compute convergence statistics using CUDA timings: | ||
| const auto mean_cuda_time = m_total_cuda_time / static_cast<nvbench::float64_t>(m_total_samples); | ||
| const auto cuda_stdev = nvbench::detail::statistics::standard_deviation(m_cuda_times.cbegin(), | ||
| m_cuda_times.cend(), | ||
| mean_cuda_time); | ||
| const auto cuda_rel_stdev = cuda_stdev / mean_cuda_time; | ||
| if (std::isfinite(cuda_rel_stdev)) | ||
| // require at least 5 samples for meaningful noise estimate | ||
| if (m_total_samples > 4) | ||
| { | ||
| m_noise_tracker.push_back(cuda_rel_stdev); | ||
| // Compute convergence statistics using CUDA timings: | ||
| const auto [cuda_first_quartile, cuda_median, cuda_third_quartile] = | ||
| nvbench::detail::statistics::compute_percentiles(m_cuda_times.cbegin(), | ||
| m_cuda_times.cend(), | ||
| {25, 50, 75}); | ||
| const auto cuda_noise = (cuda_third_quartile - cuda_first_quartile) / cuda_median; | ||
|
|
||
| if (std::isfinite(cuda_noise)) | ||
| { | ||
| m_noise_tracker.push_back(cuda_noise); | ||
| } | ||
|
Comment on lines
+44
to
+57
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. critical: With the new gate at Line 45, As per coding guidelines, "nvbench/**/*: Focus on benchmark correctness, CUDA stream/event ordering, synchronization behavior, error handling, ... measurement semantics, statistical summaries, and test coverage." |
||
| } | ||
| } | ||
|
|
||
|
|
@@ -66,7 +71,7 @@ bool stdrel_criterion::do_is_finished() | |
| return true; | ||
| } | ||
|
|
||
| // Check if the noise (cuda rel stdev) has converged by inspecting a | ||
| // Check if the noise has converged by inspecting a | ||
| // trailing window of recorded noise measurements. | ||
| // This helps identify benchmarks that are inherently noisy and would | ||
| // never converge to the target stdev threshold. This check ensures that the | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
important: Relative IR is computed as `IQR / median` at Line 312 and Line 417 with no zero/finite guard. For very short or quantized timings, median can be zero and produce `inf`/`nan` summaries. Guard both calculations and publish only finite values. As per coding guidelines, "nvbench/**/*: Focus on benchmark correctness, ... measurement semantics, statistical summaries, and test coverage."
Also applies to: 411-419