Skip to content

Commit

Permalink
feat: op count timers (#4471)
Browse files Browse the repository at this point in the history
This expands on the previous
#4437 PR with the
ability to automatically microbenchmark pieces.

- If you want to measure timing , uses the preset `op-count-time`
- If you want to measure call counts, uses the preset `op-count-track`

Example of google benchmark running with preset op-count-time
```
------------------------------------------------------------------------------------
Benchmark                          Time             CPU   Iterations UserCounters...
------------------------------------------------------------------------------------
GoblinBench/GoblinFull/1       10609 ms         9065 ms            1 batch_mul_with_endomorphism=48 batch_mul_with_endomorphism(t)=646.575M
^C

-----------------------------------------------------------------------------------
Benchmark                         Time             CPU   Iterations UserCounters...
-----------------------------------------------------------------------------------
GoblinBench/GoblinFull/2      15225 ms        13342 ms            1 batch_mul_with_endomorphism=52 batch_mul_with_endomorphism(t)=1.02146G
```
Note with nanosecond units, one M(illion) is one millisecond, one G(iga)
is one second

---------

Co-authored-by: ludamad <adam@aztecprotocol.com>
  • Loading branch information
ludamad and ludamad0 committed Feb 12, 2024
1 parent ed6d521 commit 26918de
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 22 deletions.
3 changes: 1 addition & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,5 @@
"C_Cpp.vcpkg.enabled": false,
"C_Cpp.default.includePath": [
"barretenberg/cpp/src"
],
"cmake.sourceDirectory": "/mnt/user-data/adam/aztec-packages/barretenberg/cpp",
]
}
27 changes: 21 additions & 6 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -167,13 +167,23 @@
}
},
{
"name": "op-counting",
"displayName": "Release build with operation counts for benchmarks",
"name": "op-count-track",
"displayName": "Release build with operation counts",
"description": "Build with op counting",
"inherits": "clang16",
"binaryDir": "build-op-counting",
"binaryDir": "build-op-count-track",
"environment": {
"CXXFLAGS": "-DBB_USE_OP_COUNT"
"CXXFLAGS": "-DBB_USE_OP_COUNT -DBB_USE_OP_COUNT_TRACK_ONLY"
}
},
{
"name": "op-count-time",
"displayName": "Release build with time and clock counts",
"description": "Build with op counting",
"inherits": "clang16",
"binaryDir": "build-op-count-time",
"environment": {
"CXXFLAGS": "-DBB_USE_OP_COUNT -DBB_USE_OP_COUNT_TIME_ONLY"
}
},
{
Expand Down Expand Up @@ -311,9 +321,14 @@
"configurePreset": "clang16"
},
{
"name": "op-counting",
"name": "op-count-time",
"inherits": "default",
"configurePreset": "op-count-time"
},
{
"name": "op-count-track",
"inherits": "default",
"configurePreset": "op-counting"
"configurePreset": "op-count-track"
},
{
"name": "clang16-dbg",
Expand Down
65 changes: 59 additions & 6 deletions barretenberg/cpp/src/barretenberg/common/op_count.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,15 @@
#include <thread>

namespace bb::detail {
void GlobalOpCountContainer::add_entry(const char* key, std::size_t* count)

GlobalOpCountContainer::~GlobalOpCountContainer()
{
// This is useful for printing counts at the end of non-benchmarks.
// See op_count_google_bench.hpp for benchmarks.
// print();
}

void GlobalOpCountContainer::add_entry(const char* key, const std::shared_ptr<OpStats>& count)
{
std::unique_lock<std::mutex> lock(mutex);
std::stringstream ss;
Expand All @@ -19,8 +27,16 @@ void GlobalOpCountContainer::print() const
{
std::cout << "print_op_counts() START" << std::endl;
for (const Entry& entry : counts) {
if (*entry.count > 0) {
std::cout << entry.key << "\t" << *entry.count << "\t[thread=" << entry.thread_id << "]" << std::endl;
if (entry.count->count > 0) {
std::cout << entry.key << "\t" << entry.count->count << "\t[thread=" << entry.thread_id << "]" << std::endl;
}
if (entry.count->time > 0) {
std::cout << entry.key << "(t)\t" << static_cast<double>(entry.count->time) / 1000000.0
<< "ms\t[thread=" << entry.thread_id << "]" << std::endl;
}
if (entry.count->cycles > 0) {
std::cout << entry.key << "(c)\t" << entry.count->cycles << "\t[thread=" << entry.thread_id << "]"
<< std::endl;
}
}
std::cout << "print_op_counts() END" << std::endl;
Expand All @@ -30,8 +46,14 @@ std::map<std::string, std::size_t> GlobalOpCountContainer::get_aggregate_counts(
{
std::map<std::string, std::size_t> aggregate_counts;
for (const Entry& entry : counts) {
if (*entry.count > 0) {
aggregate_counts[entry.key] += *entry.count;
if (entry.count->count > 0) {
aggregate_counts[entry.key] += entry.count->count;
}
if (entry.count->time > 0) {
aggregate_counts[entry.key + "(t)"] += entry.count->time;
}
if (entry.count->cycles > 0) {
aggregate_counts[entry.key + "(c)"] += entry.count->cycles;
}
}
return aggregate_counts;
Expand All @@ -41,11 +63,42 @@ void GlobalOpCountContainer::clear()
{
std::unique_lock<std::mutex> lock(mutex);
for (Entry& entry : counts) {
*entry.count = 0;
*entry.count = OpStats();
}
}

// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
GlobalOpCountContainer GLOBAL_OP_COUNTS;

OpCountCycleReporter::OpCountCycleReporter(OpStats* stats)
: stats(stats)
{
#if __clang__ && (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86))
// Don't support any other targets but x86 clang for now, this is a bit lazy but more than fits our needs
cycles = __builtin_ia32_rdtsc();
#endif
}
OpCountCycleReporter::~OpCountCycleReporter()
{
#if __clang__ && (defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86))
// Don't support any other targets but x86 clang for now, this is a bit lazy but more than fits our needs
stats->count += 1;
stats->cycles += __builtin_ia32_rdtsc() - cycles;
#endif
}
OpCountTimeReporter::OpCountTimeReporter(OpStats* stats)
: stats(stats)
{
auto now = std::chrono::high_resolution_clock::now();
auto now_ns = std::chrono::time_point_cast<std::chrono::nanoseconds>(now);
time = static_cast<std::size_t>(now_ns.time_since_epoch().count());
}
OpCountTimeReporter::~OpCountTimeReporter()
{
auto now = std::chrono::high_resolution_clock::now();
auto now_ns = std::chrono::time_point_cast<std::chrono::nanoseconds>(now);
stats->count += 1;
stats->time += static_cast<std::size_t>(now_ns.time_since_epoch().count()) - time;
}
} // namespace bb::detail
#endif
75 changes: 67 additions & 8 deletions barretenberg/cpp/src/barretenberg/common/op_count.hpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@

#pragma once

#include <memory>
#ifndef BB_USE_OP_COUNT
// require a semicolon to appease formatters
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK() (void)0
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK_NAME(name) (void)0
#define BB_OP_COUNT_CYCLES() (void)0
#define BB_OP_COUNT_TIME() (void)0
#else
/**
* Provides an abstraction that counts operations based on function names.
Expand Down Expand Up @@ -37,20 +40,27 @@ template <std::size_t N> struct OperationLabel {
char value[N];
};

struct OpStats {
std::size_t count = 0;
std::size_t time = 0;
std::size_t cycles = 0;
};

// Contains all statically known op counts
struct GlobalOpCountContainer {
public:
struct Entry {
std::string key;
std::string thread_id;
std::size_t* count;
std::shared_ptr<OpStats> count;
};
~GlobalOpCountContainer();
std::mutex mutex;
std::vector<Entry> counts;
void print() const;
// NOTE: Should be called when other threads aren't active
void clear();
void add_entry(const char* key, std::size_t* count);
void add_entry(const char* key, const std::shared_ptr<OpStats>& count);
std::map<std::string, std::size_t> get_aggregate_counts() const;
};

Expand All @@ -60,28 +70,77 @@ extern GlobalOpCountContainer GLOBAL_OP_COUNTS;
template <OperationLabel Op> struct GlobalOpCount {
public:
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
static thread_local std::size_t* thread_local_count;
static thread_local std::shared_ptr<OpStats> stats;

static OpStats* ensure_stats()
{
if (BB_UNLIKELY(stats == nullptr)) {
stats = std::make_shared<OpStats>();
GLOBAL_OP_COUNTS.add_entry(Op.value, stats);
}
return stats.get();
}
static constexpr void increment_op_count()
{
#ifndef BB_USE_OP_COUNT_TIME_ONLY
if (std::is_constant_evaluated()) {
// We do nothing if the compiler tries to run this
return;
}
ensure_stats();
stats->count++;
#endif
}
static constexpr void add_cycle_time(std::size_t cycles)
{
#ifndef BB_USE_OP_COUNT_TRACK_ONLY
if (std::is_constant_evaluated()) {
// We do nothing if the compiler tries to run this
return;
}
if (BB_UNLIKELY(thread_local_count == nullptr)) {
thread_local_count = new std::size_t();
GLOBAL_OP_COUNTS.add_entry(Op.value, thread_local_count);
ensure_stats();
stats->cycles += cycles;
#endif
}
static constexpr void add_clock_time(std::size_t time)
{
#ifndef BB_USE_OP_COUNT_TRACK_ONLY
if (std::is_constant_evaluated()) {
// We do nothing if the compiler tries to run this
return;
}
(*thread_local_count)++;
ensure_stats();
stats->time += time;
#endif
}
};
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
template <OperationLabel Op> thread_local std::size_t* GlobalOpCount<Op>::thread_local_count;
template <OperationLabel Op> thread_local std::shared_ptr<OpStats> GlobalOpCount<Op>::stats;

// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
struct OpCountCycleReporter {
OpStats* stats;
std::size_t cycles;
OpCountCycleReporter(OpStats* stats);
~OpCountCycleReporter();
};
// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
struct OpCountTimeReporter {
OpStats* stats;
std::size_t time;
OpCountTimeReporter(OpStats* stats);
~OpCountTimeReporter();
};
} // namespace bb::detail

// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK() bb::detail::GlobalOpCount<__func__>::increment_op_count()
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK_NAME(name) bb::detail::GlobalOpCount<name>::increment_op_count()
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_CYCLES() \
bb::detail::OpCountCycleReporter __bb_op_count_cyles(bb::detail::GlobalOpCount<__func__>::ensure_stats())
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TIME() \
bb::detail::OpCountTimeReporter __bb_op_count_time(bb::detail::GlobalOpCount<__func__>::ensure_stats())
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,7 @@ template <class Fq, class Fr, class T>
std::vector<affine_element<Fq, Fr, T>> element<Fq, Fr, T>::batch_mul_with_endomorphism(
const std::span<affine_element<Fq, Fr, T>>& points, const Fr& exponent) noexcept
{
BB_OP_COUNT_TIME();
typedef affine_element<Fq, Fr, T> affine_element;
const size_t num_points = points.size();

Expand Down
8 changes: 8 additions & 0 deletions yarn-project/sequencer-client/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,13 @@
"types": "./dest/index.d.ts",
"engines": {
"node": ">=18"
},
"jest": {
"preset": "ts-jest/presets/default-esm",
"moduleNameMapper": {
"^(\\.{1,2}/.*)\\.[cm]?js$": "$1"
},
"testRegex": "./src/.*\\.test\\.(js|mjs|ts)$",
"rootDir": "./src"
}
}

0 comments on commit 26918de

Please sign in to comment.