Add experimental support for customisable benchmarking #347

Open: wants to merge 29 commits into base: master

Changes from all commits (29):
4cb3aa5 Add parameters for linux perf (Zentrik, May 2, 2024)
f389492 Add deserialisation support for linux perf parameters (Zentrik, May 2, 2024)
a375ad6 Add linux perf parameters to manual (Zentrik, May 2, 2024)
a80fb58 Add linux perf parameters to tests (Zentrik, May 2, 2024)
79613d4 Add field to store perf results (Zentrik, May 2, 2024)
f7d8ab4 Add serialization support for perf results (Zentrik, May 2, 2024)
f1eb834 Add LinuxPerf and record its version in serializations (Zentrik, May 2, 2024)
de87b46 Add serialization test support for perf results (Zentrik, May 2, 2024)
8179bc5 Run benchmark under perf if enabled (Zentrik, May 2, 2024)
3a9db26 Add tests that running under perf works (Zentrik, May 2, 2024)
15a0a39 Import noinline to support older Julia versions (Zentrik, May 2, 2024)
dc5e528 Run linux perf integration test only on buildkite (Zentrik, May 2, 2024)
87081e6 Use `@noinline` from Compat in linux_perf_func (Zentrik, May 2, 2024)
fd2f88a Introduce hooks to allow more customisable benchmarking (Zentrik, Jun 20, 2024)
8c87eb9 Fix serialization of custom benchmark result (Zentrik, Jun 20, 2024)
fd3c82b Fix serialization of custom benchmarking result (Zentrik, Jun 20, 2024)
031f5a4 Revert buildkite change (Zentrik, Jun 20, 2024)
f11b769 Don't assume hooks are functions (Zentrik, Jun 20, 2024)
512ed0a Fix deserialization of hooks (Zentrik, Jun 20, 2024)
ff4fc43 Add some tests (Zentrik, Jun 20, 2024)
5036058 Format (Zentrik, Jun 20, 2024)
749215c Move checking of parameter values into constructor (Zentrik, Jun 20, 2024)
fe22190 Simplify Deserialization (Zentrik, Jun 20, 2024)
95da3ba Cleanup (Zentrik, Jun 20, 2024)
05d971f customisable -> customizable (Zentrik, Jun 21, 2024)
6008719 Add some rudimentary documentation (Zentrik, Jun 21, 2024)
e1eb8ac Rename customisableBenchmarkTests (Zentrik, Jun 21, 2024)
496bfe0 Fix typo (Zentrik, Jun 21, 2024)
a77bfca Fixup doc (Zentrik, Jun 21, 2024)
71 changes: 70 additions & 1 deletion docs/src/manual.md
@@ -86,7 +86,18 @@ You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`,
- `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.05`.
- `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = 0.01`.

The following keyword arguments are experimental and subject to change; see [Running custom benchmarks](@ref) for further details. A brief usage sketch follows the list:

- `run_customizable_func_only`: If `true`, only the customizable benchmark is run. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.run_customizable_func_only = false`.
- `enable_customizable_func`: If `:ALL`, the customizable benchmark runs on every sample; if `:LAST`, it runs only on the last sample; if `:FALSE`, it never runs. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.enable_customizable_func = :FALSE`.
- `customizable_gcsample`: If `true`, runs `gc()` before each sample of the customizable benchmark. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.customizable_gcsample = false`.
- `setup_prehook`: Hook run before the setup code. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.setup_prehook = _nothing_func`, which returns `nothing`.
- `teardown_posthook`: Hook run after the teardown code. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.teardown_posthook = _nothing_func`, which returns `nothing`.
- `sample_result`: Computes the result recorded for each sample from the parameters and the other hooks' results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.sample_result = _nothing_func`, which returns `nothing`.
- `prehook`: Hook run immediately before the benchmark loop. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.prehook = _nothing_func`, which returns `nothing`.
- `posthook`: Hook run immediately after the benchmark loop. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.posthook = _nothing_func`, which returns `nothing`.

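A brief, hedged sketch of how these keywords are used (the API is experimental, and with the default `_nothing_func` hooks the customizable benchmark computes nothing until you supply your own):

```julia
using BenchmarkTools

b = @benchmarkable sin(x) setup = (x = rand())
# Also run the customizable benchmark on every sample. With the default
# hooks, each sample's custom result is simply `nothing`.
trial = run(b; enable_customizable_func=:ALL)
trial.customizable_result  # vector with one entry per sample
```
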
To change the default values of the above fields, one can mutate the fields of `BenchmarkTools.DEFAULT_PARAMETERS` (this is not supported for `prehook` and `posthook`), for example:

```julia
# change default for `seconds` to 2.5
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 2.50
```
@@ -347,10 +358,20 @@ BenchmarkTools.Trial
gcsample: Bool false
time_tolerance: Float64 0.05
memory_tolerance: Float64 0.01
run_customizable_func_only: Bool false
enable_customizable_func: Symbol FALSE
customizable_gcsample: Bool false
setup_prehook: _nothing_func (function of type typeof(BenchmarkTools._nothing_func))
teardown_posthook: _nothing_func (function of type typeof(BenchmarkTools._nothing_func))
sample_result: _nothing_func (function of type typeof(BenchmarkTools._nothing_func))
prehook: _nothing_func (function of type typeof(BenchmarkTools._nothing_func))
posthook: _nothing_func (function of type typeof(BenchmarkTools._nothing_func))
times: Array{Float64}((10000,)) [26549.0, 26960.0, 27030.0, 27171.0, 27211.0, 27261.0, 27270.0, 27311.0, 27311.0, 27321.0 … 55383.0, 55934.0, 58649.0, 62847.0, 68547.0, 75761.0, 247081.0, 1.421718e6, 1.488322e6, 1.50329e6]
gctimes: Array{Float64}((10000,)) [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.366184e6, 1.389518e6, 1.40116e6]
memory: Int64 16752
allocs: Int64 19
customizable_result: Nothing nothing
customizable_result_for_every_sample: Bool false
```

As you can see from the above, a couple of different timing estimates are pretty-printed with the `Trial`. You can calculate these estimates yourself using the `minimum`, `maximum`, `median`, `mean`, and `std` functions (Note that `median`, `mean`, and `std` are reexported in `BenchmarkTools` from `Statistics`):
@@ -1008,3 +1029,51 @@ This will display each `Trial` as a violin plot.
- BenchmarkTools attempts to be robust against machine noise occurring between *samples*, but BenchmarkTools can't do very much about machine noise occurring between *trials*. To cut down on the latter kind of noise, it is advised that you dedicate CPUs and memory to the benchmarking Julia process by using a shielding tool such as [cset](http://manpages.ubuntu.com/manpages/precise/man1/cset.1.html).
- On some machines, for some versions of BLAS and Julia, the number of BLAS worker threads can exceed the number of available cores. This can occasionally result in scheduling issues and inconsistent performance for BLAS-heavy benchmarks. To fix this issue, you can use `BLAS.set_num_threads(i::Int)` in the Julia REPL to ensure that the number of BLAS threads is equal to or less than the number of available cores.
- `@benchmark` is evaluated in global scope, even if called from local scope.

## Experimental - Running custom benchmarks

If you want to run extra code during a benchmark, e.g. to collect additional metrics with a tool such as Linux perf, you can configure a custom benchmark.
A custom benchmark runs in the following way, where `benchmark_function` is the function we are benchmarking:
```julia
local setup_prehook_result
try
    setup_prehook_result = setup_prehook(params)
    $(setup)
    prehook_result = prehook()
    for _ in 1:evals
        benchmark_function()
    end
    posthook_result = posthook()
    return sample_result(params, setup_prehook_result, prehook_result, posthook_result)
finally
    $(teardown)
    teardown_posthook(params, setup_prehook_result)
end
```
The result from `sample_result` is collected and can be accessed via the `customizable_result` field of `Trial`, the type that stores a benchmark's results.

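Concretely, based on the implementation in this PR (a sketch, not normative documentation), the shape of `customizable_result` depends on `enable_customizable_func`:

```julia
# :FALSE (default): the customizable benchmark never runs.
trial.customizable_result                  # nothing

# :ALL: runs on every sample; results are collected in order.
trial.customizable_result                  # Vector, one entry per sample
trial.customizable_result_for_every_sample # true

# :LAST: runs once, after the timed samples.
trial.customizable_result                  # the single value from `sample_result`
```
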
Note that `prehook` and `posthook` should be as simple and fast as possible; move any heavy lifting into `setup_prehook`, `sample_result`, and `teardown_posthook`.

As an example, these are the hooks that replicate the normal benchmarking functionality:
```julia
setup_prehook(_) = nothing
samplefunc_prehook() = (Base.gc_num(), time_ns())
samplefunc_posthook = samplefunc_prehook
function samplefunc_sample_result(params, _, prehook_result, posthook_result)
    evals = params.evals
    sample_time = posthook_result[2] - prehook_result[2]
    gcdiff = Base.GC_Diff(posthook_result[1], prehook_result[1])

    time = max((sample_time / evals) - params.overhead, 0.001)
    gctime = max((gcdiff.total_time / evals) - params.overhead, 0.0)
    memory = Int(Base.fld(gcdiff.allocd, evals))
    allocs = Int(
        Base.fld(
            gcdiff.malloc + gcdiff.realloc + gcdiff.poolalloc + gcdiff.bigalloc,
            evals,
        ),
    )
    return time, gctime, memory, allocs
end
teardown_posthook(_, _) = nothing
```
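Putting the pieces together, a run might look like the following sketch. This is hedged, not normative: the keyword plumbing is experimental, and the `my_*` hooks are placeholders (not part of BenchmarkTools) that time the sample loop with plain wall-clock time.

```julia
using BenchmarkTools

my_setup_prehook(params) = nothing         # runs before `setup`
my_prehook() = time_ns()                   # runs just before the eval loop
my_posthook() = time_ns()                  # runs just after the eval loop
# Average nanoseconds per evaluation for this sample.
my_sample_result(params, _, pre, post) = (post - pre) / params.evals
my_teardown_posthook(params, _) = nothing  # runs after `teardown`

b = @benchmarkable sum(x) setup = (x = rand(1000))
trial = run(
    b;
    samples=50,
    evals=10,
    setup_prehook=my_setup_prehook,
    prehook=my_prehook,
    posthook=my_posthook,
    sample_result=my_sample_result,
    teardown_posthook=my_teardown_posthook,
    enable_customizable_func=:LAST,
)
trial.customizable_result  # ns per evaluation, measured on the last sample
```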
2 changes: 1 addition & 1 deletion src/BenchmarkTools.jl
@@ -8,7 +8,7 @@ using Statistics
using UUIDs: uuid4
using Printf
using Profile
using Compat: pkgversion, @noinline

##############
# Parameters #
154 changes: 124 additions & 30 deletions src/execution.jl
@@ -16,6 +16,7 @@ end

mutable struct Benchmark
samplefunc
customizable_func
quote_vals
params::Parameters
end
@@ -109,21 +110,61 @@
function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, kwargs...)
params = Parameters(p; kwargs...)
@assert params.seconds > 0.0 "time limit must be greater than 0.0"
if warmup # warmup sample
!params.run_customizable_func_only &&
b.samplefunc(b.quote_vals, Parameters(params; evals=1))
params.run_customizable_func_only &&
b.customizable_func(b.quote_vals, Parameters(params; evals=1))
end
trial = Trial(params)
if params.enable_customizable_func == :ALL
trial.customizable_result = []
trial.customizable_result_for_every_sample = true
end
params.gctrial && gcscrub()
start_time = Base.time()

return_val = nothing
if !params.run_customizable_func_only
s = b.samplefunc(b.quote_vals, params)
push!(trial, s[1:(end - 1)]...)
return_val = s[end]
end
if params.enable_customizable_func == :ALL
params.customizable_gcsample && gcscrub()
s = b.customizable_func(b.quote_vals, params)
push!(trial.customizable_result, s[1])

if params.run_customizable_func_only
return_val = s[end]
end
end

iters = 2
while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
if !params.run_customizable_func_only
params.gcsample && gcscrub()
push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
end

if params.enable_customizable_func == :ALL
params.customizable_gcsample && gcscrub()
push!(trial.customizable_result, b.customizable_func(b.quote_vals, params)[1])
end

iters += 1
end

if params.enable_customizable_func == :LAST
params.customizable_gcsample && gcscrub()
s = b.customizable_func(b.quote_vals, params)
trial.customizable_result = s[1]

if params.run_customizable_func_only
return_val = s[end]
end
end

return trial, return_val
end

@@ -506,6 +547,24 @@ macro benchmarkable(args...)
end
end

samplefunc_prehook() = (Base.gc_num(), time_ns())
samplefunc_posthook = samplefunc_prehook
function samplefunc_sample_result(__params, _, prehook_result, posthook_result)
__evals = __params.evals
__sample_time = posthook_result[2] - prehook_result[2]
__gcdiff = Base.GC_Diff(posthook_result[1], prehook_result[1])

__time = max((__sample_time / __evals) - __params.overhead, 0.001)
__gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0)
__memory = Int(Base.fld(__gcdiff.allocd, __evals))
__allocs = Int(
Base.fld(
__gcdiff.malloc + __gcdiff.realloc + __gcdiff.poolalloc + __gcdiff.bigalloc,
__evals,
),
)
return __time, __gctime, __memory, __allocs
end
# `eval` an expression that forcibly defines the specified benchmark at
# top-level in order to allow transfer of locally-scoped variables into
# benchmark scope.
@@ -519,6 +578,7 @@ function generate_benchmark_definition(
@nospecialize
corefunc = gensym("core")
samplefunc = gensym("sample")
customizable_func = gensym("customizable")
type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))]
signature = Expr(:call, corefunc, quote_vars..., setup_vars...)
signature_def = Expr(
@@ -562,32 +622,66 @@
@noinline function $(samplefunc)(
$(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
)
$BenchmarkTools.@noinline $(setup)
# Isolate the benchmarked code in a closure so that, e.g., setup code doesn't change the generated code (for instance via register allocation).
# Unfortunately it still can: a variable defined in setup is passed into the invocation, adding a few instructions.
__prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline (
function (__evals)
prehook_result = $BenchmarkTools.samplefunc_prehook()
$BenchmarkTools.@noinline __return_val_2 = $(invocation)
for __iter in 2:__evals
$BenchmarkTools.@noinline $(invocation)
end
posthook_result = $BenchmarkTools.samplefunc_posthook()
return prehook_result, posthook_result, __return_val_2
end
)(
__params.evals
)
$(teardown)
return $BenchmarkTools.samplefunc_sample_result(
__params, nothing, __prehook_result, __posthook_result
)...,
__return_val
end
@noinline function $(customizable_func)(
$(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters
)
local __setup_prehook_result
try
__setup_prehook_result = $BenchmarkTools.@noinline __params.setup_prehook(
__params
)
$BenchmarkTools.@noinline $(setup)
__prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline (
function (__evals)
prehook_result = __params.prehook()
# We'll run it evals times.
$BenchmarkTools.@noinline __return_val_2 = $(invocation)
for __iter in 2:__evals
$BenchmarkTools.@noinline $(invocation)
end
posthook_result = __params.posthook()
return prehook_result, posthook_result, __return_val_2
end
)(
__params.evals
)
return __params.sample_result(
__params,
__setup_prehook_result,
__prehook_result,
__posthook_result,
),
__return_val
finally
$(teardown)
__params.teardown_posthook(__params, __setup_prehook_result)
end
end
$BenchmarkTools.Benchmark(
$(samplefunc), $(customizable_func), $(quote_vals), $(params)
)
end,
)
end