From 4cb3aa5c38bc122cd1147442ec4a077684fa03a1 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 2 May 2024 18:42:55 +0100 Subject: [PATCH 01/29] Add parameters for linux perf --- src/parameters.jl | 77 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/src/parameters.jl b/src/parameters.jl index ff1bc615..85b5fcb1 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -15,9 +15,32 @@ mutable struct Parameters gcsample::Bool time_tolerance::Float64 memory_tolerance::Float64 + enable_linux_perf::Bool + linux_perf_groups::String + linux_perf_spaces::NTuple{3,Bool} + linux_perf_threads::Bool + linux_perf_gcscrub::Bool end -const DEFAULT_PARAMETERS = Parameters(5.0, 10000, 1, false, 0, true, false, 0.05, 0.01) +# Task clock has large overhead so is not useful for the short time we run functions under perf +# Further we benchmark anyways so no need for cycles or task clock +# I've tried to only use one group by getting rid of noisy or not useful metrics +const DEFAULT_PARAMETERS = Parameters( + 5.0, + 10000, + 1, + false, + 0, + true, + false, + 0.05, + 0.01, + false, + "(instructions,branch-instructions)", + (true, false, false), + true, + true, +) function Parameters(; seconds=DEFAULT_PARAMETERS.seconds, @@ -29,6 +52,11 @@ function Parameters(; gcsample=DEFAULT_PARAMETERS.gcsample, time_tolerance=DEFAULT_PARAMETERS.time_tolerance, memory_tolerance=DEFAULT_PARAMETERS.memory_tolerance, + enable_linux_perf=DEFAULT_PARAMETERS.enable_linux_perf, + linux_perf_groups=DEFAULT_PARAMETERS.linux_perf_groups, + linux_perf_spaces=DEFAULT_PARAMETERS.linux_perf_spaces, + linux_perf_threads=DEFAULT_PARAMETERS.linux_perf_threads, + linux_perf_gcscrub=DEFAULT_PARAMETERS.linux_perf_gcscrub, ) return Parameters( seconds, @@ -40,6 +68,11 @@ function Parameters(; gcsample, time_tolerance, memory_tolerance, + enable_linux_perf, + linux_perf_groups, + linux_perf_spaces, + linux_perf_threads, + linux_perf_gcscrub, ) end @@ -53,6 +86,11 @@ function Parameters( gcsample=nothing, time_tolerance=nothing, memory_tolerance=nothing, + enable_linux_perf=nothing, + linux_perf_groups=nothing, + linux_perf_spaces=nothing, + linux_perf_threads=nothing, + linux_perf_gcscrub=nothing, ) params = Parameters() params.seconds = seconds != nothing ? seconds : default.seconds @@ -65,6 +103,31 @@ function Parameters( time_tolerance != nothing ? time_tolerance : default.time_tolerance params.memory_tolerance = memory_tolerance != nothing ? 
memory_tolerance : default.memory_tolerance + params.enable_linux_perf = if enable_linux_perf != nothing + enable_linux_perf + else + default.enable_linux_perf + end + params.linux_perf_groups = if linux_perf_groups != nothing + linux_perf_groups + else + default.linux_perf_groups + end + params.linux_perf_spaces = if linux_perf_spaces != nothing + linux_perf_spaces + else + default.linux_perf_spaces + end + params.linux_perf_threads = if linux_perf_threads != nothing + linux_perf_threads + else + default.linux_perf_threads + end + params.linux_perf_gcscrub = if linux_perf_gcscrub != nothing + linux_perf_gcscrub + else + default.linux_perf_gcscrub + end return params::BenchmarkTools.Parameters end @@ -76,7 +139,12 @@ function Base.:(==)(a::Parameters, b::Parameters) a.gctrial == b.gctrial && a.gcsample == b.gcsample && a.time_tolerance == b.time_tolerance && - a.memory_tolerance == b.memory_tolerance + a.memory_tolerance == b.memory_tolerance && + a.enable_linux_perf == b.enable_linux_perf && + a.linux_perf_groups == b.linux_perf_groups && + a.linux_perf_spaces == b.linux_perf_spaces && + a.linux_perf_threads == b.linux_perf_threads && + a.linux_perf_gcscrub == b.linux_perf_gcscrub end function Base.copy(p::Parameters) @@ -90,6 +158,11 @@ function Base.copy(p::Parameters) p.gcsample, p.time_tolerance, p.memory_tolerance, + p.enable_linux_perf, + p.linux_perf_groups, + p.linux_perf_spaces, + p.linux_perf_threads, + p.linux_perf_gcscrub, ) end From f3894928e10e321237060c195c0053ca6fd5b0a3 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 2 May 2024 18:43:45 +0100 Subject: [PATCH 02/29] Add deserialisation support for linux perf parameters --- src/serialization.jl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/serialization.jl b/src/serialization.jl index 7bec2c8d..d711662e 100644 --- a/src/serialization.jl +++ b/src/serialization.jl @@ -60,6 +60,22 @@ function recover(x::Vector) # JSON spec doesn't support Inf # These fields should all be >= 0, so we can ignore -Inf case typemax(ft) + elseif fn == "enable_linux_perf" && !haskey(fields, fn) + false + elseif fn in ( + "linux_perf_groups", + "linux_perf_spaces", + "linux_perf_threads", + "linux_perf_gcscrub", + ) && !haskey(fields, fn) + getfield(BenchmarkTools.DEFAULT_PARAMETERS, Symbol(fn)) + elseif fn == "linux_perf_spaces" && haskey(fields, fn) + length(fields[fn]) == 3 || throw( + ArgumentError( + "Expecting a vector of length 3 for linux_perf_spaces parameter", + ), + ) + xsi = convert(ft, (fields[fn][1], fields[fn][2], fields[fn][3])) else convert(ft, fields[fn]) end From a375ad6b011e45e1c9011d2b5ec6e2d591fb423d Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 2 May 2024 18:43:58 +0100 Subject: [PATCH 03/29] Add linux perf parameters to manual --- docs/src/manual.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/src/manual.md b/docs/src/manual.md index df2755cf..1a651ec6 100644 --- a/docs/src/manual.md +++ b/docs/src/manual.md @@ -85,6 +85,11 @@ You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`, - `gcsample`: If `true`, run `gc()` before each sample. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.gcsample = false`. - `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.05`. - `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. 
This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = 0.01`.
+- `enable_linux_perf`: If `true`, profile using perf `evals` times. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf = false`.
+- `linux_perf_groups`: The event groups you want to profile, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups = "(instructions,branch-instructions)"`.
+- `linux_perf_spaces`: Which of user, kernel and hypervisor space you want to profile, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces = (true, false, false)`.
+- `linux_perf_threads`: If `true`, all threads are profiled else only the thread which starts the benchmark is profiled, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads = true`.
+- `linux_perf_gcscrub`: If `true`, run `gc()` before executing the profiling run. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub = true`.

 To change the default values of the above fields, one can mutate the fields of `BenchmarkTools.DEFAULT_PARAMETERS`, for example:

From a80fb58dc7fa564efb4ec55f85f2a4a559697f69 Mon Sep 17 00:00:00 2001
From: Zentrik
Date: Thu, 2 May 2024 18:44:15 +0100
Subject: [PATCH 04/29] Add linux perf parameters to tests

---
 test/ParametersTests.jl   | 20 +++++++++
 test/SerializationTests.jl | 91 +++++++++++++++++++++++++++---------
 2 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/test/ParametersTests.jl b/test/ParametersTests.jl
index 9fa07027..b028dd7d 100644
--- a/test/ParametersTests.jl
+++ b/test/ParametersTests.jl
@@ -26,6 +26,11 @@ p = Parameters(;
     gcsample=false,
     time_tolerance=0.043,
     memory_tolerance=0.15,
+    enable_linux_perf=false,
+    linux_perf_groups="(branch-instructions)",
+    linux_perf_spaces=(true, true, false),
+    linux_perf_threads=false,
+    linux_perf_gcscrub=false,
 )
 oldseconds = BenchmarkTools.DEFAULT_PARAMETERS.seconds
 oldgctrial = BenchmarkTools.DEFAULT_PARAMETERS.gctrial
@@ -35,6 +40,11 @@ oldsamples = BenchmarkTools.DEFAULT_PARAMETERS.samples
 oldevals = BenchmarkTools.DEFAULT_PARAMETERS.evals
 oldoverhead = BenchmarkTools.DEFAULT_PARAMETERS.overhead
 oldgcsample = BenchmarkTools.DEFAULT_PARAMETERS.gcsample
+old_enable_linux_perf = BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf
+old_linux_perf_groups = BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups
+old_linux_perf_spaces = BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces
+old_linux_perf_threads = BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads
+old_enable_linux_gcsample = BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub
 BenchmarkTools.DEFAULT_PARAMETERS.seconds = p.seconds
 BenchmarkTools.DEFAULT_PARAMETERS.gctrial = p.gctrial
 BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = p.time_tolerance
@@ -43,6 +53,11 @@ BenchmarkTools.DEFAULT_PARAMETERS.samples = p.samples
 BenchmarkTools.DEFAULT_PARAMETERS.evals = p.evals
 BenchmarkTools.DEFAULT_PARAMETERS.overhead = p.overhead
 BenchmarkTools.DEFAULT_PARAMETERS.gcsample = p.gcsample
+BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf = p.enable_linux_perf
+BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups = p.linux_perf_groups
+BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces = p.linux_perf_spaces
+BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads = p.linux_perf_threads +BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub = p.linux_perf_gcscrub @test p == Parameters() @test p == Parameters(p) BenchmarkTools.DEFAULT_PARAMETERS.seconds = oldseconds @@ -53,5 +68,10 @@ BenchmarkTools.DEFAULT_PARAMETERS.samples = oldsamples BenchmarkTools.DEFAULT_PARAMETERS.evals = oldevals BenchmarkTools.DEFAULT_PARAMETERS.overhead = oldoverhead BenchmarkTools.DEFAULT_PARAMETERS.gcsample = oldgcsample +BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf = old_enable_linux_perf +BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups = old_linux_perf_groups +BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces = old_linux_perf_spaces +BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads = old_linux_perf_threads +BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub = old_enable_linux_gcsample end # module diff --git a/test/SerializationTests.jl b/test/SerializationTests.jl index e24314a1..75134beb 100644 --- a/test/SerializationTests.jl +++ b/test/SerializationTests.jl @@ -19,21 +19,23 @@ function withtempdir(f::Function) end @testset "Successful (de)serialization" begin - b = @benchmarkable sin(1) - tune!(b) - bb = run(b) - - withtempdir() do - tmp = joinpath(pwd(), "tmp.json") - - BenchmarkTools.save(tmp, b.params, bb) - @test isfile(tmp) - - results = BenchmarkTools.load(tmp) - @test results isa Vector{Any} - @test length(results) == 2 - @test eq(results[1], b.params) - @test eq(results[2], bb) + for enable_linux_perf in (false, true) + b = @benchmarkable sin(1) enable_linux_perf = enable_linux_perf + tune!(b) + bb = run(b) + + withtempdir() do + tmp = joinpath(pwd(), "tmp.json") + + BenchmarkTools.save(tmp, b.params, bb) + @test isfile(tmp) + + results = BenchmarkTools.load(tmp) + @test results isa Vector{Any} + @test length(results) == 2 + @test eq(results[1], b.params) + @test eq(results[2], bb) + end end # Nested BenchmarkGroups @@ -99,22 +101,69 @@ end @test_throws ArgumentError BenchmarkTools.recover([1]) end -@testset "Backwards Comppatibility with evals_set" begin +@testset "Backwards Compatibility with evals_set and linux perf options" begin json_string = "[{\"Julia\":\"1.11.0-DEV.1116\",\"BenchmarkTools\":\"1.4.0\"},[[\"Parameters\",{\"gctrial\":true,\"time_tolerance\":0.05,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":5.0,\"overhead\":0.0,\"memory_tolerance\":0.01}]]]" json_io = IOBuffer(json_string) - @test BenchmarkTools.load(json_io) == - [BenchmarkTools.Parameters(5.0, 10000, 1, false, 0.0, true, false, 0.05, 0.01)] + @test BenchmarkTools.load(json_io) == [ + BenchmarkTools.Parameters( + 5.0, + 10000, + 1, + false, + 0.0, + true, + false, + 0.05, + 0.01, + false, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub, + ), + ] json_string = "[{\"Julia\":\"1.11.0-DEV.1116\",\"BenchmarkTools\":\"1.4.0\"},[[\"Parameters\",{\"gctrial\":true,\"time_tolerance\":0.05,\"evals_set\":true,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":5.0,\"overhead\":0.0,\"memory_tolerance\":0.01}]]]" json_io = IOBuffer(json_string) - @test BenchmarkTools.load(json_io) == - [BenchmarkTools.Parameters(5.0, 10000, 1, true, 0.0, true, false, 0.05, 0.01)] + @test BenchmarkTools.load(json_io) == [ + BenchmarkTools.Parameters( + 5.0, + 10000, + 1, + true, + 0.0, + true, + false, + 0.05, + 0.01, + false, + 
BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub, + ), + ] end @testset "Inf in Paramters struct" begin - params = BenchmarkTools.Parameters(Inf, 10000, 1, false, Inf, true, false, Inf, Inf) + params = BenchmarkTools.Parameters( + Inf, + 10000, + 1, + false, + Inf, + true, + false, + Inf, + Inf, + false, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads, + BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub, + ) io = IOBuffer() BenchmarkTools.save(io, params) From 79613d48f74298d5b42deffedca1d21ae885ca40 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 2 May 2024 18:45:03 +0100 Subject: [PATCH 05/29] Add field to store perf results --- src/trials.jl | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/src/trials.jl b/src/trials.jl index 67382813..4bc88d54 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -8,6 +8,11 @@ mutable struct Trial gctimes::Vector{Float64} memory::Int allocs::Int + linux_perf_stats::Union{LinuxPerf.Stats,Nothing} + + function Trial(params, times, gctimes, memory, allocs, linux_perf_stats=nothing) + return new(params, times, gctimes, memory, allocs, linux_perf_stats) + end end Trial(params::Parameters) = Trial(params, Float64[], Float64[], typemax(Int), typemax(Int)) @@ -21,7 +26,14 @@ function Base.:(==)(a::Trial, b::Trial) end function Base.copy(t::Trial) - return Trial(copy(t.params), copy(t.times), copy(t.gctimes), t.memory, t.allocs) + return Trial( + copy(t.params), + copy(t.times), + copy(t.gctimes), + t.memory, + t.allocs, + t.linux_perf_stats, + ) end function Base.push!(t::Trial, time, gctime, memory, allocs) @@ -40,9 +52,13 @@ end Base.length(t::Trial) = length(t.times) function Base.getindex(t::Trial, i::Number) - return push!(Trial(t.params), t.times[i], t.gctimes[i], t.memory, t.allocs) + return Trial( + t.params, [t.times[i]], [t.gctimes[i]], t.memory, t.allocs, t.linux_perf_stats + ) +end +function Base.getindex(t::Trial, i) + return Trial(t.params, t.times[i], t.gctimes[i], t.memory, t.allocs, t.linux_perf_stats) end -Base.getindex(t::Trial, i) = Trial(t.params, t.times[i], t.gctimes[i], t.memory, t.allocs) Base.lastindex(t::Trial) = length(t) function Base.sort!(t::Trial) @@ -98,10 +114,17 @@ mutable struct TrialEstimate gctime::Float64 memory::Int allocs::Int + linux_perf_stats::Union{LinuxPerf.Stats,Nothing} + + function TrialEstimate(params, times, gctime, memory, allocs, linux_perf_stats=nothing) + return new(params, times, gctime, memory, allocs, linux_perf_stats) + end end function TrialEstimate(trial::Trial, t, gct) - return TrialEstimate(params(trial), t, gct, memory(trial), allocs(trial)) + return TrialEstimate( + params(trial), t, gct, memory(trial), allocs(trial), trial.linux_perf_stats + ) end function Base.:(==)(a::TrialEstimate, b::TrialEstimate) @@ -113,7 +136,9 @@ function Base.:(==)(a::TrialEstimate, b::TrialEstimate) end function Base.copy(t::TrialEstimate) - return TrialEstimate(copy(t.params), t.time, t.gctime, t.memory, t.allocs) + return TrialEstimate( + copy(t.params), t.time, t.gctime, t.memory, t.allocs, t.linux_perf_stats + ) end function Base.minimum(trial::Trial) From f7d8ab452875d249f6c74d9f22b1677b93427675 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 2 May 2024 18:46:56 +0100 Subject: 
[PATCH 06/29] Add serialization support for perf results

---
 src/serialization.jl | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/serialization.jl b/src/serialization.jl
index d711662e..a6d28fa6 100644
--- a/src/serialization.jl
+++ b/src/serialization.jl
@@ -29,6 +29,31 @@ function JSON.lower(x::Union{values(SUPPORTED_TYPES)...})
     return [string(nameof(typeof(x))), d]
 end

+# Recovers LinuxPerf.Stats from serialized form
+function _convert(::Type{Union{Nothing,LinuxPerf.Stats}}, d)
+    if isnothing(d)
+        return nothing
+    end
+    return LinuxPerf.Stats(_convert.(LinuxPerf.ThreadStats, d["threads"]))
+end
+function _convert(::Type{LinuxPerf.ThreadStats}, d::Dict{String})
+    return LinuxPerf.ThreadStats(
+        d["pid"],
+        [
+            [_convert(LinuxPerf.Counter, counter) for counter in group] for
+            group in d["groups"]
+        ],
+    )
+end
+function _convert(::Type{LinuxPerf.Counter}, d::Dict{String})
+    return LinuxPerf.Counter(
+        _convert(LinuxPerf.EventType, d["event"]), d["value"], d["enabled"], d["running"]
+    )
+end
+function _convert(::Type{LinuxPerf.EventType}, d::Dict{String})
+    return LinuxPerf.EventType(d["category"], d["event"])
+end
+
 # a minimal 'eval' function, mirroring KeyTypes, but being slightly more lenient
 safeeval(@nospecialize x) = x
 safeeval(x::QuoteNode) = x.value
@@ -50,7 +75,9 @@ function recover(x::Vector)
     for i in 1:fc
         ft = fieldtype(T, i)
         fn = String(fieldname(T, i))
-        if ft <: get(SUPPORTED_TYPES, nameof(ft), Union{})
+        if ft == Union{Nothing,LinuxPerf.Stats}
+            xsi = _convert(ft, fields[fn])
+        elseif ft <: get(SUPPORTED_TYPES, nameof(ft), Union{})
             xsi = recover(fields[fn])
         else
             xsi = if fn == "evals_set" && !haskey(fields, fn)

From f1eb834162c2401415017f3162bd32f7c57086e5 Mon Sep 17 00:00:00 2001
From: Zentrik
Date: Thu, 2 May 2024 18:49:40 +0100
Subject: [PATCH 07/29] Add LinuxPerf and record its version in serializations

---
 Project.toml          | 2 ++
 src/BenchmarkTools.jl | 2 ++
 src/serialization.jl  | 4 +++-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 24139691..d34e7ba1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,6 +5,7 @@ version = "1.6.0"
 [deps]
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
+LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
@@ -16,6 +17,7 @@ Aqua = "0.8"
 Compat = ">= 4.11.0"
 JSON = "0.18, 0.19, 0.20, 0.21"
 JuliaFormatter = "1"
+LinuxPerf = "0.3.8"
 Logging = "<0.0.1, 1"
 Printf = "<0.0.1, 1"
 Profile = "<0.0.1, 1"
diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl
index 37102cbe..5a14bdab 100644
--- a/src/BenchmarkTools.jl
+++ b/src/BenchmarkTools.jl
@@ -10,6 +10,8 @@ using Printf
 using Profile
 using Compat

+using LinuxPerf: LinuxPerf
+
 ##############
 # Parameters #
 ##############
diff --git a/src/serialization.jl b/src/serialization.jl
index a6d28fa6..8df49a74 100644
--- a/src/serialization.jl
+++ b/src/serialization.jl
@@ -1,5 +1,7 @@
 const VERSIONS = Dict(
-    "Julia" => string(VERSION), "BenchmarkTools" => pkgversion(BenchmarkTools)
+    "Julia" => string(VERSION),
+    "BenchmarkTools" => pkgversion(BenchmarkTools),
+    "LinuxPerf" => pkgversion(LinuxPerf),
 )

 # TODO: Add any new types as they're added

From de87b4637655ab92d408029e073a41e923ae8679 Mon Sep 17 00:00:00 2001
From: Zentrik
Date: Thu, 2 May 2024 18:50:02 +0100
Subject: [PATCH 08/29] Add serialization test support
for perf results --- test/SerializationTests.jl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/test/SerializationTests.jl b/test/SerializationTests.jl index 75134beb..c6a0e9fb 100644 --- a/test/SerializationTests.jl +++ b/test/SerializationTests.jl @@ -1,11 +1,22 @@ module SerializationTests -using BenchmarkTools +using BenchmarkTools, LinuxPerf using Test function eq(x::T, y::T) where {T<:Union{values(BenchmarkTools.SUPPORTED_TYPES)...}} return all(i -> eq(getfield(x, i), getfield(y, i)), 1:fieldcount(T)) end +eq(x::String, y::String) = x == y +eq(x::NTuple{3,Bool}, y::NTuple{3,Bool}) = x == y +function eq(x::LinuxPerf.Stats, y::LinuxPerf.Stats) + return all(a -> eq(a[1], a[2]), zip(x.threads, y.threads)) +end +function eq(x::LinuxPerf.ThreadStats, y::LinuxPerf.ThreadStats) + return x.pid == y.pid && x.groups == y.groups +end +eq(x::Nothing, y) = isnothing(y) +eq(x, y::Nothing) = isnothing(x) +eq(x::Nothing, y::Nothing) = true eq(x::T, y::T) where {T} = isapprox(x, y) function withtempdir(f::Function) From 8179bc5ee66c29de79ec95fa28a10781b22aec69 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 2 May 2024 18:52:07 +0100 Subject: [PATCH 09/29] Run benchmark under perf if enabled --- src/execution.jl | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/execution.jl b/src/execution.jl index a9c3e25b..5bb524f3 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -16,6 +16,7 @@ end mutable struct Benchmark samplefunc + linux_perf_func quote_vals params::Parameters end @@ -124,6 +125,12 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, k push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...) iters += 1 end + + if params.enable_linux_perf + params.linux_perf_gcscrub && gcscrub() + trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params) + end + return trial, return_val end @@ -519,6 +526,7 @@ function generate_benchmark_definition( @nospecialize corefunc = gensym("core") samplefunc = gensym("sample") + linux_perf_func = gensym("perf") type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))] signature = Expr(:call, corefunc, quote_vars..., setup_vars...) signature_def = Expr( @@ -587,7 +595,43 @@ function generate_benchmark_definition( ) return __time, __gctime, __memory, __allocs, __return_val end - $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params)) + @noinline function $(linux_perf_func)( + $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters + ) + # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061 + __linux_perf_groups = $LinuxPerf.set_default_spaces( + $LinuxPerf.parse_groups(__params.linux_perf_groups), + __params.linux_perf_spaces, + ) + __linux_perf_bench = $LinuxPerf.make_bench_threaded( + __linux_perf_groups; threads=__params.linux_perf_threads + ) + + try + @noinline $(setup) + __evals = __params.evals + # Isolate code so that e.g. setup doesn't cause different code to be generated by e.g. changing register allocation + # Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions + @noinline (function (__evals) + $LinuxPerf.enable_all!() + # We'll run it evals times. 
+ @noinline __return_val_2 = $(invocation) + for __iter in 2:__evals + @noinline $(invocation) + end + $LinuxPerf.disable_all!() + # trick the compiler not to eliminate the code + return __return_val_2 + end)(__evals) + return $LinuxPerf.Stats(__linux_perf_bench) + finally + close(__linux_perf_bench) + $(teardown) + end + end + $BenchmarkTools.Benchmark( + $(samplefunc), $(linux_perf_func), $(quote_vals), $(params) + ) end, ) end From 3a9db26efef3f92c2f03c61e73af86203ebbd226 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 2 May 2024 18:52:39 +0100 Subject: [PATCH 10/29] Add tests that running under perf works --- test/ExecutionTests.jl | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index 57666815..5e91d449 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -3,6 +3,7 @@ module ExecutionTests using BenchmarkTools using Profile using Test +using LinuxPerf seteq(a, b) = length(a) == length(b) == length(intersect(a, b)) @@ -382,4 +383,34 @@ b = x = nothing GC.gc() @test x_finalized +################################## +# Linux Perf Integration # +################################## + +b = @benchmarkable sin($(Ref(42.0))[]) +results = run(b; seconds=1, enable_linux_perf=false) +@test results.linux_perf_stats === nothing + +b = @benchmarkable sin($(Ref(42.0))[]) +results = run(b; seconds=1) +@test results.linux_perf_stats === nothing + +b = @benchmarkable sin($(Ref(42.0))[]) +results = run(b; seconds=1, enable_linux_perf=true, evals=10^3) +@test results.linux_perf_stats !== nothing +@test any(results.linux_perf_stats.threads) do thread + instructions = LinuxPerf.scaledcount(thread["instructions"]) + !isnan(instructions) && instructions > 10^4 +end + +tune!(groups) +results = run(groups; enable_linux_perf=true) +for (name, group_results) in BenchmarkTools.leaves(results) + @test group_results.linux_perf_stats !== nothing + @test any(group_results.linux_perf_stats.threads) do thread + instructions = LinuxPerf.scaledcount(thread["instructions"]) + !isnan(instructions) && instructions > 10^3 + end +end + end # module From 15a0a392750bf5e7829849687e2dd304e8f30c99 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 2 May 2024 19:16:26 +0100 Subject: [PATCH 11/29] Import noinline to support older Julia versions --- src/BenchmarkTools.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl index 5a14bdab..362d7b35 100644 --- a/src/BenchmarkTools.jl +++ b/src/BenchmarkTools.jl @@ -8,7 +8,7 @@ using Statistics using UUIDs: uuid4 using Printf using Profile -using Compat +using Compat: pkgversion, @noinline using LinuxPerf: LinuxPerf From dc5e52837e988c0aaa018400ac1be3193e20584f Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 2 May 2024 21:51:24 +0100 Subject: [PATCH 12/29] Run linux perf integration test only on buildkite --- .buildkite/pipeline.yml | 6 ++-- .github/workflows/CI.yml | 2 ++ test/ExecutionTests.jl | 31 ----------------- test/LinuxPerfIntegrationTests.jl | 55 +++++++++++++++++++++++++++++++ test/SerializationTests.jl | 32 +++++++++--------- test/runtests.jl | 6 ++++ 6 files changed, 81 insertions(+), 51 deletions(-) create mode 100644 test/LinuxPerfIntegrationTests.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index e90054d8..45d42406 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,12 +1,12 @@ --- steps: - - label: ":test_tube: Test the package" + - label: "Julia 1 - Test LinuxPerf 
Integration" plugins: - JuliaCI/julia#v1: version: "1" - commands: | - julia --version + - JuliaCI/julia-test#v1: agents: queue: "juliaecosystem" os: "linux" arch: "x86_64" + timeout_in_minutes: 15 diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index b22812a9..5f39504d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -51,6 +51,8 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 + env: + TEST_PERF_INTEGRATION: false - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v3 with: diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index 5e91d449..57666815 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -3,7 +3,6 @@ module ExecutionTests using BenchmarkTools using Profile using Test -using LinuxPerf seteq(a, b) = length(a) == length(b) == length(intersect(a, b)) @@ -383,34 +382,4 @@ b = x = nothing GC.gc() @test x_finalized -################################## -# Linux Perf Integration # -################################## - -b = @benchmarkable sin($(Ref(42.0))[]) -results = run(b; seconds=1, enable_linux_perf=false) -@test results.linux_perf_stats === nothing - -b = @benchmarkable sin($(Ref(42.0))[]) -results = run(b; seconds=1) -@test results.linux_perf_stats === nothing - -b = @benchmarkable sin($(Ref(42.0))[]) -results = run(b; seconds=1, enable_linux_perf=true, evals=10^3) -@test results.linux_perf_stats !== nothing -@test any(results.linux_perf_stats.threads) do thread - instructions = LinuxPerf.scaledcount(thread["instructions"]) - !isnan(instructions) && instructions > 10^4 -end - -tune!(groups) -results = run(groups; enable_linux_perf=true) -for (name, group_results) in BenchmarkTools.leaves(results) - @test group_results.linux_perf_stats !== nothing - @test any(group_results.linux_perf_stats.threads) do thread - instructions = LinuxPerf.scaledcount(thread["instructions"]) - !isnan(instructions) && instructions > 10^3 - end -end - end # module diff --git a/test/LinuxPerfIntegrationTests.jl b/test/LinuxPerfIntegrationTests.jl new file mode 100644 index 00000000..c1a5e2b1 --- /dev/null +++ b/test/LinuxPerfIntegrationTests.jl @@ -0,0 +1,55 @@ +module LinuxPerfIntegrationTests + +using BenchmarkTools +using Test +using LinuxPerf + +### Serialization Test ### +b = @benchmarkable sin(1) enable_linux_perf = true +tune!(b) +bb = run(b) + +withtempdir() do + tmp = joinpath(pwd(), "tmp.json") + + BenchmarkTools.save(tmp, b.params, bb) + @test isfile(tmp) + + results = BenchmarkTools.load(tmp) + @test results isa Vector{Any} + @test length(results) == 2 + @test eq(results[1], b.params) + @test eq(results[2], bb) +end + +################################## +# Linux Perf Integration # +################################## + +b = @benchmarkable sin($(Ref(42.0))[]) +results = run(b; seconds=1, enable_linux_perf=false) +@test results.linux_perf_stats === nothing + +b = @benchmarkable sin($(Ref(42.0))[]) +results = run(b; seconds=1) +@test results.linux_perf_stats === nothing + +b = @benchmarkable sin($(Ref(42.0))[]) +results = run(b; seconds=1, enable_linux_perf=true, evals=10^3) +@test results.linux_perf_stats !== nothing +@test any(results.linux_perf_stats.threads) do thread + instructions = LinuxPerf.scaledcount(thread["instructions"]) + !isnan(instructions) && instructions > 10^4 +end + +tune!(groups) +results = run(groups; enable_linux_perf=true) +for (name, group_results) in BenchmarkTools.leaves(results) + @test group_results.linux_perf_stats !== 
nothing + @test any(group_results.linux_perf_stats.threads) do thread + instructions = LinuxPerf.scaledcount(thread["instructions"]) + !isnan(instructions) && instructions > 10^3 + end +end + +end diff --git a/test/SerializationTests.jl b/test/SerializationTests.jl index c6a0e9fb..7be3e791 100644 --- a/test/SerializationTests.jl +++ b/test/SerializationTests.jl @@ -30,23 +30,21 @@ function withtempdir(f::Function) end @testset "Successful (de)serialization" begin - for enable_linux_perf in (false, true) - b = @benchmarkable sin(1) enable_linux_perf = enable_linux_perf - tune!(b) - bb = run(b) - - withtempdir() do - tmp = joinpath(pwd(), "tmp.json") - - BenchmarkTools.save(tmp, b.params, bb) - @test isfile(tmp) - - results = BenchmarkTools.load(tmp) - @test results isa Vector{Any} - @test length(results) == 2 - @test eq(results[1], b.params) - @test eq(results[2], bb) - end + b = @benchmarkable sin(1) + tune!(b) + bb = run(b) + + withtempdir() do + tmp = joinpath(pwd(), "tmp.json") + + BenchmarkTools.save(tmp, b.params, bb) + @test isfile(tmp) + + results = BenchmarkTools.load(tmp) + @test results isa Vector{Any} + @test length(results) == 2 + @test eq(results[1], b.params) + @test eq(results[2], bb) end # Nested BenchmarkGroups diff --git a/test/runtests.jl b/test/runtests.jl index 6f58393a..38ce59f7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,3 +34,9 @@ println("done (took ", took_seconds, " seconds)") print("Testing serialization...") took_seconds = @elapsed include("SerializationTests.jl") println("done (took ", took_seconds, " seconds)") + +if parse(Bool, get(ENV, "TEST_PERF_INTEGRATION", "true")) + print("Testing Perf integration...") + took_seconds = @elapsed include("LinuxPerfIntegrationTests.jl") + println("done (took ", took_seconds, " seconds)") +end From 87081e6b66b73683344daf194193514442b9b072 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 2 May 2024 22:29:35 +0100 Subject: [PATCH 13/29] Use `@noinline` from Compat in linux_perf_func --- src/execution.jl | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/execution.jl b/src/execution.jl index 5bb524f3..44812f51 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -608,21 +608,25 @@ function generate_benchmark_definition( ) try - @noinline $(setup) + $BenchmarkTools.@noinline $(setup) __evals = __params.evals # Isolate code so that e.g. setup doesn't cause different code to be generated by e.g. changing register allocation # Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions - @noinline (function (__evals) - $LinuxPerf.enable_all!() - # We'll run it evals times. - @noinline __return_val_2 = $(invocation) - for __iter in 2:__evals - @noinline $(invocation) + $BenchmarkTools.@noinline ( + function (__evals) + $LinuxPerf.enable_all!() + # We'll run it evals times. 
+ $BenchmarkTools.@noinline __return_val_2 = $(invocation) + for __iter in 2:__evals + $BenchmarkTools.@noinline $(invocation) + end + $LinuxPerf.disable_all!() + # trick the compiler not to eliminate the code + return __return_val_2 end - $LinuxPerf.disable_all!() - # trick the compiler not to eliminate the code - return __return_val_2 - end)(__evals) + )( + __evals + ) return $LinuxPerf.Stats(__linux_perf_bench) finally close(__linux_perf_bench) From fd2f88a6050d11ca92ad106d7029cecbc6b9cccc Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 09:53:20 +0100 Subject: [PATCH 14/29] Introduce hooks to allow more customisable benchmarking --- .github/workflows/CI.yml | 2 - Project.toml | 2 - docs/src/manual.md | 5 - src/BenchmarkTools.jl | 2 - src/execution.jl | 152 +++++++++++++------- src/parameters.jl | 224 +++++++++++++++++++++--------- src/serialization.jl | 71 ++++------ src/trials.jl | 67 +++++++-- test/LinuxPerfIntegrationTests.jl | 55 -------- test/ParametersTests.jl | 20 --- test/SerializationTests.jl | 50 ++++--- test/runtests.jl | 6 - 12 files changed, 368 insertions(+), 288 deletions(-) delete mode 100644 test/LinuxPerfIntegrationTests.jl diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5f39504d..b22812a9 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -51,8 +51,6 @@ jobs: ${{ runner.os }}- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - env: - TEST_PERF_INTEGRATION: false - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v3 with: diff --git a/Project.toml b/Project.toml index d34e7ba1..24139691 100644 --- a/Project.toml +++ b/Project.toml @@ -5,7 +5,6 @@ version = "1.6.0" [deps] Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -LinuxPerf = "b4c46c6c-4fb0-484d-a11a-41bc3392d094" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" @@ -17,7 +16,6 @@ Aqua = "0.8" Compat = ">= 4.11.0" JSON = "0.18, 0.19, 0.20, 0.21" JuliaFormatter = "1" -LinuxPerf = "0.3.8" Logging = "<0.0.1, 1" Printf = "<0.0.1, 1" Profile = "<0.0.1, 1" diff --git a/docs/src/manual.md b/docs/src/manual.md index 1a651ec6..df2755cf 100644 --- a/docs/src/manual.md +++ b/docs/src/manual.md @@ -85,11 +85,6 @@ You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`, - `gcsample`: If `true`, run `gc()` before each sample. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.gcsample = false`. - `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.05`. - `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = 0.01`. -- `enable_linux_perf`: If `true`, profile using perf `evals` times. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf = false`. -- `linux_perf_groups`: The event groups you want to profile, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups = "(instructions,branch-instructions)"`. 
-- `linux_perf_spaces`: Which of user, kernel and hypervisor space you want to profile, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces = (true, false, false)`.
-- `linux_perf_threads`: If `true`, all threads are profiled else only the thread which starts the benchmark is profiled, see [LinuxPerf.jl](https://github.com/JuliaPerf/LinuxPerf.jl) for more details. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads = true`.
-- `linux_perf_gcscrub`: If `true`, run `gc()` before executing the profiling run. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub = true`.

 To change the default values of the above fields, one can mutate the fields of `BenchmarkTools.DEFAULT_PARAMETERS`, for example:

diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl
index 362d7b35..766fe6c9 100644
--- a/src/BenchmarkTools.jl
+++ b/src/BenchmarkTools.jl
@@ -10,8 +10,6 @@ using Printf
 using Profile
 using Compat: pkgversion, @noinline

-using LinuxPerf: LinuxPerf
-
 ##############
 # Parameters #
 ##############
diff --git a/src/execution.jl b/src/execution.jl
index 44812f51..973880e0 100644
--- a/src/execution.jl
+++ b/src/execution.jl
@@ -16,7 +16,7 @@ end

 mutable struct Benchmark
     samplefunc
-    linux_perf_func
+    customisable_func
     quote_vals
     params::Parameters
 end
@@ -110,25 +110,56 @@ end
 function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, kwargs...)
     params = Parameters(p; kwargs...)
     @assert params.seconds > 0.0 "time limit must be greater than 0.0"
-    if warmup
-        b.samplefunc(b.quote_vals, Parameters(params; evals=1)) #warmup sample
+    @assert params.enable_customisable_func in (:FALSE, :ALL, :LAST) "invalid value $(params.enable_customisable_func) for enable_customisable_func which must be :FALSE, :ALL or :LAST"
+    @assert !(
+        params.run_customisable_func_only && params.enable_customisable_func == :FALSE
+    ) "run_customisable_func_only is set to true, but enable_customisable_func is set to :FALSE"
+    if warmup #warmup sample
+        !params.run_customisable_func_only &&
+            b.samplefunc(b.quote_vals, Parameters(params; evals=1))
+        params.run_customisable_func_only &&
+            b.customisable_func(b.quote_vals, Parameters(params; evals=1))
     end
     trial = Trial(params)
+    if params.enable_customisable_func == :ALL
+        trial.customisable_result = []
+        trial.customisable_result_for_every_sample = true
+    end
     params.gctrial && gcscrub()
     start_time = Base.time()
-    s = b.samplefunc(b.quote_vals, params)
-    push!(trial, s[1:(end - 1)]...)
-    return_val = s[end]
+
+    return_val = nothing
+    if !params.run_customisable_func_only
+        s = b.samplefunc(b.quote_vals, params)
+        push!(trial, s[1:(end - 1)]...)
+        return_val = s[end]
+    end
+    if params.enable_customisable_func == :ALL
+        params.customisable_gcsample && gcscrub()
+        push!(trial.customisable_result, b.customisable_func(b.quote_vals, params)[1])
+    end
+
     iters = 2
     while (Base.time() - start_time) < params.seconds && iters ≤ params.samples
         params.gcsample && gcscrub()
         push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...)
+ + if params.enable_customisable_func == :ALL + params.customisable_gcsample && gcscrub() + push!(trial.customisable_result, b.customisable_func(b.quote_vals, params)[1]) + end + iters += 1 end - if params.enable_linux_perf - params.linux_perf_gcscrub && gcscrub() - trial.linux_perf_stats = b.linux_perf_func(b.quote_vals, params) + if params.enable_customisable_func !== :FALSE + params.customisable_gcsample && gcscrub() + s = b.customisable_func(b.quote_vals, params) + trial.customisable_result = s[1] + + if params.run_customisable_func_only + return_val = s[end] + end end return trial, return_val @@ -513,6 +544,24 @@ macro benchmarkable(args...) end end +samplefunc_prehook() = (Base.gc_num(), time_ns()) +samplefunc_posthook = samplefunc_prehook +function samplefunc_sample_result(__params, _, prehook_result, posthook_result) + __evals = __params.evals + __sample_time = posthook_result[2] - prehook_result[2] + __gcdiff = Base.GC_Diff(posthook_result[1], prehook_result[1]) + + __time = max((__sample_time / __evals) - __params.overhead, 0.001) + __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0) + __memory = Int(Base.fld(__gcdiff.allocd, __evals)) + __allocs = Int( + Base.fld( + __gcdiff.malloc + __gcdiff.realloc + __gcdiff.poolalloc + __gcdiff.bigalloc, + __evals, + ), + ) + return __time, __gctime, __memory, __allocs +end # `eval` an expression that forcibly defines the specified benchmark at # top-level in order to allow transfer of locally-scoped variables into # benchmark scope. @@ -526,7 +575,7 @@ function generate_benchmark_definition( @nospecialize corefunc = gensym("core") samplefunc = gensym("sample") - linux_perf_func = gensym("perf") + customisable_func = gensym("customisable") type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))] signature = Expr(:call, corefunc, quote_vars..., setup_vars...) signature_def = Expr( @@ -570,71 +619,70 @@ function generate_benchmark_definition( @noinline function $(samplefunc)( $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters ) - $(setup) - __evals = __params.evals - __gc_start = Base.gc_num() - __start_time = time_ns() - __return_val = $(invocation) - for __iter in 2:__evals - $(invocation) - end - __sample_time = time_ns() - __start_time - __gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start) - $(teardown) - __time = max((__sample_time / __evals) - __params.overhead, 0.001) - __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0) - __memory = Int(Base.fld(__gcdiff.allocd, __evals)) - __allocs = Int( - Base.fld( - __gcdiff.malloc + - __gcdiff.realloc + - __gcdiff.poolalloc + - __gcdiff.bigalloc, - __evals, - ), + $BenchmarkTools.@noinline $(setup) + # Isolate code so that e.g. setup doesn't cause different code to be generated by e.g. changing register allocation + # Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions + __prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline ( + function (__evals) + prehook_result = $BenchmarkTools.samplefunc_prehook() + # We'll run it evals times. 
+ $BenchmarkTools.@noinline __return_val_2 = $(invocation) + for __iter in 2:__evals + $BenchmarkTools.@noinline $(invocation) + end + posthook_result = $BenchmarkTools.samplefunc_posthook() + # trick the compiler not to eliminate the code + return prehook_result, posthook_result, __return_val_2 + end + )( + __params.evals ) - return __time, __gctime, __memory, __allocs, __return_val + $(teardown) + return $BenchmarkTools.samplefunc_sample_result( + __params, nothing, __prehook_result, __posthook_result + )..., + __return_val end - @noinline function $(linux_perf_func)( + @noinline function $(customisable_func)( $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters ) - # Based on https://github.com/JuliaPerf/LinuxPerf.jl/blob/a7fee0ff261a5b5ce7a903af7b38d1b5c27dd931/src/LinuxPerf.jl#L1043-L1061 - __linux_perf_groups = $LinuxPerf.set_default_spaces( - $LinuxPerf.parse_groups(__params.linux_perf_groups), - __params.linux_perf_spaces, - ) - __linux_perf_bench = $LinuxPerf.make_bench_threaded( - __linux_perf_groups; threads=__params.linux_perf_threads - ) - + local __setup_prehook_result try + __setup_prehook_result = $BenchmarkTools.@noinline __params.setup_prehook( + __params + ) $BenchmarkTools.@noinline $(setup) - __evals = __params.evals # Isolate code so that e.g. setup doesn't cause different code to be generated by e.g. changing register allocation # Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions - $BenchmarkTools.@noinline ( + __prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline ( function (__evals) - $LinuxPerf.enable_all!() + prehook_result = __params.prehook() # We'll run it evals times. $BenchmarkTools.@noinline __return_val_2 = $(invocation) for __iter in 2:__evals $BenchmarkTools.@noinline $(invocation) end - $LinuxPerf.disable_all!() + posthook_result = __params.posthook() # trick the compiler not to eliminate the code - return __return_val_2 + return prehook_result, posthook_result, __return_val_2 end )( - __evals + __params.evals ) - return $LinuxPerf.Stats(__linux_perf_bench) + return __params.sample_result( + __params, + __setup_prehook_result, + __prehook_result, + __posthook_result, + ), + __return_val finally - close(__linux_perf_bench) $(teardown) + __params.teardown_posthook(__params, __setup_prehook_result) end end $BenchmarkTools.Benchmark( - $(samplefunc), $(linux_perf_func), $(quote_vals), $(params) + $(samplefunc), $(customisable_func), $(quote_vals), $(params) ) end, ) diff --git a/src/parameters.jl b/src/parameters.jl index 85b5fcb1..cbd530bd 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -5,7 +5,7 @@ const RESOLUTION = 1000 # 1 μs = 1000 ns # Parameters # ############## -mutable struct Parameters +mutable struct Parameters{A<:Function,B<:Function} seconds::Float64 samples::Int evals::Int @@ -15,17 +15,59 @@ mutable struct Parameters gcsample::Bool time_tolerance::Float64 memory_tolerance::Float64 - enable_linux_perf::Bool - linux_perf_groups::String - linux_perf_spaces::NTuple{3,Bool} - linux_perf_threads::Bool - linux_perf_gcscrub::Bool + run_customisable_func_only::Bool + enable_customisable_func::Symbol + customisable_gcsample::Bool + setup_prehook::Function + teardown_posthook::Function + sample_result::Function + prehook::A + posthook::B end -# Task clock has large overhead so is not useful for the short time we run functions under perf -# Further we benchmark anyways so no need for cycles or task clock -# I've tried to only 
use one group by getting rid of noisy or not useful metrics -const DEFAULT_PARAMETERS = Parameters( +# https://github.com/JuliaLang/julia/issues/17186 +function Parameters( + seconds, + samples, + evals, + evals_set, + overhead, + gctrial, + gcsample, + time_tolerance, + memory_tolerance, + run_customisable_func_only, + enable_customisable_func, + customisable_gcsample, + setup_prehook, + teardown_posthook, + sample_result, + prehook::A, + posthook::B, +) where {A,B} + return Parameters{A,B}( + seconds, + samples, + evals, + evals_set, + overhead, + gctrial, + gcsample, + time_tolerance, + memory_tolerance, + run_customisable_func_only, + enable_customisable_func, + customisable_gcsample, + setup_prehook, + teardown_posthook, + sample_result, + prehook, + posthook, + ) +end + +_nothing_func(args...) = nothing +DEFAULT_PARAMETERS = Parameters( 5.0, 10000, 1, @@ -35,11 +77,16 @@ const DEFAULT_PARAMETERS = Parameters( false, 0.05, 0.01, + # Customisable Parameters false, - "(instructions,branch-instructions)", - (true, false, false), - true, - true, + :FALSE, + false, + # Customisable functions + _nothing_func, + _nothing_func, + _nothing_func, + _nothing_func, + _nothing_func, ) function Parameters(; @@ -52,11 +99,14 @@ function Parameters(; gcsample=DEFAULT_PARAMETERS.gcsample, time_tolerance=DEFAULT_PARAMETERS.time_tolerance, memory_tolerance=DEFAULT_PARAMETERS.memory_tolerance, - enable_linux_perf=DEFAULT_PARAMETERS.enable_linux_perf, - linux_perf_groups=DEFAULT_PARAMETERS.linux_perf_groups, - linux_perf_spaces=DEFAULT_PARAMETERS.linux_perf_spaces, - linux_perf_threads=DEFAULT_PARAMETERS.linux_perf_threads, - linux_perf_gcscrub=DEFAULT_PARAMETERS.linux_perf_gcscrub, + run_customisable_func_only=DEFAULT_PARAMETERS.run_customisable_func_only, + enable_customisable_func=DEFAULT_PARAMETERS.enable_customisable_func, + customisable_gcsample=DEFAULT_PARAMETERS.customisable_gcsample, + setup_prehook=DEFAULT_PARAMETERS.setup_prehook, + teardown_posthook=DEFAULT_PARAMETERS.teardown_posthook, + sample_result=DEFAULT_PARAMETERS.sample_result, + prehook=DEFAULT_PARAMETERS.prehook, + posthook=DEFAULT_PARAMETERS.posthook, ) return Parameters( seconds, @@ -68,11 +118,14 @@ function Parameters(; gcsample, time_tolerance, memory_tolerance, - enable_linux_perf, - linux_perf_groups, - linux_perf_spaces, - linux_perf_threads, - linux_perf_gcscrub, + run_customisable_func_only, + enable_customisable_func, + customisable_gcsample, + setup_prehook, + teardown_posthook, + sample_result, + prehook, + posthook, ) end @@ -81,54 +134,91 @@ function Parameters( seconds=nothing, samples=nothing, evals=nothing, + evals_set=nothing, overhead=nothing, gctrial=nothing, gcsample=nothing, time_tolerance=nothing, memory_tolerance=nothing, - enable_linux_perf=nothing, - linux_perf_groups=nothing, - linux_perf_spaces=nothing, - linux_perf_threads=nothing, - linux_perf_gcscrub=nothing, + run_customisable_func_only=nothing, + enable_customisable_func=nothing, + customisable_gcsample=nothing, + setup_prehook=nothing, + teardown_posthook=nothing, + sample_result=nothing, + prehook=nothing, + posthook=nothing, ) - params = Parameters() - params.seconds = seconds != nothing ? seconds : default.seconds - params.samples = samples != nothing ? samples : default.samples - params.evals = evals != nothing ? evals : default.evals - params.overhead = overhead != nothing ? overhead : default.overhead - params.gctrial = gctrial != nothing ? gctrial : default.gctrial - params.gcsample = gcsample != nothing ? 
gcsample : default.gcsample - params.time_tolerance = + params_seconds = seconds != nothing ? seconds : default.seconds + params_samples = samples != nothing ? samples : default.samples + params_evals = evals != nothing ? evals : default.evals + params_evals_set = evals_set != nothing ? evals_set : default.evals_set + params_overhead = overhead != nothing ? overhead : default.overhead + params_gctrial = gctrial != nothing ? gctrial : default.gctrial + params_gcsample = gcsample != nothing ? gcsample : default.gcsample + params_time_tolerance = time_tolerance != nothing ? time_tolerance : default.time_tolerance - params.memory_tolerance = + params_memory_tolerance = memory_tolerance != nothing ? memory_tolerance : default.memory_tolerance - params.enable_linux_perf = if enable_linux_perf != nothing - enable_linux_perf + params_run_customisable_func_only = if run_customisable_func_only != nothing + run_customisable_func_only + else + default.run_customisable_func_only + end + params_enable_customisable_func = if enable_customisable_func != nothing + enable_customisable_func else - default.enable_linux_perf + default.enable_customisable_func end - params.linux_perf_groups = if linux_perf_groups != nothing - linux_perf_groups + params_customisable_gcscrub = if customisable_gcsample != nothing + customisable_gcsample else - default.linux_perf_groups + default.customisable_gcsample end - params.linux_perf_spaces = if linux_perf_spaces != nothing - linux_perf_spaces + params_setup_prehook = if setup_prehook != nothing + setup_prehook else - default.linux_perf_spaces + default.setup_prehook end - params.linux_perf_threads = if linux_perf_threads != nothing - linux_perf_threads + params_teardown_posthook = if teardown_posthook != nothing + teardown_posthook else - default.linux_perf_threads + default.teardown_posthook end - params.linux_perf_gcscrub = if linux_perf_gcscrub != nothing - linux_perf_gcscrub + params_sample_result = if sample_result != nothing + sample_result else - default.linux_perf_gcscrub + default.sample_result end - return params::BenchmarkTools.Parameters + params_prehook = if prehook != nothing + prehook + else + default.prehook + end + params_posthook = if posthook != nothing + posthook + else + default.posthook + end + return Parameters( + params_seconds, + params_samples, + params_evals, + params_evals_set, + params_overhead, + params_gctrial, + params_gcsample, + params_time_tolerance, + params_memory_tolerance, + params_run_customisable_func_only, + params_enable_customisable_func, + params_customisable_gcscrub, + params_setup_prehook, + params_teardown_posthook, + params_sample_result, + params_prehook, + params_posthook, + )::BenchmarkTools.Parameters end function Base.:(==)(a::Parameters, b::Parameters) @@ -140,11 +230,14 @@ function Base.:(==)(a::Parameters, b::Parameters) a.gcsample == b.gcsample && a.time_tolerance == b.time_tolerance && a.memory_tolerance == b.memory_tolerance && - a.enable_linux_perf == b.enable_linux_perf && - a.linux_perf_groups == b.linux_perf_groups && - a.linux_perf_spaces == b.linux_perf_spaces && - a.linux_perf_threads == b.linux_perf_threads && - a.linux_perf_gcscrub == b.linux_perf_gcscrub + a.run_customisable_func_only == b.run_customisable_func_only && + a.enable_customisable_func == b.enable_customisable_func && + a.customisable_gcsample == b.customisable_gcsample && + a.setup_prehook == b.setup_prehook && + a.teardown_posthook == b.teardown_posthook && + a.sample_result == b.sample_result && + a.prehook == b.prehook && + a.posthook == 
b.posthook end function Base.copy(p::Parameters) @@ -158,11 +251,14 @@ function Base.copy(p::Parameters) p.gcsample, p.time_tolerance, p.memory_tolerance, - p.enable_linux_perf, - p.linux_perf_groups, - p.linux_perf_spaces, - p.linux_perf_threads, - p.linux_perf_gcscrub, + p.run_customisable_func_only, + p.enable_customisable_func, + p.customisable_gcsample, + p.setup_prehook, + p.teardown_posthook, + p.sample_result, + p.prehook, + p.posthook, ) end diff --git a/src/serialization.jl b/src/serialization.jl index 8df49a74..16aaea57 100644 --- a/src/serialization.jl +++ b/src/serialization.jl @@ -1,7 +1,5 @@ const VERSIONS = Dict( - "Julia" => string(VERSION), - "BenchmarkTools" => pkgversion(BenchmarkTools), - "LinuxPerf" => pkgversion(LinuxPerf), + "Julia" => string(VERSION), "BenchmarkTools" => pkgversion(BenchmarkTools) ) # TODO: Add any new types as they're added @@ -18,6 +16,9 @@ const SUPPORTED_TYPES = Dict{Symbol,Type}( ) # n.b. Benchmark type not included here, since it is gensym'd +const CUSTOM_CONVERT_TYPES = Type[] +function _convert end + function JSON.lower(x::Union{values(SUPPORTED_TYPES)...}) d = Dict{String,Any}() T = typeof(x) @@ -25,37 +26,13 @@ function JSON.lower(x::Union{values(SUPPORTED_TYPES)...}) name = String(fieldname(T, i)) field = getfield(x, i) ft = typeof(field) + field = ft <: Function ? nothing : field value = ft <: get(SUPPORTED_TYPES, nameof(ft), Union{}) ? JSON.lower(field) : field d[name] = value end return [string(nameof(typeof(x))), d] end -# Recovers LinuxPerf.Stats from serialized form -function _convert(::Type{Union{Nothing,LinuxPerf.Stats}}, d) - if isnothing(d) - return nothing - end - return LinuxPerf.Stats(_convert.(LinuxPerf.ThreadStats, d["threads"])) -end -function _convert(::Type{LinuxPerf.ThreadStats}, d::Dict{String}) - return LinuxPerf.ThreadStats( - d["pid"], - [ - [_convert(LinuxPerf.Counter, counter) for counter in group] for - group in d["groups"] - ], - ) -end -function _convert(::Type{LinuxPerf.Counter}, d::Dict{String}) - return LinuxPerf.Counter( - _convert(LinuxPerf.EventType, d["event"]), d["value"], d["enabled"], d["running"] - ) -end -function _convert(::Type{LinuxPerf.EventType}, d::Dict{String}) - return LinuxPerf.EventType(d["category"], d["event"]) -end - # a minimal 'eval' function, mirroring KeyTypes, but being slightly more lenient safeeval(@nospecialize x) = x safeeval(x::QuoteNode) = x.value @@ -77,8 +54,10 @@ function recover(x::Vector) for i in 1:fc ft = fieldtype(T, i) fn = String(fieldname(T, i)) - if ft == Union{Nothing,LinuxPerf.Stats} + if ft in CUSTOM_CONVERT_TYPES xsi = _convert(ft, fields[fn]) + elseif ft <: Function + xsi = BenchmarkTools._nothing_func elseif ft <: get(SUPPORTED_TYPES, nameof(ft), Union{}) xsi = recover(fields[fn]) else @@ -89,22 +68,30 @@ function recover(x::Vector) # JSON spec doesn't support Inf # These fields should all be >= 0, so we can ignore -Inf case typemax(ft) - elseif fn == "enable_linux_perf" && !haskey(fields, fn) - false + elseif fn == "enable_customisable_func" + if !haskey(fields, fn) || fields[fn] == "FALSE" + :FALSE + elseif fields[fn] == "LAST" + :LAST + elseif fields[fn] == "ALL" + :ALL + else + throw( + ArgumentError( + "Invalid value $(fields[fn]) for enable_customisable_func which must be one of :ALL, :LAST, :FALSE", + ), + ) + end elseif fn in ( - "linux_perf_groups", - "linux_perf_spaces", - "linux_perf_threads", - "linux_perf_gcscrub", + "run_customisable_func_only", + "customisable_gcsample", + "setup_prehook", + "teardown_posthook", + "sample_result", + 
"prehook", + "posthook", ) && !haskey(fields, fn) getfield(BenchmarkTools.DEFAULT_PARAMETERS, Symbol(fn)) - elseif fn == "linux_perf_spaces" && haskey(fields, fn) - length(fields[fn]) == 3 || throw( - ArgumentError( - "Expecting a vector of length 3 for linux_perf_spaces parameter", - ), - ) - xsi = convert(ft, (fields[fn][1], fields[fn][2], fields[fn][3])) else convert(ft, fields[fn]) end diff --git a/src/trials.jl b/src/trials.jl index 4bc88d54..6745e163 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -8,10 +8,27 @@ mutable struct Trial gctimes::Vector{Float64} memory::Int allocs::Int - linux_perf_stats::Union{LinuxPerf.Stats,Nothing} - - function Trial(params, times, gctimes, memory, allocs, linux_perf_stats=nothing) - return new(params, times, gctimes, memory, allocs, linux_perf_stats) + customisable_result + customisable_result_for_every_sample::Bool + + function Trial( + params, + times, + gctimes, + memory, + allocs, + customisable_result=nothing, + customisable_result_for_every_sample=false, + ) + return new( + params, + times, + gctimes, + memory, + allocs, + customisable_result, + customisable_result_for_every_sample, + ) end end @@ -32,7 +49,11 @@ function Base.copy(t::Trial) copy(t.gctimes), t.memory, t.allocs, - t.linux_perf_stats, + if t.customisable_result_for_every_sample + copy(t.customisable_result) + else + t.customisable_result + end, ) end @@ -53,11 +74,31 @@ end Base.length(t::Trial) = length(t.times) function Base.getindex(t::Trial, i::Number) return Trial( - t.params, [t.times[i]], [t.gctimes[i]], t.memory, t.allocs, t.linux_perf_stats + t.params, + [t.times[i]], + [t.gctimes[i]], + t.memory, + t.allocs, + if t.customisable_result_for_every_sample + [t.customisable_result[i]] + else + t.customisable_result + end, ) end function Base.getindex(t::Trial, i) - return Trial(t.params, t.times[i], t.gctimes[i], t.memory, t.allocs, t.linux_perf_stats) + return Trial( + t.params, + t.times[i], + t.gctimes[i], + t.memory, + t.allocs, + if t.customisable_result_for_every_sample + t.customisable_result[i] + else + t.customisable_result + end, + ) end Base.lastindex(t::Trial) = length(t) @@ -114,16 +155,18 @@ mutable struct TrialEstimate gctime::Float64 memory::Int allocs::Int - linux_perf_stats::Union{LinuxPerf.Stats,Nothing} + customisable_result - function TrialEstimate(params, times, gctime, memory, allocs, linux_perf_stats=nothing) - return new(params, times, gctime, memory, allocs, linux_perf_stats) + function TrialEstimate( + params, times, gctime, memory, allocs, customisable_result=nothing + ) + return new(params, times, gctime, memory, allocs, customisable_result) end end function TrialEstimate(trial::Trial, t, gct) return TrialEstimate( - params(trial), t, gct, memory(trial), allocs(trial), trial.linux_perf_stats + params(trial), t, gct, memory(trial), allocs(trial), trial.customisable_result ) end @@ -137,7 +180,7 @@ end function Base.copy(t::TrialEstimate) return TrialEstimate( - copy(t.params), t.time, t.gctime, t.memory, t.allocs, t.linux_perf_stats + copy(t.params), t.time, t.gctime, t.memory, t.allocs, t.customisable_result ) end diff --git a/test/LinuxPerfIntegrationTests.jl b/test/LinuxPerfIntegrationTests.jl deleted file mode 100644 index c1a5e2b1..00000000 --- a/test/LinuxPerfIntegrationTests.jl +++ /dev/null @@ -1,55 +0,0 @@ -module LinuxPerfIntegrationTests - -using BenchmarkTools -using Test -using LinuxPerf - -### Serialization Test ### -b = @benchmarkable sin(1) enable_linux_perf = true -tune!(b) -bb = run(b) - -withtempdir() do - tmp = 
joinpath(pwd(), "tmp.json") - - BenchmarkTools.save(tmp, b.params, bb) - @test isfile(tmp) - - results = BenchmarkTools.load(tmp) - @test results isa Vector{Any} - @test length(results) == 2 - @test eq(results[1], b.params) - @test eq(results[2], bb) -end - -################################## -# Linux Perf Integration # -################################## - -b = @benchmarkable sin($(Ref(42.0))[]) -results = run(b; seconds=1, enable_linux_perf=false) -@test results.linux_perf_stats === nothing - -b = @benchmarkable sin($(Ref(42.0))[]) -results = run(b; seconds=1) -@test results.linux_perf_stats === nothing - -b = @benchmarkable sin($(Ref(42.0))[]) -results = run(b; seconds=1, enable_linux_perf=true, evals=10^3) -@test results.linux_perf_stats !== nothing -@test any(results.linux_perf_stats.threads) do thread - instructions = LinuxPerf.scaledcount(thread["instructions"]) - !isnan(instructions) && instructions > 10^4 -end - -tune!(groups) -results = run(groups; enable_linux_perf=true) -for (name, group_results) in BenchmarkTools.leaves(results) - @test group_results.linux_perf_stats !== nothing - @test any(group_results.linux_perf_stats.threads) do thread - instructions = LinuxPerf.scaledcount(thread["instructions"]) - !isnan(instructions) && instructions > 10^3 - end -end - -end diff --git a/test/ParametersTests.jl b/test/ParametersTests.jl index b028dd7d..9fa07027 100644 --- a/test/ParametersTests.jl +++ b/test/ParametersTests.jl @@ -26,11 +26,6 @@ p = Parameters(; gcsample=false, time_tolerance=0.043, memory_tolerance=0.15, - enable_linux_perf=false, - linux_perf_groups="(branch-instructions)", - linux_perf_spaces=(true, true, false), - linux_perf_threads=false, - linux_perf_gcscrub=false, ) oldseconds = BenchmarkTools.DEFAULT_PARAMETERS.seconds oldgctrial = BenchmarkTools.DEFAULT_PARAMETERS.gctrial @@ -40,11 +35,6 @@ oldsamples = BenchmarkTools.DEFAULT_PARAMETERS.samples oldevals = BenchmarkTools.DEFAULT_PARAMETERS.evals oldoverhead = BenchmarkTools.DEFAULT_PARAMETERS.overhead oldgcsample = BenchmarkTools.DEFAULT_PARAMETERS.gcsample -old_enable_linux_perf = BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf -old_linux_perf_groups = BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups -old_linux_perf_spaces = BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces -old_linux_perf_threads = BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads -old_enable_linux_gcsample = BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub BenchmarkTools.DEFAULT_PARAMETERS.seconds = p.seconds BenchmarkTools.DEFAULT_PARAMETERS.gctrial = p.gctrial BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = p.time_tolerance @@ -53,11 +43,6 @@ BenchmarkTools.DEFAULT_PARAMETERS.samples = p.samples BenchmarkTools.DEFAULT_PARAMETERS.evals = p.evals BenchmarkTools.DEFAULT_PARAMETERS.overhead = p.overhead BenchmarkTools.DEFAULT_PARAMETERS.gcsample = p.gcsample -BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf = p.enable_linux_perf -BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups = p.linux_perf_groups -BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces = p.linux_perf_spaces -BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads = p.linux_perf_threads -BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub = p.linux_perf_gcscrub @test p == Parameters() @test p == Parameters(p) BenchmarkTools.DEFAULT_PARAMETERS.seconds = oldseconds @@ -68,10 +53,5 @@ BenchmarkTools.DEFAULT_PARAMETERS.samples = oldsamples BenchmarkTools.DEFAULT_PARAMETERS.evals = oldevals BenchmarkTools.DEFAULT_PARAMETERS.overhead = oldoverhead 
BenchmarkTools.DEFAULT_PARAMETERS.gcsample = oldgcsample -BenchmarkTools.DEFAULT_PARAMETERS.enable_linux_perf = old_enable_linux_perf -BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups = old_linux_perf_groups -BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces = old_linux_perf_spaces -BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads = old_linux_perf_threads -BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub = old_enable_linux_gcsample end # module diff --git a/test/SerializationTests.jl b/test/SerializationTests.jl index 7be3e791..bafe6377 100644 --- a/test/SerializationTests.jl +++ b/test/SerializationTests.jl @@ -1,23 +1,12 @@ module SerializationTests -using BenchmarkTools, LinuxPerf +using BenchmarkTools using Test function eq(x::T, y::T) where {T<:Union{values(BenchmarkTools.SUPPORTED_TYPES)...}} return all(i -> eq(getfield(x, i), getfield(y, i)), 1:fieldcount(T)) end -eq(x::String, y::String) = x == y -eq(x::NTuple{3,Bool}, y::NTuple{3,Bool}) = x == y -function eq(x::LinuxPerf.Stats, y::LinuxPerf.Stats) - return all(a -> eq(a[1], a[2]), zip(x.threads, y.threads)) -end -function eq(x::LinuxPerf.ThreadStats, y::LinuxPerf.ThreadStats) - return x.pid == y.pid && x.groups == y.groups -end -eq(x::Nothing, y) = isnothing(y) -eq(x, y::Nothing) = isnothing(x) -eq(x::Nothing, y::Nothing) = true -eq(x::T, y::T) where {T} = isapprox(x, y) +eq(x::T, y::T) where {T} = x == y function withtempdir(f::Function) d = mktempdir() @@ -110,7 +99,7 @@ end @test_throws ArgumentError BenchmarkTools.recover([1]) end -@testset "Backwards Compatibility with evals_set and linux perf options" begin +@testset "Backwards Compatibility with evals_set" begin json_string = "[{\"Julia\":\"1.11.0-DEV.1116\",\"BenchmarkTools\":\"1.4.0\"},[[\"Parameters\",{\"gctrial\":true,\"time_tolerance\":0.05,\"samples\":10000,\"evals\":1,\"gcsample\":false,\"seconds\":5.0,\"overhead\":0.0,\"memory_tolerance\":0.01}]]]" json_io = IOBuffer(json_string) @@ -126,10 +115,13 @@ end 0.05, 0.01, false, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub, + :FALSE, + false, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, ), ] @@ -148,10 +140,13 @@ end 0.05, 0.01, false, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub, + :FALSE, + false, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, ), ] end @@ -168,10 +163,13 @@ end Inf, Inf, false, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_groups, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_spaces, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_threads, - BenchmarkTools.DEFAULT_PARAMETERS.linux_perf_gcscrub, + :FALSE, + false, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, + BenchmarkTools._nothing_func, ) io = IOBuffer() diff --git a/test/runtests.jl b/test/runtests.jl index 38ce59f7..6f58393a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,9 +34,3 @@ println("done (took ", took_seconds, " seconds)") print("Testing serialization...") 
took_seconds = @elapsed include("SerializationTests.jl") println("done (took ", took_seconds, " seconds)") - -if parse(Bool, get(ENV, "TEST_PERF_INTEGRATION", "true")) - print("Testing Perf integration...") - took_seconds = @elapsed include("LinuxPerfIntegrationTests.jl") - println("done (took ", took_seconds, " seconds)") -end From 8c87eb91b3e126730e81f309ebc8ddbe112904bd Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 10:26:43 +0100 Subject: [PATCH 15/29] Fix serialization of custom benchmark result --- src/serialization.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/serialization.jl b/src/serialization.jl index 16aaea57..95d3ef2e 100644 --- a/src/serialization.jl +++ b/src/serialization.jl @@ -16,8 +16,7 @@ const SUPPORTED_TYPES = Dict{Symbol,Type}( ) # n.b. Benchmark type not included here, since it is gensym'd -const CUSTOM_CONVERT_TYPES = Type[] -function _convert end +function customisable_result_recover end function JSON.lower(x::Union{values(SUPPORTED_TYPES)...}) d = Dict{String,Any}() @@ -54,8 +53,8 @@ function recover(x::Vector) for i in 1:fc ft = fieldtype(T, i) fn = String(fieldname(T, i)) - if ft in CUSTOM_CONVERT_TYPES - xsi = _convert(ft, fields[fn]) + if fn == "customisable_result" + xsi = customisable_result_recover(fields[fn]) elseif ft <: Function xsi = BenchmarkTools._nothing_func elseif ft <: get(SUPPORTED_TYPES, nameof(ft), Union{}) From fd3c82bfa3265042444bf6b3cb63fcdb943b12a7 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 10:46:04 +0100 Subject: [PATCH 16/29] Fix serialization of custom benchmarking result --- src/serialization.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/serialization.jl b/src/serialization.jl index 95d3ef2e..19e2c4ce 100644 --- a/src/serialization.jl +++ b/src/serialization.jl @@ -16,7 +16,7 @@ const SUPPORTED_TYPES = Dict{Symbol,Type}( ) # n.b. Benchmark type not included here, since it is gensym'd -function customisable_result_recover end +customisable_result_recover(::Nothing) = nothing function JSON.lower(x::Union{values(SUPPORTED_TYPES)...}) d = Dict{String,Any}() From 031f5a44f393d8d8130e38a416c329b3f5f556d7 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 12:13:34 +0100 Subject: [PATCH 17/29] Revert buildkite change --- .buildkite/pipeline.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 45d42406..e90054d8 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1,12 +1,12 @@ --- steps: - - label: "Julia 1 - Test LinuxPerf Integration" + - label: ":test_tube: Test the package" plugins: - JuliaCI/julia#v1: version: "1" - - JuliaCI/julia-test#v1: + commands: | + julia --version agents: queue: "juliaecosystem" os: "linux" arch: "x86_64" - timeout_in_minutes: 15 From f11b769779dbf56e5399129e6ff43e4e6a02dbe2 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 12:35:57 +0100 Subject: [PATCH 18/29] Don't assume hooks are functions This should theoretically allow FunctionWrappers or callable structs to be used, though serialization may be an issue. 
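For example, something along these lines (a rough sketch; `TimestampHook` is a
hypothetical callable struct, not part of this patch) would now be accepted as a
hook even though it is not a subtype of `Function`:

    mutable struct TimestampHook
        t::UInt64
    end
    # Hooks are invoked with zero arguments; record a timestamp on each call.
    (h::TimestampHook)() = (h.t = time_ns(); h.t)

    b = @benchmarkable sin(1)
    run(b; enable_customisable_func=:LAST, prehook=TimestampHook(0), posthook=TimestampHook(0))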
--- src/parameters.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parameters.jl b/src/parameters.jl index cbd530bd..8470ac6e 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -5,7 +5,7 @@ const RESOLUTION = 1000 # 1 μs = 1000 ns # Parameters # ############## -mutable struct Parameters{A<:Function,B<:Function} +mutable struct Parameters{A,B} seconds::Float64 samples::Int evals::Int @@ -18,9 +18,9 @@ mutable struct Parameters{A<:Function,B<:Function} run_customisable_func_only::Bool enable_customisable_func::Symbol customisable_gcsample::Bool - setup_prehook::Function - teardown_posthook::Function - sample_result::Function + setup_prehook + teardown_posthook + sample_result prehook::A posthook::B end From 512ed0a7a70171fd418c84359abd724998e4479c Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 14:18:54 +0100 Subject: [PATCH 19/29] Fix deserialization of hooks --- src/serialization.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/serialization.jl b/src/serialization.jl index 19e2c4ce..92c95873 100644 --- a/src/serialization.jl +++ b/src/serialization.jl @@ -55,7 +55,9 @@ function recover(x::Vector) fn = String(fieldname(T, i)) if fn == "customisable_result" xsi = customisable_result_recover(fields[fn]) - elseif ft <: Function + elseif fn in ( + "setup_prehook", "teardown_posthook", "sample_result", "prehook", "posthook" + ) xsi = BenchmarkTools._nothing_func elseif ft <: get(SUPPORTED_TYPES, nameof(ft), Union{}) xsi = recover(fields[fn]) From ff4fc43bb205d06520f93cd308183b0219b9a9c3 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 14:26:06 +0100 Subject: [PATCH 20/29] Add some tests --- src/execution.jl | 15 +++-- test/CustomisableBenchmarkTests.jl | 92 ++++++++++++++++++++++++++++++ test/ParametersTests.jl | 33 +++++++++++ test/runtests.jl | 4 ++ 4 files changed, 140 insertions(+), 4 deletions(-) create mode 100644 test/CustomisableBenchmarkTests.jl diff --git a/src/execution.jl b/src/execution.jl index 973880e0..b8a4baee 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -136,13 +136,20 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, k end if params.enable_customisable_func == :ALL params.customisable_gcsample && gcscrub() - push!(trial.customisable_result, b.customisable_func(b.quote_vals, params)[1]) + s = b.customisable_func(b.quote_vals, params) + push!(trial.customisable_result, s[1]) + + if params.run_customisable_func_only + return_val = s[end] + end end iters = 2 while (Base.time() - start_time) < params.seconds && iters ≤ params.samples - params.gcsample && gcscrub() - push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...) + if !params.run_customisable_func_only + params.gcsample && gcscrub() + push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...) 
+        end
+
         if params.enable_customisable_func == :ALL
             params.customisable_gcsample && gcscrub()
@@ -152,7 +159,7 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, k
         iters += 1
     end
 
-    if params.enable_customisable_func !== :FALSE
+    if params.enable_customisable_func == :LAST
         params.customisable_gcsample && gcscrub()
         s = b.customisable_func(b.quote_vals, params)
         trial.customisable_result = s[1]
diff --git a/test/CustomisableBenchmarkTests.jl b/test/CustomisableBenchmarkTests.jl
new file mode 100644
index 00000000..548e6023
--- /dev/null
+++ b/test/CustomisableBenchmarkTests.jl
@@ -0,0 +1,92 @@
+module CustomisableBenchmarkTests
+
+using BenchmarkTools
+using Test
+
+x = Ref(0)
+setup_prehook(_) = x[] += 1
+prehook() = x[] += 1
+posthook() = x[] += 1
+function sample_result(_, setup_prehook_result, prehook_result, posthook_result)
+    @test setup_prehook_result == 1
+    @test prehook_result == 2
+    @test posthook_result == 3
+    @test x[] == 3
+    return x[] += 1
+end
+function teardown_posthook(_, setup_prehook_result)
+    @test setup_prehook_result == 1
+    @test x[] == 4
+    return x[] += 1
+end
+
+@testset "Disabled custom benchmarking" begin
+    x[] = 0
+    res = @benchmark nothing setup_prehook = setup_prehook prehook = prehook posthook = posthook sample_result =
+        sample_result teardown_posthook = teardown_posthook run_customisable_func_only = false
+    @test res.customisable_result === nothing
+    @test !res.customisable_result_for_every_sample
+end
+
+@testset "custom benchmarking last" begin
+    for run_customisable_func_only in (true, false)
+        x[] = 0
+        res = @benchmark nothing setup_prehook = setup_prehook prehook = prehook posthook =
+            posthook sample_result = sample_result teardown_posthook = teardown_posthook enable_customisable_func =
+            :LAST run_customisable_func_only = run_customisable_func_only
+        if run_customisable_func_only
+            @test isempty(res.times)
+            @test isempty(res.gctimes)
+            @test res.memory == typemax(Int)
+            @test res.allocs == typemax(Int)
+        end
+        @test !res.customisable_result_for_every_sample
+        @test res.customisable_result === 4
+    end
+end
+
+@testset "custom benchmark every sample, independent of iterations" begin
+    for run_customisable_func_only in (true, false)
+        x[] = 0
+        setup_prehook(_) = x[] = 1
+        res = @benchmark nothing setup_prehook = setup_prehook prehook = prehook posthook =
+            posthook sample_result = sample_result teardown_posthook = teardown_posthook enable_customisable_func =
+            :ALL run_customisable_func_only = run_customisable_func_only samples = 1000
+        if run_customisable_func_only
+            @test isempty(res.times)
+            @test isempty(res.gctimes)
+            @test res.memory == typemax(Int)
+            @test res.allocs == typemax(Int)
+        end
+        @test res.customisable_result_for_every_sample
+        @test res.customisable_result == fill(4, 1000)
+    end
+end
+
+@testset "custom benchmark every sample with iteration dependence" begin
+    for run_customisable_func_only in (true, false)
+        x[] = 0
+        setup_prehook(_) = x[] += 1
+        prehook() = x[] += 1
+        posthook() = x[] += 1
+        function sample_result(_, setup_prehook_result, prehook_result, posthook_result)
+            return x[] += 1
+        end
+        function teardown_posthook(_, setup_prehook_result)
+            return x[] += 1
+        end
+        res = @benchmark nothing setup_prehook = setup_prehook prehook = prehook posthook =
+            posthook sample_result = sample_result teardown_posthook = teardown_posthook enable_customisable_func =
+            :ALL run_customisable_func_only = run_customisable_func_only samples = 1000
+        if run_customisable_func_only
+            @test isempty(res.times)
+            @test isempty(res.gctimes)
+            @test res.memory == typemax(Int)
+            @test res.allocs == typemax(Int)
+        end
+        @test res.customisable_result_for_every_sample
+        @test res.customisable_result == collect(5 * (1:1000) .- 1)
+    end
+end
+
+end # module
diff --git a/test/ParametersTests.jl b/test/ParametersTests.jl
index 9fa07027..03fa1644 100644
--- a/test/ParametersTests.jl
+++ b/test/ParametersTests.jl
@@ -17,6 +17,7 @@ BenchmarkTools.DEFAULT_PARAMETERS.gctrial = p.gctrial
 BenchmarkTools.DEFAULT_PARAMETERS.seconds = oldseconds
 BenchmarkTools.DEFAULT_PARAMETERS.gctrial = oldgctrial
 
+f(x) = x
 p = Parameters(;
     seconds=1,
     gctrial=false,
@@ -26,6 +27,14 @@ p = Parameters(;
     gcsample=false,
     time_tolerance=0.043,
     memory_tolerance=0.15,
+    # Customisable Parameters
+    run_customisable_func_only=true,
+    enable_customisable_func=:ALL,
+    customisable_gcsample=true,
+    # Customisable functions
+    setup_prehook=f,
+    teardown_posthook=f,
+    sample_result=f,
 )
 oldseconds = BenchmarkTools.DEFAULT_PARAMETERS.seconds
 oldgctrial = BenchmarkTools.DEFAULT_PARAMETERS.gctrial
@@ -35,6 +44,16 @@ oldsamples = BenchmarkTools.DEFAULT_PARAMETERS.samples
 oldevals = BenchmarkTools.DEFAULT_PARAMETERS.evals
 oldoverhead = BenchmarkTools.DEFAULT_PARAMETERS.overhead
 oldgcsample = BenchmarkTools.DEFAULT_PARAMETERS.gcsample
+old_run_customisable_func_only =
+    BenchmarkTools.DEFAULT_PARAMETERS.run_customisable_func_only
+old_enable_customisable_func = BenchmarkTools.DEFAULT_PARAMETERS.enable_customisable_func
+old_customisable_gcsample = BenchmarkTools.DEFAULT_PARAMETERS.customisable_gcsample
+old_setup_prehook = BenchmarkTools.DEFAULT_PARAMETERS.setup_prehook
+old_teardown_posthook = BenchmarkTools.DEFAULT_PARAMETERS.teardown_posthook
+old_sample_result = BenchmarkTools.DEFAULT_PARAMETERS.sample_result
+old_prehook = BenchmarkTools.DEFAULT_PARAMETERS.prehook
+old_posthook = BenchmarkTools.DEFAULT_PARAMETERS.posthook
+
 BenchmarkTools.DEFAULT_PARAMETERS.seconds = p.seconds
 BenchmarkTools.DEFAULT_PARAMETERS.gctrial = p.gctrial
 BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = p.time_tolerance
@@ -43,6 +62,13 @@ BenchmarkTools.DEFAULT_PARAMETERS.samples = p.samples
 BenchmarkTools.DEFAULT_PARAMETERS.evals = p.evals
 BenchmarkTools.DEFAULT_PARAMETERS.overhead = p.overhead
 BenchmarkTools.DEFAULT_PARAMETERS.gcsample = p.gcsample
+BenchmarkTools.DEFAULT_PARAMETERS.run_customisable_func_only = p.run_customisable_func_only
+BenchmarkTools.DEFAULT_PARAMETERS.enable_customisable_func = p.enable_customisable_func
+BenchmarkTools.DEFAULT_PARAMETERS.customisable_gcsample = p.customisable_gcsample
+BenchmarkTools.DEFAULT_PARAMETERS.setup_prehook = p.setup_prehook
+BenchmarkTools.DEFAULT_PARAMETERS.teardown_posthook = p.teardown_posthook
+BenchmarkTools.DEFAULT_PARAMETERS.sample_result = p.sample_result
+
 @test p == Parameters()
 @test p == Parameters(p)
 BenchmarkTools.DEFAULT_PARAMETERS.seconds = oldseconds
@@ -53,5 +79,12 @@ BenchmarkTools.DEFAULT_PARAMETERS.samples = oldsamples
 BenchmarkTools.DEFAULT_PARAMETERS.evals = oldevals
 BenchmarkTools.DEFAULT_PARAMETERS.overhead = oldoverhead
 BenchmarkTools.DEFAULT_PARAMETERS.gcsample = oldgcsample
+BenchmarkTools.DEFAULT_PARAMETERS.run_customisable_func_only =
+    old_run_customisable_func_only
+BenchmarkTools.DEFAULT_PARAMETERS.enable_customisable_func = old_enable_customisable_func
+BenchmarkTools.DEFAULT_PARAMETERS.customisable_gcsample = old_customisable_gcsample
+BenchmarkTools.DEFAULT_PARAMETERS.setup_prehook = old_setup_prehook
+BenchmarkTools.DEFAULT_PARAMETERS.teardown_posthook = old_teardown_posthook
+BenchmarkTools.DEFAULT_PARAMETERS.sample_result = old_sample_result end # module diff --git a/test/runtests.jl b/test/runtests.jl index 6f58393a..ae76e702 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -34,3 +34,7 @@ println("done (took ", took_seconds, " seconds)") print("Testing serialization...") took_seconds = @elapsed include("SerializationTests.jl") println("done (took ", took_seconds, " seconds)") + +print("Testing custom benchmarking...") +took_seconds = @elapsed include("CustomisableBenchmarkTests.jl") +println("done (took ", took_seconds, " seconds)") From 5036058ca8f2c75808bbd49ec0bd17671169e76a Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 14:28:37 +0100 Subject: [PATCH 21/29] Format --- test/CustomisableBenchmarkTests.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/CustomisableBenchmarkTests.jl b/test/CustomisableBenchmarkTests.jl index 548e6023..9376c98f 100644 --- a/test/CustomisableBenchmarkTests.jl +++ b/test/CustomisableBenchmarkTests.jl @@ -22,8 +22,9 @@ end @testset "Disabled custom benchmarking" begin x[] = 0 - res = @benchmark nothing setup_prehook = setup_prehook prehook = prehook posthook = posthook sample_result = - sample_result teardown_posthook = teardown_posthook run_customisable_func_only = false + res = @benchmark nothing setup_prehook = setup_prehook prehook = prehook posthook = + posthook sample_result = sample_result teardown_posthook = teardown_posthook run_customisable_func_only = + false @test res.customisable_result === nothing @test !res.customisable_result_for_every_sample end From 749215c42201a6cbb0e549b9d5842af1fc0f8a5b Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 22:22:30 +0100 Subject: [PATCH 22/29] Move checking of parameter values into constructor --- src/execution.jl | 4 --- src/parameters.jl | 54 +++++++++++++++++++++++++++++++++++++++++ src/serialization.jl | 12 ++------- test/ParametersTests.jl | 19 +++++++++++++++ 4 files changed, 75 insertions(+), 14 deletions(-) diff --git a/src/execution.jl b/src/execution.jl index b8a4baee..94ebe825 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -110,10 +110,6 @@ end function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, kwargs...) params = Parameters(p; kwargs...) 
@assert params.seconds > 0.0 "time limit must be greater than 0.0" - @assert params.enable_customisable_func in (:FALSE, :ALL, :LAST) "invalid value $(params.enable_customisable_func) for enable_customisable_func which must be :FALSE, :ALL or :LAST" - @assert !( - params.run_customisable_func_only && params.enable_customisable_func == :FALSE - ) "run_customisable_func_only is set to true, but enable_customisable_func is set to :FALSE" if warmup #warmup sample params.run_customisable_func_only && b.samplefunc(b.quote_vals, Parameters(params; evals=1)) diff --git a/src/parameters.jl b/src/parameters.jl index 8470ac6e..b2e76236 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -23,6 +23,60 @@ mutable struct Parameters{A,B} sample_result prehook::A posthook::B + + function Parameters{A,B}( + seconds, + samples, + evals, + evals_set, + overhead, + gctrial, + gcsample, + time_tolerance, + memory_tolerance, + run_customisable_func_only, + enable_customisable_func, + customisable_gcsample, + setup_prehook, + teardown_posthook, + sample_result, + prehook::A, + posthook::B, + ) where {A,B} + if enable_customisable_func ∉ (:FALSE, :ALL, :LAST) + throw( + ArgumentError( + "invalid value $(enable_customisable_func) for enable_customisable_func which must be :FALSE, :ALL or :LAST", + ), + ) + end + if run_customisable_func_only && enable_customisable_func == :FALSE + throw( + ArgumentError( + "run_customisable_func_only is set to true, but enable_customisable_func is set to :FALSE", + ), + ) + end + return new( + seconds, + samples, + evals, + evals_set, + overhead, + gctrial, + gcsample, + time_tolerance, + memory_tolerance, + run_customisable_func_only, + enable_customisable_func, + customisable_gcsample, + setup_prehook, + teardown_posthook, + sample_result, + prehook, + posthook, + ) + end end # https://github.com/JuliaLang/julia/issues/17186 diff --git a/src/serialization.jl b/src/serialization.jl index 92c95873..be2ff556 100644 --- a/src/serialization.jl +++ b/src/serialization.jl @@ -70,18 +70,10 @@ function recover(x::Vector) # These fields should all be >= 0, so we can ignore -Inf case typemax(ft) elseif fn == "enable_customisable_func" - if !haskey(fields, fn) || fields[fn] == "FALSE" + if !haskey(fields, fn) :FALSE - elseif fields[fn] == "LAST" - :LAST - elseif fields[fn] == "ALL" - :ALL else - throw( - ArgumentError( - "Invalid value $(fields[fn]) for enable_customisable_func which must be one of :ALL, :LAST, :FALSE", - ), - ) + Symbol(fields[fn]) end elseif fn in ( "run_customisable_func_only", diff --git a/test/ParametersTests.jl b/test/ParametersTests.jl index 03fa1644..6a65396e 100644 --- a/test/ParametersTests.jl +++ b/test/ParametersTests.jl @@ -87,4 +87,23 @@ BenchmarkTools.DEFAULT_PARAMETERS.setup_prehook = old_setup_prehook BenchmarkTools.DEFAULT_PARAMETERS.teardown_posthook = old_teardown_posthook BenchmarkTools.DEFAULT_PARAMETERS.sample_result = old_sample_result +for vals in (false, true, :ARST, :TRUE, :false, :ON) + @test_throws ArgumentError Parameters(p; enable_customisable_func=vals) + @test_throws ArgumentError Parameters(; enable_customisable_func=vals) +end + +@test_throws ArgumentError Parameters(; + enable_customisable_func=:FALSE, run_customisable_func_only=true +) +@test_nowarn Parameters(; enable_customisable_func=:FALSE, run_customisable_func_only=false) +for run_customisable_func_only in (false, true) + @test_nowarn Parameters(; + enable_customisable_func=:ALL, run_customisable_func_only=run_customisable_func_only + ) + @test_nowarn Parameters(; + 
enable_customisable_func=:LAST, + run_customisable_func_only=run_customisable_func_only, + ) +end + end # module From fe22190cdf25d0e704bcf3ed229b290d23542045 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 22:53:34 +0100 Subject: [PATCH 23/29] Simplify Deserialization --- src/serialization.jl | 51 ++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/src/serialization.jl b/src/serialization.jl index be2ff556..2de899cd 100644 --- a/src/serialization.jl +++ b/src/serialization.jl @@ -53,41 +53,32 @@ function recover(x::Vector) for i in 1:fc ft = fieldtype(T, i) fn = String(fieldname(T, i)) - if fn == "customisable_result" - xsi = customisable_result_recover(fields[fn]) + xsi = if fn == "customisable_result" + customisable_result_recover(fields[fn]) + elseif ft <: get(SUPPORTED_TYPES, nameof(ft), Union{}) + recover(fields[fn]) elseif fn in ( "setup_prehook", "teardown_posthook", "sample_result", "prehook", "posthook" ) - xsi = BenchmarkTools._nothing_func - elseif ft <: get(SUPPORTED_TYPES, nameof(ft), Union{}) - xsi = recover(fields[fn]) - else - xsi = if fn == "evals_set" && !haskey(fields, fn) - false - elseif fn in ("seconds", "overhead", "time_tolerance", "memory_tolerance") && - fields[fn] === nothing - # JSON spec doesn't support Inf - # These fields should all be >= 0, so we can ignore -Inf case - typemax(ft) - elseif fn == "enable_customisable_func" - if !haskey(fields, fn) - :FALSE - else - Symbol(fields[fn]) - end - elseif fn in ( - "run_customisable_func_only", - "customisable_gcsample", - "setup_prehook", - "teardown_posthook", - "sample_result", - "prehook", - "posthook", - ) && !haskey(fields, fn) - getfield(BenchmarkTools.DEFAULT_PARAMETERS, Symbol(fn)) + getfield(BenchmarkTools.DEFAULT_PARAMETERS, Symbol(fn)) + elseif fn == "evals_set" && !haskey(fields, fn) + false + elseif fn in ("seconds", "overhead", "time_tolerance", "memory_tolerance") && + fields[fn] === nothing + # JSON spec doesn't support Inf + # These fields should all be >= 0, so we can ignore -Inf case + typemax(ft) + elseif fn == "enable_customisable_func" + if !haskey(fields, fn) + :FALSE else - convert(ft, fields[fn]) + Symbol(fields[fn]) end + elseif fn in ("run_customisable_func_only", "customisable_gcsample") && + !haskey(fields, fn) + getfield(BenchmarkTools.DEFAULT_PARAMETERS, Symbol(fn)) + else + convert(ft, fields[fn]) end if T == BenchmarkGroup && xsi isa Dict for (k, v) in copy(xsi) From 95da3bad060c395418431f204d0e3d36bcee92b7 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Thu, 20 Jun 2024 23:03:05 +0100 Subject: [PATCH 24/29] Cleanup --- src/execution.jl | 5 ----- src/parameters.jl | 12 ++---------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/src/execution.jl b/src/execution.jl index 94ebe825..db458ca2 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -628,13 +628,11 @@ function generate_benchmark_definition( __prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline ( function (__evals) prehook_result = $BenchmarkTools.samplefunc_prehook() - # We'll run it evals times. $BenchmarkTools.@noinline __return_val_2 = $(invocation) for __iter in 2:__evals $BenchmarkTools.@noinline $(invocation) end posthook_result = $BenchmarkTools.samplefunc_posthook() - # trick the compiler not to eliminate the code return prehook_result, posthook_result, __return_val_2 end )( @@ -655,8 +653,6 @@ function generate_benchmark_definition( __params ) $BenchmarkTools.@noinline $(setup) - # Isolate code so that e.g. 
setup doesn't cause different code to be generated by e.g. changing register allocation - # Unfortunately it still does, e.g. if you define a variable in setup then it's passed into invocation adding a few instructions __prehook_result, __posthook_result, __return_val = $BenchmarkTools.@noinline ( function (__evals) prehook_result = __params.prehook() @@ -666,7 +662,6 @@ function generate_benchmark_definition( $BenchmarkTools.@noinline $(invocation) end posthook_result = __params.posthook() - # trick the compiler not to eliminate the code return prehook_result, posthook_result, __return_val_2 end )( diff --git a/src/parameters.jl b/src/parameters.jl index b2e76236..a8c46512 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -244,16 +244,8 @@ function Parameters( else default.sample_result end - params_prehook = if prehook != nothing - prehook - else - default.prehook - end - params_posthook = if posthook != nothing - posthook - else - default.posthook - end + params_prehook = prehook != nothing ? prehook : default.prehook + params_posthook = posthook != nothing ? posthook : default.posthook return Parameters( params_seconds, params_samples, From 05d971fefb5cb6f15d8b42dcddc7283f3177a054 Mon Sep 17 00:00:00 2001 From: Zentrik Date: Fri, 21 Jun 2024 10:13:31 +0100 Subject: [PATCH 25/29] customisable -> customizable --- src/execution.jl | 50 ++++++++-------- src/parameters.jl | 96 +++++++++++++++--------------- src/serialization.jl | 10 ++-- src/trials.jl | 40 ++++++------- test/CustomisableBenchmarkTests.jl | 44 +++++++------- test/ParametersTests.jl | 48 +++++++-------- test/runtests.jl | 2 +- 7 files changed, 145 insertions(+), 145 deletions(-) diff --git a/src/execution.jl b/src/execution.jl index db458ca2..43aafc52 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -16,7 +16,7 @@ end mutable struct Benchmark samplefunc - customisable_func + customizable_func quote_vals params::Parameters end @@ -111,56 +111,56 @@ function _run(b::Benchmark, p::Parameters; verbose=false, pad="", warmup=true, k params = Parameters(p; kwargs...) @assert params.seconds > 0.0 "time limit must be greater than 0.0" if warmup #warmup sample - params.run_customisable_func_only && + params.run_customizable_func_only && b.samplefunc(b.quote_vals, Parameters(params; evals=1)) - !params.run_customisable_func_only && - b.customisable_func(b.quote_vals, Parameters(params; evals=1)) + !params.run_customizable_func_only && + b.customizable_func(b.quote_vals, Parameters(params; evals=1)) end trial = Trial(params) - if params.enable_customisable_func == :ALL - trial.customisable_result = [] - trial.customisable_result_for_every_sample = true + if params.enable_customizable_func == :ALL + trial.customizable_result = [] + trial.customizable_result_for_every_sample = true end params.gctrial && gcscrub() start_time = Base.time() return_val = nothing - if !params.run_customisable_func_only + if !params.run_customizable_func_only s = b.samplefunc(b.quote_vals, params) push!(trial, s[1:(end - 1)]...) 
return_val = s[end] end - if params.enable_customisable_func == :ALL - params.customisable_gcsample && gcscrub() - s = b.customisable_func(b.quote_vals, params) - push!(trial.customisable_result, s[1]) + if params.enable_customizable_func == :ALL + params.customizable_gcsample && gcscrub() + s = b.customizable_func(b.quote_vals, params) + push!(trial.customizable_result, s[1]) - if params.run_customisable_func_only + if params.run_customizable_func_only return_val = s[end] end end iters = 2 while (Base.time() - start_time) < params.seconds && iters ≤ params.samples - if !params.run_customisable_func_only + if !params.run_customizable_func_only params.gcsample && gcscrub() push!(trial, b.samplefunc(b.quote_vals, params)[1:(end - 1)]...) end - if params.enable_customisable_func == :ALL - params.customisable_gcsample && gcscrub() - push!(trial.customisable_result, b.customisable_func(b.quote_vals, params)[1]) + if params.enable_customizable_func == :ALL + params.customizable_gcsample && gcscrub() + push!(trial.customizable_result, b.customizable_func(b.quote_vals, params)[1]) end iters += 1 end - if params.enable_customisable_func == :LAST - params.customisable_gcsample && gcscrub() - s = b.customisable_func(b.quote_vals, params) - trial.customisable_result = s[1] + if params.enable_customizable_func == :LAST + params.customizable_gcsample && gcscrub() + s = b.customizable_func(b.quote_vals, params) + trial.customizable_result = s[1] - if params.run_customisable_func_only + if params.run_customizable_func_only return_val = s[end] end end @@ -578,7 +578,7 @@ function generate_benchmark_definition( @nospecialize corefunc = gensym("core") samplefunc = gensym("sample") - customisable_func = gensym("customisable") + customizable_func = gensym("customizable") type_vars = [gensym() for i in 1:(length(quote_vars) + length(setup_vars))] signature = Expr(:call, corefunc, quote_vars..., setup_vars...) 
signature_def = Expr( @@ -644,7 +644,7 @@ function generate_benchmark_definition( )..., __return_val end - @noinline function $(customisable_func)( + @noinline function $(customizable_func)( $(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters ) local __setup_prehook_result @@ -680,7 +680,7 @@ function generate_benchmark_definition( end end $BenchmarkTools.Benchmark( - $(samplefunc), $(customisable_func), $(quote_vals), $(params) + $(samplefunc), $(customizable_func), $(quote_vals), $(params) ) end, ) diff --git a/src/parameters.jl b/src/parameters.jl index a8c46512..142663f0 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -15,9 +15,9 @@ mutable struct Parameters{A,B} gcsample::Bool time_tolerance::Float64 memory_tolerance::Float64 - run_customisable_func_only::Bool - enable_customisable_func::Symbol - customisable_gcsample::Bool + run_customizable_func_only::Bool + enable_customizable_func::Symbol + customizable_gcsample::Bool setup_prehook teardown_posthook sample_result @@ -34,26 +34,26 @@ mutable struct Parameters{A,B} gcsample, time_tolerance, memory_tolerance, - run_customisable_func_only, - enable_customisable_func, - customisable_gcsample, + run_customizable_func_only, + enable_customizable_func, + customizable_gcsample, setup_prehook, teardown_posthook, sample_result, prehook::A, posthook::B, ) where {A,B} - if enable_customisable_func ∉ (:FALSE, :ALL, :LAST) + if enable_customizable_func ∉ (:FALSE, :ALL, :LAST) throw( ArgumentError( - "invalid value $(enable_customisable_func) for enable_customisable_func which must be :FALSE, :ALL or :LAST", + "invalid value $(enable_customizable_func) for enable_customizable_func which must be :FALSE, :ALL or :LAST", ), ) end - if run_customisable_func_only && enable_customisable_func == :FALSE + if run_customizable_func_only && enable_customizable_func == :FALSE throw( ArgumentError( - "run_customisable_func_only is set to true, but enable_customisable_func is set to :FALSE", + "run_customizable_func_only is set to true, but enable_customizable_func is set to :FALSE", ), ) end @@ -67,9 +67,9 @@ mutable struct Parameters{A,B} gcsample, time_tolerance, memory_tolerance, - run_customisable_func_only, - enable_customisable_func, - customisable_gcsample, + run_customizable_func_only, + enable_customizable_func, + customizable_gcsample, setup_prehook, teardown_posthook, sample_result, @@ -90,9 +90,9 @@ function Parameters( gcsample, time_tolerance, memory_tolerance, - run_customisable_func_only, - enable_customisable_func, - customisable_gcsample, + run_customizable_func_only, + enable_customizable_func, + customizable_gcsample, setup_prehook, teardown_posthook, sample_result, @@ -109,9 +109,9 @@ function Parameters( gcsample, time_tolerance, memory_tolerance, - run_customisable_func_only, - enable_customisable_func, - customisable_gcsample, + run_customizable_func_only, + enable_customizable_func, + customizable_gcsample, setup_prehook, teardown_posthook, sample_result, @@ -131,11 +131,11 @@ DEFAULT_PARAMETERS = Parameters( false, 0.05, 0.01, - # Customisable Parameters + # customizable Parameters false, :FALSE, false, - # Customisable functions + # customizable functions _nothing_func, _nothing_func, _nothing_func, @@ -153,9 +153,9 @@ function Parameters(; gcsample=DEFAULT_PARAMETERS.gcsample, time_tolerance=DEFAULT_PARAMETERS.time_tolerance, memory_tolerance=DEFAULT_PARAMETERS.memory_tolerance, - run_customisable_func_only=DEFAULT_PARAMETERS.run_customisable_func_only, - 
enable_customisable_func=DEFAULT_PARAMETERS.enable_customisable_func, - customisable_gcsample=DEFAULT_PARAMETERS.customisable_gcsample, + run_customizable_func_only=DEFAULT_PARAMETERS.run_customizable_func_only, + enable_customizable_func=DEFAULT_PARAMETERS.enable_customizable_func, + customizable_gcsample=DEFAULT_PARAMETERS.customizable_gcsample, setup_prehook=DEFAULT_PARAMETERS.setup_prehook, teardown_posthook=DEFAULT_PARAMETERS.teardown_posthook, sample_result=DEFAULT_PARAMETERS.sample_result, @@ -172,9 +172,9 @@ function Parameters(; gcsample, time_tolerance, memory_tolerance, - run_customisable_func_only, - enable_customisable_func, - customisable_gcsample, + run_customizable_func_only, + enable_customizable_func, + customizable_gcsample, setup_prehook, teardown_posthook, sample_result, @@ -194,9 +194,9 @@ function Parameters( gcsample=nothing, time_tolerance=nothing, memory_tolerance=nothing, - run_customisable_func_only=nothing, - enable_customisable_func=nothing, - customisable_gcsample=nothing, + run_customizable_func_only=nothing, + enable_customizable_func=nothing, + customizable_gcsample=nothing, setup_prehook=nothing, teardown_posthook=nothing, sample_result=nothing, @@ -214,20 +214,20 @@ function Parameters( time_tolerance != nothing ? time_tolerance : default.time_tolerance params_memory_tolerance = memory_tolerance != nothing ? memory_tolerance : default.memory_tolerance - params_run_customisable_func_only = if run_customisable_func_only != nothing - run_customisable_func_only + params_run_customizable_func_only = if run_customizable_func_only != nothing + run_customizable_func_only else - default.run_customisable_func_only + default.run_customizable_func_only end - params_enable_customisable_func = if enable_customisable_func != nothing - enable_customisable_func + params_enable_customizable_func = if enable_customizable_func != nothing + enable_customizable_func else - default.enable_customisable_func + default.enable_customizable_func end - params_customisable_gcscrub = if customisable_gcsample != nothing - customisable_gcsample + params_customizable_gcscrub = if customizable_gcsample != nothing + customizable_gcsample else - default.customisable_gcsample + default.customizable_gcsample end params_setup_prehook = if setup_prehook != nothing setup_prehook @@ -256,9 +256,9 @@ function Parameters( params_gcsample, params_time_tolerance, params_memory_tolerance, - params_run_customisable_func_only, - params_enable_customisable_func, - params_customisable_gcscrub, + params_run_customizable_func_only, + params_enable_customizable_func, + params_customizable_gcscrub, params_setup_prehook, params_teardown_posthook, params_sample_result, @@ -276,9 +276,9 @@ function Base.:(==)(a::Parameters, b::Parameters) a.gcsample == b.gcsample && a.time_tolerance == b.time_tolerance && a.memory_tolerance == b.memory_tolerance && - a.run_customisable_func_only == b.run_customisable_func_only && - a.enable_customisable_func == b.enable_customisable_func && - a.customisable_gcsample == b.customisable_gcsample && + a.run_customizable_func_only == b.run_customizable_func_only && + a.enable_customizable_func == b.enable_customizable_func && + a.customizable_gcsample == b.customizable_gcsample && a.setup_prehook == b.setup_prehook && a.teardown_posthook == b.teardown_posthook && a.sample_result == b.sample_result && @@ -297,9 +297,9 @@ function Base.copy(p::Parameters) p.gcsample, p.time_tolerance, p.memory_tolerance, - p.run_customisable_func_only, - p.enable_customisable_func, - 
p.customisable_gcsample, + p.run_customizable_func_only, + p.enable_customizable_func, + p.customizable_gcsample, p.setup_prehook, p.teardown_posthook, p.sample_result, diff --git a/src/serialization.jl b/src/serialization.jl index 2de899cd..3afabead 100644 --- a/src/serialization.jl +++ b/src/serialization.jl @@ -16,7 +16,7 @@ const SUPPORTED_TYPES = Dict{Symbol,Type}( ) # n.b. Benchmark type not included here, since it is gensym'd -customisable_result_recover(::Nothing) = nothing +customizable_result_recover(::Nothing) = nothing function JSON.lower(x::Union{values(SUPPORTED_TYPES)...}) d = Dict{String,Any}() @@ -53,8 +53,8 @@ function recover(x::Vector) for i in 1:fc ft = fieldtype(T, i) fn = String(fieldname(T, i)) - xsi = if fn == "customisable_result" - customisable_result_recover(fields[fn]) + xsi = if fn == "customizable_result" + customizable_result_recover(fields[fn]) elseif ft <: get(SUPPORTED_TYPES, nameof(ft), Union{}) recover(fields[fn]) elseif fn in ( @@ -68,13 +68,13 @@ function recover(x::Vector) # JSON spec doesn't support Inf # These fields should all be >= 0, so we can ignore -Inf case typemax(ft) - elseif fn == "enable_customisable_func" + elseif fn == "enable_customizable_func" if !haskey(fields, fn) :FALSE else Symbol(fields[fn]) end - elseif fn in ("run_customisable_func_only", "customisable_gcsample") && + elseif fn in ("run_customizable_func_only", "customizable_gcsample") && !haskey(fields, fn) getfield(BenchmarkTools.DEFAULT_PARAMETERS, Symbol(fn)) else diff --git a/src/trials.jl b/src/trials.jl index 6745e163..9cb55399 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -8,8 +8,8 @@ mutable struct Trial gctimes::Vector{Float64} memory::Int allocs::Int - customisable_result - customisable_result_for_every_sample::Bool + customizable_result + customizable_result_for_every_sample::Bool function Trial( params, @@ -17,8 +17,8 @@ mutable struct Trial gctimes, memory, allocs, - customisable_result=nothing, - customisable_result_for_every_sample=false, + customizable_result=nothing, + customizable_result_for_every_sample=false, ) return new( params, @@ -26,8 +26,8 @@ mutable struct Trial gctimes, memory, allocs, - customisable_result, - customisable_result_for_every_sample, + customizable_result, + customizable_result_for_every_sample, ) end end @@ -49,10 +49,10 @@ function Base.copy(t::Trial) copy(t.gctimes), t.memory, t.allocs, - if t.customisable_result_for_every_sample - copy(t.customisable_result) + if t.customizable_result_for_every_sample + copy(t.customizable_result) else - t.customisable_result + t.customizable_result end, ) end @@ -79,10 +79,10 @@ function Base.getindex(t::Trial, i::Number) [t.gctimes[i]], t.memory, t.allocs, - if t.customisable_result_for_every_sample - [t.customisable_result[i]] + if t.customizable_result_for_every_sample + [t.customizable_result[i]] else - t.customisable_result + t.customizable_result end, ) end @@ -93,10 +93,10 @@ function Base.getindex(t::Trial, i) t.gctimes[i], t.memory, t.allocs, - if t.customisable_result_for_every_sample - t.customisable_result[i] + if t.customizable_result_for_every_sample + t.customizable_result[i] else - t.customisable_result + t.customizable_result end, ) end @@ -155,18 +155,18 @@ mutable struct TrialEstimate gctime::Float64 memory::Int allocs::Int - customisable_result + customizable_result function TrialEstimate( - params, times, gctime, memory, allocs, customisable_result=nothing + params, times, gctime, memory, allocs, customizable_result=nothing ) - return new(params, times, gctime, memory, 
allocs, customisable_result) + return new(params, times, gctime, memory, allocs, customizable_result) end end function TrialEstimate(trial::Trial, t, gct) return TrialEstimate( - params(trial), t, gct, memory(trial), allocs(trial), trial.customisable_result + params(trial), t, gct, memory(trial), allocs(trial), trial.customizable_result ) end @@ -180,7 +180,7 @@ end function Base.copy(t::TrialEstimate) return TrialEstimate( - copy(t.params), t.time, t.gctime, t.memory, t.allocs, t.customisable_result + copy(t.params), t.time, t.gctime, t.memory, t.allocs, t.customizable_result ) end diff --git a/test/CustomisableBenchmarkTests.jl b/test/CustomisableBenchmarkTests.jl index 9376c98f..2cb9e4b6 100644 --- a/test/CustomisableBenchmarkTests.jl +++ b/test/CustomisableBenchmarkTests.jl @@ -1,4 +1,4 @@ -module CustomisableBenchmarkTests +module customizableBenchmarkTests using BenchmarkTools using Test @@ -23,49 +23,49 @@ end @testset "Disabled custom benchmarking" begin x[] = 0 res = @benchmark nothing setup_prehook = setup_prehook prehook = prehook posthook = - posthook sample_result = sample_result teardown_posthook = teardown_posthook run_customisable_func_only = + posthook sample_result = sample_result teardown_posthook = teardown_posthook run_customizable_func_only = false - @test res.customisable_result === nothing - @test !res.customisable_result_for_every_sample + @test res.customizable_result === nothing + @test !res.customizable_result_for_every_sample end @testset "custom benchmarking last" begin - for run_customisable_func_only in (true, false) + for run_customizable_func_only in (true, false) x[] = 0 res = @benchmark nothing setup_prehook = setup_prehook prehook = prehook posthook = - posthook sample_result = sample_result teardown_posthook = teardown_posthook enable_customisable_func = - :LAST run_customisable_func_only = run_customisable_func_only - if run_customisable_func_only + posthook sample_result = sample_result teardown_posthook = teardown_posthook enable_customizable_func = + :LAST run_customizable_func_only = run_customizable_func_only + if run_customizable_func_only @test isempty(res.times) @test isempty(res.gctimes) @test res.memory == typemax(Int) @test res.allocs == typemax(Int) end - @test !res.customisable_result_for_every_sample - @test res.customisable_result === 4 + @test !res.customizable_result_for_every_sample + @test res.customizable_result === 4 end end @testset "custom benchmark every sample, independent of iterations" begin - for run_customisable_func_only in (true, false) + for run_customizable_func_only in (true, false) x[] = 0 setup_prehook(_) = x[] = 1 res = @benchmark nothing setup_prehook = setup_prehook prehook = prehook posthook = - posthook sample_result = sample_result teardown_posthook = teardown_posthook enable_customisable_func = - :ALL run_customisable_func_only = run_customisable_func_only samples = 1000 - if run_customisable_func_only + posthook sample_result = sample_result teardown_posthook = teardown_posthook enable_customizable_func = + :ALL run_customizable_func_only = run_customizable_func_only samples = 1000 + if run_customizable_func_only @test isempty(res.times) @test isempty(res.gctimes) @test res.memory == typemax(Int) @test res.allocs == typemax(Int) end - @test res.customisable_result_for_every_sample - @test res.customisable_result == fill(4, 1000) + @test res.customizable_result_for_every_sample + @test res.customizable_result == fill(4, 1000) end end @testset "custom benchmark every sample with iteration dependence" begin - 
for run_customisable_func_only in (true, false) + for run_customizable_func_only in (true, false) x[] = 0 setup_prehook(_) = x[] += 1 prehook() = x[] += 1 @@ -77,16 +77,16 @@ end return x[] += 1 end res = @benchmark nothing setup_prehook = setup_prehook prehook = prehook posthook = - posthook sample_result = sample_result teardown_posthook = teardown_posthook enable_customisable_func = - :ALL run_customisable_func_only = run_customisable_func_only samples = 1000 - if run_customisable_func_only + posthook sample_result = sample_result teardown_posthook = teardown_posthook enable_customizable_func = + :ALL run_customizable_func_only = run_customizable_func_only samples = 1000 + if run_customizable_func_only @test isempty(res.times) @test isempty(res.gctimes) @test res.memory == typemax(Int) @test res.allocs == typemax(Int) end - @test res.customisable_result_for_every_sample - @test res.customisable_result == collect(5 * (1:1000) .- 1) + @test res.customizable_result_for_every_sample + @test res.customizable_result == collect(5 * (1:1000) .- 1) end end diff --git a/test/ParametersTests.jl b/test/ParametersTests.jl index 6a65396e..167fce77 100644 --- a/test/ParametersTests.jl +++ b/test/ParametersTests.jl @@ -27,11 +27,11 @@ p = Parameters(; gcsample=false, time_tolerance=0.043, memory_tolerance=0.15, - # Customisable Parameters - run_customisable_func_only=true, - enable_customisable_func=:ALL, - customisable_gcsample=true, - # Customisable functions + # customizable Parameters + run_customizable_func_only=true, + enable_customizable_func=:ALL, + customizable_gcsample=true, + # customizable functions setup_prehook=f, teardown_posthook=f, sample_result=f, @@ -44,10 +44,10 @@ oldsamples = BenchmarkTools.DEFAULT_PARAMETERS.samples oldevals = BenchmarkTools.DEFAULT_PARAMETERS.evals oldoverhead = BenchmarkTools.DEFAULT_PARAMETERS.overhead oldgcsample = BenchmarkTools.DEFAULT_PARAMETERS.gcsample -old_run_customisable_func_only = - BenchmarkTools.DEFAULT_PARAMETERS.run_customisable_func_only -old_enable_customisable_func = BenchmarkTools.DEFAULT_PARAMETERS.enable_customisable_func -old_customisable_gcsample = BenchmarkTools.DEFAULT_PARAMETERS.customisable_gcsample +old_run_customizable_func_only = + BenchmarkTools.DEFAULT_PARAMETERS.run_customizable_func_only +old_enable_customizable_func = BenchmarkTools.DEFAULT_PARAMETERS.enable_customizable_func +old_customizable_gcsample = BenchmarkTools.DEFAULT_PARAMETERS.customizable_gcsample old_setup_prehook = BenchmarkTools.DEFAULT_PARAMETERS.setup_prehook old_teardown_posthook = BenchmarkTools.DEFAULT_PARAMETERS.teardown_posthook old_sample_result = BenchmarkTools.DEFAULT_PARAMETERS.sample_result @@ -62,9 +62,9 @@ BenchmarkTools.DEFAULT_PARAMETERS.samples = p.samples BenchmarkTools.DEFAULT_PARAMETERS.evals = p.evals BenchmarkTools.DEFAULT_PARAMETERS.overhead = p.overhead BenchmarkTools.DEFAULT_PARAMETERS.gcsample = p.gcsample -BenchmarkTools.DEFAULT_PARAMETERS.run_customisable_func_only = p.run_customisable_func_only -BenchmarkTools.DEFAULT_PARAMETERS.enable_customisable_func = p.enable_customisable_func -BenchmarkTools.DEFAULT_PARAMETERS.customisable_gcsample = p.customisable_gcsample +BenchmarkTools.DEFAULT_PARAMETERS.run_customizable_func_only = p.run_customizable_func_only +BenchmarkTools.DEFAULT_PARAMETERS.enable_customizable_func = p.enable_customizable_func +BenchmarkTools.DEFAULT_PARAMETERS.customizable_gcsample = p.customizable_gcsample BenchmarkTools.DEFAULT_PARAMETERS.setup_prehook = p.setup_prehook 
BenchmarkTools.DEFAULT_PARAMETERS.teardown_posthook = p.teardown_posthook BenchmarkTools.DEFAULT_PARAMETERS.sample_result = p.sample_result @@ -79,30 +79,30 @@ BenchmarkTools.DEFAULT_PARAMETERS.samples = oldsamples BenchmarkTools.DEFAULT_PARAMETERS.evals = oldevals BenchmarkTools.DEFAULT_PARAMETERS.overhead = oldoverhead BenchmarkTools.DEFAULT_PARAMETERS.gcsample = oldgcsample -BenchmarkTools.DEFAULT_PARAMETERS.run_customisable_func_only = - old_run_customisable_func_only -BenchmarkTools.DEFAULT_PARAMETERS.enable_customisable_func = old_enable_customisable_func -BenchmarkTools.DEFAULT_PARAMETERS.customisable_gcsample = old_customisable_gcsample +BenchmarkTools.DEFAULT_PARAMETERS.run_customizable_func_only = + old_run_customizable_func_only +BenchmarkTools.DEFAULT_PARAMETERS.enable_customizable_func = old_enable_customizable_func +BenchmarkTools.DEFAULT_PARAMETERS.customizable_gcsample = old_customizable_gcsample BenchmarkTools.DEFAULT_PARAMETERS.setup_prehook = old_setup_prehook BenchmarkTools.DEFAULT_PARAMETERS.teardown_posthook = old_teardown_posthook BenchmarkTools.DEFAULT_PARAMETERS.sample_result = old_sample_result for vals in (false, true, :ARST, :TRUE, :false, :ON) - @test_throws ArgumentError Parameters(p; enable_customisable_func=vals) - @test_throws ArgumentError Parameters(; enable_customisable_func=vals) + @test_throws ArgumentError Parameters(p; enable_customizable_func=vals) + @test_throws ArgumentError Parameters(; enable_customizable_func=vals) end @test_throws ArgumentError Parameters(; - enable_customisable_func=:FALSE, run_customisable_func_only=true + enable_customizable_func=:FALSE, run_customizable_func_only=true ) -@test_nowarn Parameters(; enable_customisable_func=:FALSE, run_customisable_func_only=false) -for run_customisable_func_only in (false, true) +@test_nowarn Parameters(; enable_customizable_func=:FALSE, run_customizable_func_only=false) +for run_customizable_func_only in (false, true) @test_nowarn Parameters(; - enable_customisable_func=:ALL, run_customisable_func_only=run_customisable_func_only + enable_customizable_func=:ALL, run_customizable_func_only=run_customizable_func_only ) @test_nowarn Parameters(; - enable_customisable_func=:LAST, - run_customisable_func_only=run_customisable_func_only, + enable_customizable_func=:LAST, + run_customizable_func_only=run_customizable_func_only, ) end diff --git a/test/runtests.jl b/test/runtests.jl index ae76e702..d17203d0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -36,5 +36,5 @@ took_seconds = @elapsed include("SerializationTests.jl") println("done (took ", took_seconds, " seconds)") print("Testing custom benchmarking...") -took_seconds = @elapsed include("CustomisableBenchmarkTests.jl") +took_seconds = @elapsed include("customizableBenchmarkTests.jl") println("done (took ", took_seconds, " seconds)") From 6008719b8c26aec82178da64c69809f4d84f277c Mon Sep 17 00:00:00 2001 From: Zentrik Date: Fri, 21 Jun 2024 10:51:01 +0100 Subject: [PATCH 26/29] Add some rudimentary documentation --- docs/src/manual.md | 71 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/docs/src/manual.md b/docs/src/manual.md index df2755cf..ba83ccf8 100644 --- a/docs/src/manual.md +++ b/docs/src/manual.md @@ -86,7 +86,18 @@ You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`, - `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. 
Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.05`.
 - `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = 0.01`.
-To change the default values of the above fields, one can mutate the fields of `BenchmarkTools.DEFAULT_PARAMETERS`, for example:
+The following keyword arguments relate to [Running custom benchmarks] are experimental and subject to change, see [Running custom benchmarks] for furthe details.:
+
+- `run_customizable_func_only`: If `true`, only the customizable benchmark is run and the standard timing results are left empty. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.run_customizable_func_only = false`.
+- `enable_customizable_func`: If `:ALL`, the customizable benchmark runs on every sample; if `:LAST`, it runs only on the last sample; if `:FALSE`, it is never run. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.enable_customizable_func = :FALSE`.
+- `customizable_gcsample`: If `true`, runs `gc()` before each sample of the customizable benchmark. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.customizable_gcsample = false`.
+- `setup_prehook`: Runs at the start of a sample, before the benchmark's `setup`; its return value is passed on to `sample_result` and `teardown_posthook`. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.setup_prehook = _nothing_func`, which returns nothing.
+- `teardown_posthook`: Runs at the end of a sample, after the benchmark's `teardown`. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.teardown_posthook = _nothing_func`, which returns nothing.
+- `sample_result`: Computes the value stored for each sample from the parameters and the results of the other hooks. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.sample_result = _nothing_func`, which returns nothing.
+- `prehook`: Runs immediately before the benchmarked code; keep it as cheap as possible. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.prehook = _nothing_func`, which returns nothing.
+- `posthook`: Runs immediately after the benchmarked code; keep it as cheap as possible. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.posthook = _nothing_func`, which returns nothing.
+
+To change the default values of the above fields, one can mutate the fields of `BenchmarkTools.DEFAULT_PARAMETERS` (this is not supported for `prehook` and `posthook`), for example:

 ```julia
 # change default for `seconds` to 2.5
@@ -347,10 +358,20 @@ BenchmarkTools.Trial
   gcsample: Bool false
   time_tolerance: Float64 0.05
   memory_tolerance: Float64 0.01
+  run_customizable_func_only: Bool false
+  enable_customizable_func: Symbol FALSE
+  customizable_gcsample: Bool false
+  setup_prehook: _nothing_func (function of type typeof(BenchmarkTools._nothing_func))
+  teardown_posthook: _nothing_func (function of type typeof(BenchmarkTools._nothing_func))
+  sample_result: _nothing_func (function of type typeof(BenchmarkTools._nothing_func))
+  prehook: _nothing_func (function of type typeof(BenchmarkTools._nothing_func))
+  posthook: _nothing_func (function of type typeof(BenchmarkTools._nothing_func))
   times: Array{Float64}((10000,)) [26549.0, 26960.0, 27030.0, 27171.0, 27211.0, 27261.0, 27270.0, 27311.0, 27311.0, 27321.0 … 55383.0, 55934.0, 58649.0, 62847.0, 68547.0, 75761.0, 247081.0, 1.421718e6, 1.488322e6, 1.50329e6]
   gctimes: Array{Float64}((10000,)) [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.366184e6, 1.389518e6, 1.40116e6]
   memory: Int64 16752
   allocs: Int64 19
+  customizable_result: Nothing nothing
+  customizable_result_for_every_sample: Bool false
 ```

As you can see from the above, a couple of different timing estimates are pretty-printed with the `Trial`.
You can calculate these estimates yourself using the `minimum`, `maximum`, `median`, `mean`, and `std` functions (note that `median`, `mean`, and `std` are reexported in `BenchmarkTools` from `Statistics`):
@@ -1008,3 +1029,51 @@ This will display each `Trial` as a violin plot.
 - BenchmarkTools attempts to be robust against machine noise occurring between *samples*, but BenchmarkTools can't do very much about machine noise occurring between *trials*. To cut down on the latter kind of noise, it is advised that you dedicate CPUs and memory to the benchmarking Julia process by using a shielding tool such as [cset](http://manpages.ubuntu.com/manpages/precise/man1/cset.1.html).
 - On some machines, for some versions of BLAS and Julia, the number of BLAS worker threads can exceed the number of available cores. This can occasionally result in scheduling issues and inconsistent performance for BLAS-heavy benchmarks. To fix this issue, you can use `BLAS.set_num_threads(i::Int)` in the Julia REPL to ensure that the number of BLAS threads is equal to or less than the number of available cores.
 - `@benchmark` is evaluated in global scope, even if called from local scope.
+
+## Experimental - Running custom benchmarks
+
+If you want to run custom code during a benchmark, e.g. to collect additional metrics such as perf counters, you can configure a custom benchmark.
+A custom benchmark runs in the following way, where `benchmark_function` is the function we are benchmarking:
+```julia
+local setup_prehook_result
+try
+    setup_prehook_result = setup_prehook(params)
+    $(setup)
+    prehook_result = prehook()
+    for _ in 1:evals
+        benchmark_function()
+    end
+    posthook_result = posthook()
+    return sample_result(params, setup_prehook_result, prehook_result, posthook_result)
+finally
+    $(teardown)
+    teardown_posthook(params, setup_prehook_result)
+end
+```
+The result from `sample_result` is collected for each sample and can be accessed via the `customizable_result` field of `Trial`, the type of a benchmark result.
+
+Note that `prehook` and `posthook` should be as simple and fast as possible, moving any heavy lifting to `setup_prehook`, `sample_result` and `teardown_posthook`.
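+
+For instance, here is a minimal, hypothetical usage sketch (the `my_*` hook names are placeholders, and the keyword syntax mirrors `test/CustomizableBenchmarkTests.jl`) that records the raw wall time of every sample:
+```julia
+using BenchmarkTools
+
+# Placeholder hooks: take a timestamp on either side of each sample and let
+# `sample_result` report the mean time per evaluation.
+my_setup_prehook(_) = nothing
+my_prehook() = time_ns()
+my_posthook() = time_ns()
+my_sample_result(params, _, t0, t1) = (t1 - t0) / params.evals
+my_teardown_posthook(_, _) = nothing
+
+t = @benchmark sin(1.0) setup_prehook = my_setup_prehook prehook = my_prehook posthook =
+    my_posthook sample_result = my_sample_result teardown_posthook = my_teardown_posthook enable_customizable_func =
+    :ALL samples = 100
+
+t.customizable_result  # per-sample values returned by `my_sample_result`
+```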
+
+As an example, these are the hooks that replicate the normal benchmarking functionality:
+```julia
+setup_prehook(_) = nothing
+samplefunc_prehook() = (Base.gc_num(), time_ns())
+samplefunc_posthook = samplefunc_prehook
+function samplefunc_sample_result(params, _, prehook_result, posthook_result)
+    evals = params.evals
+    sample_time = posthook_result[2] - prehook_result[2]
+    gcdiff = Base.GC_Diff(posthook_result[1], prehook_result[1])
+
+    time = max((sample_time / evals) - params.overhead, 0.001)
+    gctime = max((gcdiff.total_time / evals) - params.overhead, 0.0)
+    memory = Int(Base.fld(gcdiff.allocd, evals))
+    allocs = Int(
+        Base.fld(
+            gcdiff.malloc + gcdiff.realloc + gcdiff.poolalloc + gcdiff.bigalloc,
+            evals,
+        ),
+    )
+    return time, gctime, memory, allocs
+end
+teardown_posthook(_, _) = nothing
+```

From e1eb8ac75b7077f862d5d51d038c1b1d056508cd Mon Sep 17 00:00:00 2001
From: Zentrik
Date: Fri, 21 Jun 2024 10:52:31 +0100
Subject: [PATCH 27/29] Rename customisableBenchmarkTests

---
 ...tomisableBenchmarkTests.jl => CustomizableBenchmarkTests.jl} | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename test/{CustomisableBenchmarkTests.jl => CustomizableBenchmarkTests.jl} (99%)

diff --git a/test/CustomisableBenchmarkTests.jl b/test/CustomizableBenchmarkTests.jl
similarity index 99%
rename from test/CustomisableBenchmarkTests.jl
rename to test/CustomizableBenchmarkTests.jl
index 2cb9e4b6..e66ca38f 100644
--- a/test/CustomisableBenchmarkTests.jl
+++ b/test/CustomizableBenchmarkTests.jl
@@ -1,4 +1,4 @@
-module customizableBenchmarkTests
+module CustomizableBenchmarkTests

 using BenchmarkTools
 using Test

From 496bfe03a4f62f322d2f8e4a7bfbac7726b2269f Mon Sep 17 00:00:00 2001
From: Zentrik
Date: Fri, 21 Jun 2024 11:13:30 +0100
Subject: [PATCH 28/29] Fix typo

---
 test/runtests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index d17203d0..82fd4c8f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -36,5 +36,5 @@ took_seconds = @elapsed include("SerializationTests.jl")
 println("done (took ", took_seconds, " seconds)")

 print("Testing custom benchmarking...")
-took_seconds = @elapsed include("customizableBenchmarkTests.jl")
+took_seconds = @elapsed include("CustomizableBenchmarkTests.jl")
 println("done (took ", took_seconds, " seconds)")

From a77bfca26390681d889460366cb9980ba36103c9 Mon Sep 17 00:00:00 2001
From: Zentrik
Date: Fri, 21 Jun 2024 11:42:02 +0100
Subject: [PATCH 29/29] Fixup doc

---
 docs/src/manual.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/src/manual.md b/docs/src/manual.md
index ba83ccf8..337343cf 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -86,7 +86,7 @@ You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`,
 - `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.05`.
 - `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = 0.01`.
-The following keyword arguments relate to [Running custom benchmarks] are experimental and subject to change, see [Running custom benchmarks] for furthe details.:
+The following keyword arguments are experimental and subject to change; see [Running custom benchmarks](@ref) for further details:
 - `run_customizable_func_only`: If `true`, only the customizable benchmark is run and the standard timing results are left empty. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.run_customizable_func_only = false`.
 - `enable_customizable_func`: If `:ALL`, the customizable benchmark runs on every sample; if `:LAST`, it runs only on the last sample; if `:FALSE`, it is never run. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.enable_customizable_func = :FALSE`.
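+
+For illustration, a short sketch of how these two parameters interact, based on the argument validation exercised in `test/ParametersTests.jl`:
+```julia
+using BenchmarkTools: Parameters
+
+# Invalid: :FALSE disables the customizable benchmark entirely, so asking to
+# run only the customizable benchmark leaves nothing to run.
+Parameters(; enable_customizable_func=:FALSE, run_customizable_func_only=true)  # throws ArgumentError
+
+# Valid: run the customizable benchmark on the last sample only.
+Parameters(; enable_customizable_func=:LAST, run_customizable_func_only=false)
+
+# Symbols other than :ALL, :LAST, and :FALSE (e.g. :TRUE, :ON) also throw an ArgumentError.
+```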