diff --git a/.gitignore b/.gitignore
index 00745b66..cfc0fdf1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ test/x.json
docs/Manifest.toml
docs/build
docs/src/assets/indigo.css
-Manifest.toml
\ No newline at end of file
+Manifest.toml
+misc/
diff --git a/Project.toml b/Project.toml
index 65cf9bb5..833b3cd2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
-name = "BenchmarkTools"
-uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
-version = "1.3.1"
+name = "BenchmarkExt"
+uuid = "f97012b4-579f-4109-a65c-74c581afaadd"
+version = "0.1.1"
[deps]
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
@@ -15,8 +15,10 @@ JSON = "0.18, 0.19, 0.20, 0.21"
julia = "1"
[extras]
+StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+ReTest = "e0db7c4e-2690-44b9-bad6-7687da720f89"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[targets]
-test = ["Statistics", "Test"]
+test = ["Statistics", "ReTest", "StableRNGs"]
diff --git a/README.md b/README.md
index 93226b3b..1c6ab998 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,11 @@
-# BenchmarkTools.jl
+# BenchmarkExt.jl
[![][docs-stable-img]][docs-stable-url]
[![][docs-dev-img]][docs-dev-url]
-[![Build Status](https://github.com/JuliaCI/BenchmarkTools.jl/workflows/CI/badge.svg)](https://github.com/JuliaCI/BenchmarkTools.jl/actions/workflows/CI.yml?query=branch%3Amaster)
-[![Code Coverage](https://codecov.io/gh/JuliaCI/BenchmarkTools.jl/branch/master/graph/badge.svg?label=codecov&token=ccN7NZpkBx)](https://codecov.io/gh/JuliaCI/BenchmarkTools.jl)
+[![Build Status](https://github.com/JuliaCI/BenchmarkExt.jl/workflows/CI/badge.svg)](https://github.com/JuliaCI/BenchmarkExt.jl/actions/workflows/CI.yml?query=branch%3Amaster)
+[![Code Coverage](https://codecov.io/gh/JuliaCI/BenchmarkExt.jl/branch/master/graph/badge.svg?label=codecov&token=ccN7NZpkBx)](https://codecov.io/gh/JuliaCI/BenchmarkExt.jl)
-BenchmarkTools makes **performance tracking of Julia code easy** by supplying a framework for **writing and running groups of benchmarks** as well as **comparing benchmark results**.
+BenchmarkExt makes **performance tracking of Julia code easy** by supplying a framework for **writing and running groups of benchmarks** as well as **comparing benchmark results**.
This package is used to write and run the benchmarks found in [BaseBenchmarks.jl](https://github.com/JuliaCI/BaseBenchmarks.jl).
@@ -14,44 +14,44 @@ The CI infrastructure for automated performance testing of the Julia language is
## Installation
-BenchmarkTools is a
+BenchmarkExt is a
Julia Language
- package. To install BenchmarkTools,
+ package. To install BenchmarkExt,
please open
Julia's interactive session (known as REPL) and press ] key in the REPL to use the package mode, then type the following command
```julia
-pkg> add BenchmarkTools
+pkg> add BenchmarkExt
```
## Documentation
-If you're just getting started, check out the [manual](https://juliaci.github.io/BenchmarkTools.jl/dev/manual/) for a thorough explanation of BenchmarkTools.
+If you're just getting started, check out the [manual](https://juliaci.github.io/BenchmarkExt.jl/dev/manual/) for a thorough explanation of BenchmarkExt.
-If you want to explore the BenchmarkTools API, see the [reference document](https://juliaci.github.io/BenchmarkTools.jl/dev/reference/).
+If you want to explore the BenchmarkExt API, see the [reference document](https://juliaci.github.io/BenchmarkExt.jl/dev/reference/).
If you want a short example of a toy benchmark suite, see the sample file in this repo ([benchmark/benchmarks.jl](benchmark/benchmarks.jl)).
If you want an extensive example of a benchmark suite being used in the real world, you can look at the source code of [BaseBenchmarks.jl](https://github.com/JuliaCI/BaseBenchmarks.jl/tree/nanosoldier).
-If you're benchmarking on Linux, I wrote up a series of [tips and tricks](https://juliaci.github.io/BenchmarkTools.jl/dev/linuxtips/) to help eliminate noise during performance tests.
+If you're benchmarking on Linux, I wrote up a series of [tips and tricks](https://juliaci.github.io/BenchmarkExt.jl/dev/linuxtips/) to help eliminate noise during performance tests.
## Quick Start
-The primary macro provided by BenchmarkTools is `@benchmark`:
+The primary macro provided by BenchmarkExt is `@benchmark`:
```julia
-julia> using BenchmarkTools
+julia> using BenchmarkExt
# The `setup` expression is run once per sample, and is not included in the
# timing results. Note that each sample can require multiple evaluations
-# benchmark kernel evaluations. See the BenchmarkTools manual for details.
+# benchmark kernel evaluations. See the BenchmarkExt manual for details.
julia> @benchmark sort(data) setup=(data=rand(10))
-BenchmarkTools.Trial: 10000 samples with 972 evaluations.
+BenchmarkExt.Trial: 10000 samples with 972 evaluations.
Range (min … max): 69.399 ns … 1.066 μs ┊ GC (min … max): 0.00% … 0.00%
Time (median): 83.850 ns ┊ GC (median): 0.00%
Time (mean ± σ): 89.471 ns ± 53.666 ns ┊ GC (mean ± σ): 3.25% ± 5.16%
@@ -63,7 +63,7 @@ BenchmarkTools.Trial: 10000 samples with 972 evaluations.
Memory estimate: 160 bytes, allocs estimate: 1.
```
-For quick sanity checks, one can use the [`@btime` macro](https://juliaci.github.io/BenchmarkTools.jl/stable/manual/#Benchmarking-basics), which is a convenience wrapper around `@benchmark` whose output is analogous to Julia's built-in [`@time` macro](https://docs.julialang.org/en/v1/base/base/#Base.@time):
+For quick sanity checks, one can use the [`@btime` macro](https://juliaci.github.io/BenchmarkExt.jl/stable/manual/#Benchmarking-basics), which is a convenience wrapper around `@benchmark` whose output is analogous to Julia's built-in [`@time` macro](https://docs.julialang.org/en/v1/base/base/#Base.@time):
```julia
# The `seconds` expression helps set a rough time budget, see Manual for more explaination
@@ -72,7 +72,7 @@ julia> @btime sin(x) setup=(x=rand()) seconds=3
0.49587200950472454
```
-If the expression you want to benchmark depends on external variables, you should use [`$` to "interpolate"](https://juliaci.github.io/BenchmarkTools.jl/stable/manual/#Interpolating-values-into-benchmark-expressions) them into the benchmark expression to
+If the expression you want to benchmark depends on external variables, you should use [`$` to "interpolate"](https://juliaci.github.io/BenchmarkExt.jl/stable/manual/#Interpolating-values-into-benchmark-expressions) them into the benchmark expression to
[avoid the problems of benchmarking with globals](https://docs.julialang.org/en/v1/manual/performance-tips/#Avoid-global-variables).
Essentially, any interpolated variable `$x` or expression `$(...)` is "pre-computed" before benchmarking begins:
@@ -105,7 +105,7 @@ julia> @btime $(Ref(a))[] + $(Ref(b))[]
3
```
-As described the [manual](https://juliaci.github.io/BenchmarkTools.jl/dev/manual/), the BenchmarkTools package supports many other features, both for additional output and for more fine-grained control over the benchmarking process.
+As described in the [manual](https://juliaci.github.io/BenchmarkExt.jl/dev/manual/), the BenchmarkExt package supports many other features, both for additional output and for more fine-grained control over the benchmarking process.
## Why does this package exist?
@@ -119,7 +119,7 @@ For a while, the Benchmarks + BenchmarkTrackers system was used for automated pe
4. Running benchmarks took a long time - an order of magnitude longer than theoretically necessary for many functions.
5. Using the system in the REPL (for example, to reproduce regressions locally) was often cumbersome.
-The BenchmarkTools package is a response to these issues, designed by examining user reports and the benchmark data generated by the old system. BenchmarkTools offers the following solutions to the corresponding issues above:
+The BenchmarkExt package is a response to these issues, designed by examining user reports and the benchmark data generated by the old system. BenchmarkExt offers the following solutions to the corresponding issues above:
1. Benchmark execution parameters are configured separately from the execution of the benchmark itself. This means that subsequent experiments are performed more consistently, avoiding branching "substrategies" based on small numbers of samples.
2. A variety of simple estimators are supported, and the user can pick which one to use for regression detection.
@@ -131,12 +131,12 @@ The BenchmarkTools package is a response to these issues, designed by examining
This package was authored primarily by Jarrett Revels (@jrevels). Additionally, I'd like to thank the following people:
-- John Myles White, for authoring the original Benchmarks package, which greatly inspired BenchmarkTools
+- John Myles White, for authoring the original Benchmarks package, which greatly inspired BenchmarkExt
- Andreas Noack, for statistics help and investigating weird benchmark time distributions
- Oscar Blumberg, for discussions on noise robustness
- Jiahao Chen, for discussions on error analysis
[docs-dev-img]: https://img.shields.io/badge/docs-dev-blue.svg
-[docs-dev-url]: https://JuliaCI.github.io/BenchmarkTools.jl/dev/
+[docs-dev-url]: https://JuliaCI.github.io/BenchmarkExt.jl/dev/
[docs-stable-img]: https://img.shields.io/badge/docs-stable-blue.svg
-[docs-stable-url]: https://JuliaCI.github.io/BenchmarkTools.jl/stable
+[docs-stable-url]: https://JuliaCI.github.io/BenchmarkExt.jl/stable
diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl
index cd0781a9..08068c5d 100644
--- a/benchmark/benchmarks.jl
+++ b/benchmark/benchmarks.jl
@@ -1,5 +1,5 @@
-using BenchmarkTools
+using BenchmarkExt
# Define a parent BenchmarkGroup to contain our suite
const suite = BenchmarkGroup()
@@ -30,8 +30,8 @@ end
paramspath = joinpath(dirname(@__FILE__), "params.json")
if isfile(paramspath)
- loadparams!(suite, BenchmarkTools.load(paramspath)[1], :evals);
+ loadparams!(suite, BenchmarkExt.load(paramspath)[1], :evals);
else
tune!(suite)
- BenchmarkTools.save(paramspath, params(suite));
+ BenchmarkExt.save(paramspath, params(suite));
end
diff --git a/docs/Project.toml b/docs/Project.toml
index b48d87cc..cea35637 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,4 +1,4 @@
[deps]
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+BenchmarkExt = "f97012b4-579f-4109-a65c-74c581afaadd"
DocThemeIndigo = "8bac0ac5-51bf-41f9-885e-2bf1ac2bec5f"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
diff --git a/docs/make.jl b/docs/make.jl
index 098aa403..2400391b 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,15 +1,15 @@
-using BenchmarkTools
+using BenchmarkExt
using Documenter
using DocThemeIndigo
-indigo = DocThemeIndigo.install(BenchmarkTools)
+indigo = DocThemeIndigo.install(BenchmarkExt)
makedocs(;
- modules=[BenchmarkTools],
- repo="https://github.com/JuliaCI/BenchmarkTools.jl/blob/{commit}{path}#{line}",
- sitename="BenchmarkTools.jl",
+ modules=[BenchmarkExt],
+ repo="https://github.com/JuliaCI/BenchmarkExt.jl/blob/{commit}{path}#{line}",
+ sitename="BenchmarkExt.jl",
format=Documenter.HTML(;
prettyurls=get(ENV, "CI", "false") == "true",
- canonical="https://JuliaCI.github.io/BenchmarkTools.jl",
+ canonical="https://JuliaCI.github.io/BenchmarkExt.jl",
assets=String[indigo],
),
pages=[
@@ -22,5 +22,5 @@ makedocs(;
)
deploydocs(;
- repo="github.com/JuliaCI/BenchmarkTools.jl",
+ repo="github.com/JuliaCI/BenchmarkExt.jl",
)
diff --git a/docs/src/index.md b/docs/src/index.md
index a3da1678..049511e8 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -1,6 +1,6 @@
-# BenchmarkTools
+# BenchmarkExt
-BenchmarkTools makes **performance tracking of Julia code easy** by supplying a framework for **writing and running groups of benchmarks** as well as **comparing benchmark results**.
+BenchmarkExt makes **performance tracking of Julia code easy** by supplying a framework for **writing and running groups of benchmarks** as well as **comparing benchmark results**.
This package is used to write and run the benchmarks found in [BaseBenchmarks.jl](https://github.com/JuliaCI/BaseBenchmarks.jl).
@@ -8,16 +8,16 @@ The CI infrastructure for automated performance testing of the Julia language is
## Quick Start
-The primary macro provided by BenchmarkTools is `@benchmark`:
+The primary macro provided by BenchmarkExt is `@benchmark`:
```julia
-julia> using BenchmarkTools
+julia> using BenchmarkExt
# The `setup` expression is run once per sample, and is not included in the
# timing results. Note that each sample can require multiple evaluations
-# benchmark kernel evaluations. See the BenchmarkTools manual for details.
+# benchmark kernel evaluations. See the BenchmarkExt manual for details.
julia> @benchmark sort(data) setup=(data=rand(10))
-BenchmarkTools.Trial:
+BenchmarkExt.Trial:
10000 samples with 968 evaulations took a median time of 90.902 ns (0.00% GC)
Time (mean ± σ): 94.936 ns ± 47.797 ns (GC: 2.78% ± 5.03%)
Range (min … max): 77.655 ns … 954.823 ns (GC: 0.00% … 87.94%)
@@ -29,7 +29,7 @@ BenchmarkTools.Trial:
Memory estimate: 160 bytes, allocs estimate: 1.
```
-For quick sanity checks, one can use the [`@btime` macro](https://github.com/JuliaCI/BenchmarkTools.jl/blob/master/doc/manual.md#benchmarking-basics), which is a convenience wrapper around `@benchmark` whose output is analogous to Julia's built-in [`@time` macro](https://docs.julialang.org/en/v1/base/base/#Base.@time):
+For quick sanity checks, one can use the [`@btime` macro](https://github.com/JuliaCI/BenchmarkExt.jl/blob/master/doc/manual.md#benchmarking-basics), which is a convenience wrapper around `@benchmark` whose output is analogous to Julia's built-in [`@time` macro](https://docs.julialang.org/en/v1/base/base/#Base.@time):
```julia
julia> @btime sin(x) setup=(x=rand())
@@ -40,7 +40,7 @@ julia> @btime sin(x) setup=(x=rand())
If you're interested in profiling a fast-running command, you can use `@bprofile sin(x) setup=(x=rand())` and then your favorite
tools for displaying the results (`Profile.print` or a graphical viewer).
-If the expression you want to benchmark depends on external variables, you should use [`$` to "interpolate"](https://github.com/JuliaCI/BenchmarkTools.jl/blob/master/doc/manual.md#interpolating-values-into-benchmark-expressions) them into the benchmark expression to
+If the expression you want to benchmark depends on external variables, you should use [`$` to "interpolate"](https://github.com/JuliaCI/BenchmarkExt.jl/blob/master/doc/manual.md#interpolating-values-into-benchmark-expressions) them into the benchmark expression to
[avoid the problems of benchmarking with globals](https://docs.julialang.org/en/v1/manual/performance-tips/#Avoid-global-variables).
Essentially, any interpolated variable `$x` or expression `$(...)` is "pre-computed" before benchmarking begins:
@@ -73,4 +73,4 @@ julia> @btime $(Ref(a))[] + $(Ref(b))[]
3
```
-As described the [Manual](@ref), the BenchmarkTools package supports many other features, both for additional output and for more fine-grained control over the benchmarking process.
+As described in the [Manual](@ref), the BenchmarkExt package supports many other features, both for additional output and for more fine-grained control over the benchmarking process.
diff --git a/docs/src/internals.md b/docs/src/internals.md
index b37b2cca..470d03f9 100644
--- a/docs/src/internals.md
+++ b/docs/src/internals.md
@@ -1,7 +1,7 @@
# Internals
```@autodocs
-Modules = [BenchmarkTools]
+Modules = [BenchmarkExt]
Public = false
Filter = f -> f !== Base.run
```
diff --git a/docs/src/manual.md b/docs/src/manual.md
index 8fd5105a..beea86fa 100644
--- a/docs/src/manual.md
+++ b/docs/src/manual.md
@@ -1,5 +1,5 @@
# Manual
-BenchmarkTools was created to facilitate the following tasks:
+BenchmarkExt was created to facilitate the following tasks:
1. Organize collections of benchmarks into manageable benchmark suites
2. Configure, save, and reload benchmark parameters for convenience, accuracy, and consistency
@@ -14,7 +14,7 @@ Before we get too far, let's define some of the terminology used in this documen
- "benchmark parameters": the configuration settings that determine how a benchmark trial is performed
The reasoning behind our definition of "sample" may not be obvious to all readers. If the time to execute a benchmark is smaller than the resolution of your timing method, then a single evaluation of the benchmark will generally not produce a valid sample. In that case, one must approximate a valid sample by
-recording the total time `t` it takes to record `n` evaluations, and estimating the sample's time per evaluation as `t/n`. For example, if a sample takes 1 second for 1 million evaluations, the approximate time per evaluation for that sample is 1 microsecond. It's not obvious what the right number of evaluations per sample should be for any given benchmark, so BenchmarkTools provides a mechanism (the `tune!` method) to automatically figure it out for you.
+recording the total time `t` it takes to record `n` evaluations, and estimating the sample's time per evaluation as `t/n`. For example, if a sample takes 1 second for 1 million evaluations, the approximate time per evaluation for that sample is 1 microsecond. It's not obvious what the right number of evaluations per sample should be for any given benchmark, so BenchmarkExt provides a mechanism (the `tune!` method) to automatically figure it out for you.
## Benchmarking basics
@@ -24,7 +24,7 @@ To quickly benchmark a Julia expression, use `@benchmark`:
```julia
julia> @benchmark sin(1)
-BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
+BenchmarkExt.Trial: 10000 samples with 1000 evaluations.
Range (min … max): 1.442 ns … 53.028 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 1.453 ns ┊ GC (median): 0.00%
Time (mean ± σ): 1.462 ns ± 0.566 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
@@ -45,7 +45,7 @@ julia> b = @benchmarkable sin(1); # define the benchmark with default parameters
julia> tune!(b);
julia> run(b)
-BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
+BenchmarkExt.Trial: 10000 samples with 1000 evaluations.
Range (min … max): 1.442 ns … 4.308 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 1.453 ns ┊ GC (median): 0.00%
Time (mean ± σ): 1.456 ns ± 0.056 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
@@ -77,22 +77,22 @@ julia> @belapsed sin(1)
You can pass the following keyword arguments to `@benchmark`, `@benchmarkable`, and `run` to configure the execution process:
-- `samples`: The number of samples to take. Execution will end if this many samples have been collected. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.samples = 10000`.
-- `seconds`: The number of seconds budgeted for the benchmarking process. The trial will terminate if this time is exceeded (regardless of `samples`), but at least one sample will always be taken. In practice, actual runtime can overshoot the budget by the duration of a sample. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.seconds = 5`.
-- `evals`: The number of evaluations per sample. For best results, this should be kept consistent between trials. A good guess for this value can be automatically set on a benchmark via `tune!`, but using `tune!` can be less consistent than setting `evals` manually. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.evals = 1`.
-- `overhead`: The estimated loop overhead per evaluation in nanoseconds, which is automatically subtracted from every sample time measurement. The default value is `BenchmarkTools.DEFAULT_PARAMETERS.overhead = 0`. `BenchmarkTools.estimate_overhead` can be called to determine this value empirically (which can then be set as the default value, if you want).
-- `gctrial`: If `true`, run `gc()` before executing this benchmark's trial. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.gctrial = true`.
-- `gcsample`: If `true`, run `gc()` before each sample. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.gcsample = false`.
-- `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.05`.
-- `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = 0.01`.
+- `samples`: The number of samples to take. Execution will end if this many samples have been collected. Defaults to `BenchmarkExt.DEFAULT_PARAMETERS.samples = 10000`.
+- `seconds`: The number of seconds budgeted for the benchmarking process. The trial will terminate if this time is exceeded (regardless of `samples`), but at least one sample will always be taken. In practice, actual runtime can overshoot the budget by the duration of a sample. Defaults to `BenchmarkExt.DEFAULT_PARAMETERS.seconds = 5`.
+- `evals`: The number of evaluations per sample. For best results, this should be kept consistent between trials. A good guess for this value can be automatically set on a benchmark via `tune!`, but using `tune!` can be less consistent than setting `evals` manually. Defaults to `BenchmarkExt.DEFAULT_PARAMETERS.evals = 1`.
+- `overhead`: The estimated loop overhead per evaluation in nanoseconds, which is automatically subtracted from every sample time measurement. The default value is `BenchmarkExt.DEFAULT_PARAMETERS.overhead = 0`. `BenchmarkExt.estimate_overhead` can be called to determine this value empirically (which can then be set as the default value, if you want).
+- `gctrial`: If `true`, run `gc()` before executing this benchmark's trial. Defaults to `BenchmarkExt.DEFAULT_PARAMETERS.gctrial = true`.
+- `gcsample`: If `true`, run `gc()` before each sample. Defaults to `BenchmarkExt.DEFAULT_PARAMETERS.gcsample = false`.
+- `time_tolerance`: The noise tolerance for the benchmark's time estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkExt.DEFAULT_PARAMETERS.time_tolerance = 0.05`.
+- `memory_tolerance`: The noise tolerance for the benchmark's memory estimate, as a percentage. This is utilized after benchmark execution, when analyzing results. Defaults to `BenchmarkExt.DEFAULT_PARAMETERS.memory_tolerance = 0.01`.
-To change the default values of the above fields, one can mutate the fields of `BenchmarkTools.DEFAULT_PARAMETERS`, for example:
+To change the default values of the above fields, one can mutate the fields of `BenchmarkExt.DEFAULT_PARAMETERS`, for example:
```julia
# change default for `seconds` to 2.5
-BenchmarkTools.DEFAULT_PARAMETERS.seconds = 2.50
+BenchmarkExt.DEFAULT_PARAMETERS.seconds = 2.50
# change default for `time_tolerance` to 0.20
-BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = 0.20
+BenchmarkExt.DEFAULT_PARAMETERS.time_tolerance = 0.20
```
Here's an example that demonstrates how to pass these parameters to benchmark definitions:
@@ -109,7 +109,7 @@ You can interpolate values into `@benchmark` and `@benchmarkable` expressions:
```julia
# rand(1000) is executed for each evaluation
julia> @benchmark sum(rand(1000))
-BenchmarkTools.Trial: 10000 samples with 10 evaluations.
+BenchmarkExt.Trial: 10000 samples with 10 evaluations.
Range (min … max): 1.153 μs … 142.253 μs ┊ GC (min … max): 0.00% … 96.43%
Time (median): 1.363 μs ┊ GC (median): 0.00%
Time (mean ± σ): 1.786 μs ± 4.612 μs ┊ GC (mean ± σ): 9.58% ± 3.70%
@@ -123,7 +123,7 @@ BenchmarkTools.Trial: 10000 samples with 10 evaluations.
# rand(1000) is evaluated at definition time, and the resulting
# value is interpolated into the benchmark expression
julia> @benchmark sum($(rand(1000)))
-BenchmarkTools.Trial: 10000 samples with 963 evaluations.
+BenchmarkExt.Trial: 10000 samples with 963 evaluations.
Range (min … max): 84.477 ns … 241.602 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 84.497 ns ┊ GC (median): 0.00%
Time (mean ± σ): 85.125 ns ± 5.262 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
@@ -142,7 +142,7 @@ julia> A = rand(1000);
# BAD: A is a global variable in the benchmarking context
julia> @benchmark [i*i for i in A]
-BenchmarkTools.Trial: 10000 samples with 54 evaluations.
+BenchmarkExt.Trial: 10000 samples with 54 evaluations.
Range (min … max): 889.241 ns … 29.584 μs ┊ GC (min … max): 0.00% … 93.33%
Time (median): 1.073 μs ┊ GC (median): 0.00%
Time (mean ± σ): 1.296 μs ± 2.004 μs ┊ GC (mean ± σ): 14.31% ± 8.76%
@@ -155,7 +155,7 @@ BenchmarkTools.Trial: 10000 samples with 54 evaluations.
# GOOD: A is a constant value in the benchmarking context
julia> @benchmark [i*i for i in $A]
-BenchmarkTools.Trial: 10000 samples with 121 evaluations.
+BenchmarkExt.Trial: 10000 samples with 121 evaluations.
Range (min … max): 742.455 ns … 11.846 μs ┊ GC (min … max): 0.00% … 88.05%
Time (median): 909.959 ns ┊ GC (median): 0.00%
Time (mean ± σ): 1.135 μs ± 1.366 μs ┊ GC (mean ± σ): 16.94% ± 12.58%
@@ -210,7 +210,7 @@ julia> let x = 1
### Setup and teardown phases
-BenchmarkTools allows you to pass `setup` and `teardown` expressions to `@benchmark` and `@benchmarkable`. The `setup` expression is evaluated just before sample execution, while the `teardown` expression is evaluated just after sample execution. Here's an example where this kind of thing is useful:
+BenchmarkExt allows you to pass `setup` and `teardown` expressions to `@benchmark` and `@benchmarkable`. The `setup` expression is evaluated just before sample execution, while the `teardown` expression is evaluated just after sample execution. Here's an example where this kind of thing is useful:
```julia
julia> x = rand(100000);
@@ -221,7 +221,7 @@ julia> b = @benchmarkable sort!(y) setup=(y = copy($x))
Benchmark(evals=1, seconds=5.0, samples=10000)
julia> run(b)
-BenchmarkTools.Trial: 819 samples with 1 evaluations.
+BenchmarkExt.Trial: 819 samples with 1 evaluations.
Range (min … max): 5.983 ms … 6.954 ms ┊ GC (min … max): 0.00% … 0.00%
Time (median): 6.019 ms ┊ GC (median): 0.00%
Time (mean ± σ): 6.029 ms ± 46.222 μs ┊ GC (mean ± σ): 0.00% ± 0.00%
@@ -242,7 +242,7 @@ Note that the `setup` and `teardown` phases are **executed for each sample, not
It's possible for LLVM and Julia's compiler to perform optimizations on `@benchmarkable` expressions. In some cases, these optimizations can elide a computation altogether, resulting in unexpectedly "fast" benchmarks. For example, the following expression is non-allocating:
```julia
julia> @benchmark (view(a, 1:2, 1:2); 1) setup=(a = rand(3, 3))
-BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
+BenchmarkExt.Trial: 10000 samples with 1000 evaluations.
Range (min … max): 2.885 ns … 14.797 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 2.895 ns ┊ GC (median): 0.00%
Time (mean ± σ): 3.320 ns ± 0.909 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
@@ -258,7 +258,7 @@ Note, however, that this does not mean that `view(a, 1:2, 1:2)` is non-allocatin
```julia
julia> @benchmark view(a, 1:2, 1:2) setup=(a = rand(3, 3))
-BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
+BenchmarkExt.Trial: 10000 samples with 1000 evaluations.
Range (min … max): 3.175 ns … 18.314 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 3.176 ns ┊ GC (median): 0.00%
Time (mean ± σ): 3.262 ns ± 0.882 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
@@ -272,7 +272,7 @@ BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
The key point here is that these two benchmarks measure different things, even though their code is similar. In the first example, Julia was able to optimize away `view(a, 1:2, 1:2)` because it could prove that the value wasn't being returned and `a` wasn't being mutated. In the second example, the optimization is not performed because `view(a, 1:2, 1:2)` is a return value of the benchmark expression.
-BenchmarkTools will faithfully report the performance of the exact code that you provide to it, including any compiler optimizations that might happen to elide the code completely. It's up to you to design benchmarks which actually exercise the code you intend to exercise.
+BenchmarkExt will faithfully report the performance of the exact code that you provide to it, including any compiler optimizations that might happen to elide the code completely. It's up to you to design benchmarks which actually exercise the code you intend to exercise.
A common place julia's optimizer may cause a benchmark to not measure what a user thought it was measuring is simple operations where all values are known at compile time. Suppose you wanted to measure the time it takes to add together two integers:
```julia
@@ -292,7 +292,7 @@ julia> @btime $(Ref(a))[] + $(Ref(b))[]
## Handling benchmark results
-BenchmarkTools provides four types related to benchmark results:
+BenchmarkExt provides four types related to benchmark results:
- `Trial`: stores all samples collected during a benchmark trial, as well as the trial's parameters
- `TrialEstimate`: a single estimate used to summarize a `Trial`
@@ -307,7 +307,7 @@ Running a benchmark produces an instance of the `Trial` type:
```julia
julia> t = @benchmark eigen(rand(10, 10))
-BenchmarkTools.Trial: 10000 samples with 1 evaluations.
+BenchmarkExt.Trial: 10000 samples with 1 evaluations.
Range (min … max): 26.549 μs … 1.503 ms ┊ GC (min … max): 0.00% … 93.21%
Time (median): 30.818 μs ┊ GC (median): 0.00%
Time (mean ± σ): 31.777 μs ± 25.161 μs ┊ GC (mean ± σ): 1.31% ± 1.63%
@@ -319,8 +319,8 @@ BenchmarkTools.Trial: 10000 samples with 1 evaluations.
Memory estimate: 16.36 KiB, allocs estimate: 19.
julia> dump(t) # here's what's actually stored in a Trial
-BenchmarkTools.Trial
- params: BenchmarkTools.Parameters
+BenchmarkExt.Trial
+ params: BenchmarkExt.Parameters
seconds: Float64 5.0
samples: Int64 10000
evals: Int64 1
@@ -339,35 +339,35 @@ As you can see from the above, a couple of different timing estimates are pretty
```julia
julia> minimum(t)
-BenchmarkTools.TrialEstimate:
+BenchmarkExt.TrialEstimate:
time: 26.549 μs
gctime: 0.000 ns (0.00%)
memory: 16.36 KiB
allocs: 19
julia> median(t)
-BenchmarkTools.TrialEstimate:
+BenchmarkExt.TrialEstimate:
time: 30.818 μs
gctime: 0.000 ns (0.00%)
memory: 16.36 KiB
allocs: 19
julia> mean(t)
-BenchmarkTools.TrialEstimate:
+BenchmarkExt.TrialEstimate:
time: 31.777 μs
gctime: 415.686 ns (1.31%)
memory: 16.36 KiB
allocs: 19
julia> maximum(t)
-BenchmarkTools.TrialEstimate:
+BenchmarkExt.TrialEstimate:
time: 1.503 ms
gctime: 1.401 ms (93.21%)
memory: 16.36 KiB
allocs: 19
julia> std(t)
-BenchmarkTools.TrialEstimate:
+BenchmarkExt.TrialEstimate:
time: 25.161 μs
gctime: 23.999 μs (95.38%)
memory: 16.36 KiB
@@ -385,7 +385,7 @@ Time distributions are always right-skewed for the benchmarks we've tested. This
### `TrialRatio` and `TrialJudgement`
-BenchmarkTools supplies a `ratio` function for comparing two values:
+BenchmarkExt supplies a `ratio` function for comparing two values:
```julia
julia> ratio(3, 2)
@@ -405,28 +405,28 @@ julia> ratio(0, 0)
Calling the `ratio` function on two `TrialEstimate` instances compares their fields:
```julia
-julia> using BenchmarkTools
+julia> using BenchmarkExt
julia> b = @benchmarkable eigen(rand(10, 10));
julia> tune!(b);
julia> m1 = median(run(b))
-BenchmarkTools.TrialEstimate:
+BenchmarkExt.TrialEstimate:
time: 38.638 μs
gctime: 0.000 ns (0.00%)
memory: 9.30 KiB
allocs: 28
julia> m2 = median(run(b))
-BenchmarkTools.TrialEstimate:
+BenchmarkExt.TrialEstimate:
time: 38.723 μs
gctime: 0.000 ns (0.00%)
memory: 9.30 KiB
allocs: 28
julia> ratio(m1, m2)
-BenchmarkTools.TrialRatio:
+BenchmarkExt.TrialRatio:
time: 0.997792009916587
gctime: 1.0
memory: 1.0
@@ -437,14 +437,14 @@ Use the `judge` function to decide if the estimate passed as first argument repr
```julia
julia> m1 = median(@benchmark eigen(rand(10, 10)))
-BenchmarkTools.TrialEstimate:
+BenchmarkExt.TrialEstimate:
time: 38.745 μs
gctime: 0.000 ns (0.00%)
memory: 9.30 KiB
allocs: 28
julia> m2 = median(@benchmark eigen(rand(10, 10)))
-BenchmarkTools.TrialEstimate:
+BenchmarkExt.TrialEstimate:
time: 38.611 μs
gctime: 0.000 ns (0.00%)
memory: 9.30 KiB
@@ -452,19 +452,19 @@ BenchmarkTools.TrialEstimate:
# percent change falls within noise tolerance for all fields
julia> judge(m1, m2)
-BenchmarkTools.TrialJudgement:
+BenchmarkExt.TrialJudgement:
time: +0.35% => invariant (5.00% tolerance)
memory: +0.00% => invariant (1.00% tolerance)
# changing time_tolerance causes it to be marked as a regression
julia> judge(m1, m2; time_tolerance = 0.0001)
-BenchmarkTools.TrialJudgement:
+BenchmarkExt.TrialJudgement:
time: +0.35% => regression (0.01% tolerance)
memory: +0.00% => invariant (1.00% tolerance)
# switch m1 & m2; from this perspective, the difference is an improvement
julia> judge(m2, m1; time_tolerance = 0.0001)
-BenchmarkTools.TrialJudgement:
+BenchmarkExt.TrialJudgement:
time: -0.35% => improvement (0.01% tolerance)
memory: +0.00% => invariant (1.00% tolerance)
@@ -510,13 +510,13 @@ Let's look at our newly defined suite in the REPL:
```julia
julia> suite
-2-element BenchmarkTools.BenchmarkGroup:
+2-element BenchmarkExt.BenchmarkGroup:
tags: []
- "utf8" => 2-element BenchmarkTools.BenchmarkGroup:
+ "utf8" => 2-element BenchmarkExt.BenchmarkGroup:
tags: ["string", "unicode"]
"join" => Benchmark(evals=1, seconds=5.0, samples=10000)
"replace" => Benchmark(evals=1, seconds=5.0, samples=10000)
- "trig" => 6-element BenchmarkTools.BenchmarkGroup:
+ "trig" => 6-element BenchmarkExt.BenchmarkGroup:
tags: ["math", "triangles"]
("cos", 0.0) => Benchmark(evals=1, seconds=5.0, samples=10000)
("sin", π = 3.1415926535897...) => Benchmark(evals=1, seconds=5.0, samples=10000)
@@ -559,7 +559,7 @@ done (took 1.697970114 seconds)
(6/6) benchmarking ("tan",0.0)...
done (took 0.320894744 seconds)
done (took 2.022673065 seconds)
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
"utf8" => BenchmarkGroup(["string", "unicode"])
"trig" => BenchmarkGroup(["math", "triangles"])
@@ -572,13 +572,13 @@ Following from the previous section, we see that running our benchmark suite ret
```julia
julia> results["utf8"]
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: ["string", "unicode"]
"join" => Trial(133.84 ms) # summary(::Trial) displays the minimum time estimate
"replace" => Trial(202.3 μs)
julia> results["trig"]
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: ["math", "triangles"]
("tan",π = 3.1415926535897...) => Trial(28.0 ns)
("cos",0.0) => Trial(6.0 ns)
@@ -592,19 +592,19 @@ Most of the functions on result-related types (`Trial`, `TrialEstimate`, `TrialR
```julia
julia> m1 = median(results["utf8"]) # == median(results["utf8"])
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: ["string", "unicode"]
"join" => TrialEstimate(143.68 ms)
"replace" => TrialEstimate(203.24 μs)
julia> m2 = median(run(suite["utf8"]))
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: ["string", "unicode"]
"join" => TrialEstimate(144.79 ms)
"replace" => TrialEstimate(202.49 μs)
julia> judge(m1, m2; time_tolerance = 0.001) # use 0.1 % time tolerance
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: ["string", "unicode"]
"join" => TrialJudgement(-0.76% => improvement)
"replace" => TrialJudgement(+0.37% => regression)
@@ -624,18 +624,18 @@ julia> g = BenchmarkGroup([], # no tags in the parent
"d" => BenchmarkGroup(["8"], 1 => 1),
"e" => BenchmarkGroup(["9"], 2 => 2)));
julia> g
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
- "c" => BenchmarkTools.BenchmarkGroup:
+ "c" => BenchmarkExt.BenchmarkGroup:
tags: ["5", "6", "7"]
- "b" => BenchmarkTools.BenchmarkGroup:
+ "b" => BenchmarkExt.BenchmarkGroup:
tags: ["3", "4", "5"]
- "a" => BenchmarkTools.BenchmarkGroup:
+ "a" => BenchmarkExt.BenchmarkGroup:
tags: ["1", "2", "3"]
- "e" => BenchmarkTools.BenchmarkGroup:
+ "e" => BenchmarkExt.BenchmarkGroup:
tags: ["9"]
2 => 2
- "d" => BenchmarkTools.BenchmarkGroup:
+ "d" => BenchmarkExt.BenchmarkGroup:
tags: ["8"]
1 => 1
```
@@ -644,7 +644,7 @@ We can filter this group by tag using the `@tagged` macro. This macro takes in a
```julia
julia> g[@tagged ("3" || "7") && !("1")]
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
"c" => BenchmarkGroup(["5", "6", "7"])
"b" => BenchmarkGroup(["3", "4", "5"])
@@ -662,32 +662,32 @@ To demonstrate the last two points:
```julia
# also could've used `@tagged "1"`, `@tagged "a"`, `@tagged "e" || "d"`
julia> g[@tagged "8" || "9"]
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
- "a" => BenchmarkTools.BenchmarkGroup:
+ "a" => BenchmarkExt.BenchmarkGroup:
tags: ["1", "2", "3"]
- "e" => BenchmarkTools.BenchmarkGroup:
+ "e" => BenchmarkExt.BenchmarkGroup:
tags: ["9"]
2 => 2
- "d" => BenchmarkTools.BenchmarkGroup:
+ "d" => BenchmarkExt.BenchmarkGroup:
tags: ["8"]
1 => 1
julia> g[@tagged "d"]
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
- "a" => BenchmarkTools.BenchmarkGroup:
+ "a" => BenchmarkExt.BenchmarkGroup:
tags: ["1", "2", "3"]
- "d" => BenchmarkTools.BenchmarkGroup:
+ "d" => BenchmarkExt.BenchmarkGroup:
tags: ["8"]
1 => 1
julia> g[@tagged "9"]
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
- "a" => BenchmarkTools.BenchmarkGroup:
+ "a" => BenchmarkExt.BenchmarkGroup:
tags: ["1", "2", "3"]
- "e" => BenchmarkTools.BenchmarkGroup:
+ "e" => BenchmarkExt.BenchmarkGroup:
tags: ["9"]
2 => 2
```
@@ -698,56 +698,56 @@ It's sometimes useful to create `BenchmarkGroup` where the keys are drawn from o
```julia
julia> g # leaf values are integers
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
- "c" => BenchmarkTools.BenchmarkGroup:
+ "c" => BenchmarkExt.BenchmarkGroup:
tags: []
"1" => 1
"2" => 2
"3" => 3
- "b" => BenchmarkTools.BenchmarkGroup:
+ "b" => BenchmarkExt.BenchmarkGroup:
tags: []
"1" => 1
"2" => 2
"3" => 3
- "a" => BenchmarkTools.BenchmarkGroup:
+ "a" => BenchmarkExt.BenchmarkGroup:
tags: []
"1" => 1
"2" => 2
"3" => 3
- "d" => BenchmarkTools.BenchmarkGroup:
+ "d" => BenchmarkExt.BenchmarkGroup:
tags: []
"1" => 1
"2" => 2
"3" => 3
julia> x # note that leaf values are characters
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
- "c" => BenchmarkTools.BenchmarkGroup:
+ "c" => BenchmarkExt.BenchmarkGroup:
tags: []
"2" => '2'
- "a" => BenchmarkTools.BenchmarkGroup:
+ "a" => BenchmarkExt.BenchmarkGroup:
tags: []
"1" => '1'
"3" => '3'
- "d" => BenchmarkTools.BenchmarkGroup:
+ "d" => BenchmarkExt.BenchmarkGroup:
tags: []
"1" => '1'
"2" => '2'
"3" => '3'
julia> g[x] # index into `g` with the keys of `x`
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
- "c" => BenchmarkTools.BenchmarkGroup:
+ "c" => BenchmarkExt.BenchmarkGroup:
tags: []
"2" => 2
- "a" => BenchmarkTools.BenchmarkGroup:
+ "a" => BenchmarkExt.BenchmarkGroup:
tags: []
"1" => 1
"3" => 3
- "d" => BenchmarkTools.BenchmarkGroup:
+ "d" => BenchmarkExt.BenchmarkGroup:
tags: []
"1" => 1
"2" => 2
@@ -766,28 +766,28 @@ You may have noticed that nested `BenchmarkGroup` instances form a tree-like str
Since these trees can be arbitrarily asymmetric, it can be cumbersome to write certain `BenchmarkGroup` transformations using only the indexing facilities previously discussed.
-To solve this problem, BenchmarkTools allows you to uniquely index group nodes using a `Vector` of the node's parents' keys. For example:
+To solve this problem, BenchmarkExt allows you to uniquely index group nodes using a `Vector` of the node's parents' keys. For example:
```julia
julia> g = BenchmarkGroup([], 1 => BenchmarkGroup([], "a" => BenchmarkGroup([], :b => 1234)));
julia> g
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
- 1 => BenchmarkTools.BenchmarkGroup:
+ 1 => BenchmarkExt.BenchmarkGroup:
tags: []
- "a" => BenchmarkTools.BenchmarkGroup:
+ "a" => BenchmarkExt.BenchmarkGroup:
tags: []
:b => 1234
julia> g[[1]] # == g[1]
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
- "a" => BenchmarkTools.BenchmarkGroup:
+ "a" => BenchmarkExt.BenchmarkGroup:
tags: []
:b => 1234
julia> g[[1, "a"]] # == g[1]["a"]
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
:b => 1234
julia> g[[1, "a", :b]] # == g[1]["a"][:b]
@@ -801,11 +801,11 @@ julia> g[[1, "a", :b]] = "hello"
"hello"
julia> g
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
- 1 => BenchmarkTools.BenchmarkGroup:
+ 1 => BenchmarkExt.BenchmarkGroup:
tags: []
- "a" => BenchmarkTools.BenchmarkGroup:
+ "a" => BenchmarkExt.BenchmarkGroup:
tags: []
:b => "hello"
```
@@ -817,16 +817,16 @@ julia> g[[2, "a", :b]] = "hello again"
"hello again"
julia> g
-2-element BenchmarkTools.BenchmarkGroup:
+2-element BenchmarkExt.BenchmarkGroup:
tags: []
- 2 => 1-element BenchmarkTools.BenchmarkGroup:
+ 2 => 1-element BenchmarkExt.BenchmarkGroup:
tags: []
- "a" => 1-element BenchmarkTools.BenchmarkGroup:
+ "a" => 1-element BenchmarkExt.BenchmarkGroup:
tags: []
:b => "hello again"
- 1 => 1-element BenchmarkTools.BenchmarkGroup:
+ 1 => 1-element BenchmarkExt.BenchmarkGroup:
tags: []
- "a" => 1-element BenchmarkTools.BenchmarkGroup:
+ "a" => 1-element BenchmarkExt.BenchmarkGroup:
tags: []
:b => "hello"
```
@@ -853,7 +853,7 @@ Note that terminal child group nodes are not considered "leaves" by the `leaves`
## Caching `Parameters`
-A common workflow used in BenchmarkTools is the following:
+A common workflow used in BenchmarkExt is the following:
1. Start a Julia session
2. Execute a benchmark suite using an old version of your package
@@ -867,12 +867,12 @@ There are a couple of problems with this workflow, and all of which revolve arou
- Consistency: Given enough time, successive calls to `tune!` will usually yield reasonably consistent values for the "evaluations per sample" parameter, even in spite of noise. However, some benchmarks are highly sensitive to slight changes in this parameter. Thus, it would be best to have some guarantee that all experiments are configured equally (i.e., a guarantee that step 2 will use the exact same parameters as step 5).
- Turnaround time: For most benchmarks, `tune!` needs to perform many evaluations to determine the proper parameters for any given benchmark - often more evaluations than are performed when running a trial. In fact, the majority of total benchmarking time is usually spent tuning parameters, rather than actually running trials.
-BenchmarkTools solves these problems by allowing you to pre-tune your benchmark suite, save the "evaluations per sample" parameters, and load them on demand:
+BenchmarkExt solves these problems by allowing you to pre-tune your benchmark suite, save the "evaluations per sample" parameters, and load them on demand:
```julia
# untuned example suite
julia> suite
-BenchmarkTools.BenchmarkGroup:
+BenchmarkExt.BenchmarkGroup:
tags: []
"utf8" => BenchmarkGroup(["string", "unicode"])
"trig" => BenchmarkGroup(["math", "triangles"])
@@ -882,15 +882,15 @@ julia> tune!(suite);
# save the suite's parameters using a thin wrapper
# over JSON (this wrapper maintains compatibility
-# across BenchmarkTools versions)
-julia> BenchmarkTools.save("params.json", params(suite));
+# across BenchmarkExt versions)
+julia> BenchmarkExt.save("params.json", params(suite));
```
Now, instead of tuning `suite` every time we load the benchmarks in a new Julia session, we can simply load the parameters in the JSON file using the `loadparams!` function. The `[1]` on the `load` call gets the first value that was serialized into the JSON file, which in this case is the parameters.
```julia
# syntax is loadparams!(group, paramsgroup, fields...)
-julia> loadparams!(suite, BenchmarkTools.load("params.json")[1], :evals, :samples);
+julia> loadparams!(suite, BenchmarkExt.load("params.json")[1], :evals, :samples);
```
Caching parameters in this manner leads to a far shorter turnaround time, and more importantly, much more consistent results.
@@ -905,7 +905,7 @@ julia> io = IOContext(stdout, :histmin=>0.5, :histmax=>8, :logbins=>true)
IOContext(Base.TTY(RawFD(13) open, 0 bytes waiting))
julia> b = @benchmark x^3 setup=(x = rand()); show(io, MIME("text/plain"), b)
-BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
+BenchmarkExt.Trial: 10000 samples with 1000 evaluations.
Range (min … max): 1.239 ns … 31.433 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 1.244 ns ┊ GC (median): 0.00%
Time (mean ± σ): 1.266 ns ± 0.611 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
@@ -916,7 +916,7 @@ BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
Memory estimate: 0 bytes, allocs estimate: 0.
julia> b = @benchmark x^3.0 setup=(x = rand()); show(io, MIME("text/plain"), b)
-BenchmarkTools.Trial: 10000 samples with 1000 evaluations.
+BenchmarkExt.Trial: 10000 samples with 1000 evaluations.
Range (min … max): 5.636 ns … 38.756 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 5.662 ns ┊ GC (median): 0.00%
Time (mean ± σ): 5.767 ns ± 1.384 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
@@ -959,8 +959,8 @@ This will display each `Trial` as a violin plot.
## Miscellaneous tips and info
-- BenchmarkTools restricts the minimum measurable benchmark execution time to one picosecond.
+- BenchmarkExt restricts the minimum measurable benchmark execution time to one picosecond.
- If you use `rand` or something similar to generate the values that are used in your benchmarks, you should seed the RNG (or provide a seeded RNG) so that the values are consistent between trials/samples/evaluations.
-- BenchmarkTools attempts to be robust against machine noise occurring between *samples*, but BenchmarkTools can't do very much about machine noise occurring between *trials*. To cut down on the latter kind of noise, it is advised that you dedicate CPUs and memory to the benchmarking Julia process by using a shielding tool such as [cset](http://manpages.ubuntu.com/manpages/precise/man1/cset.1.html).
+- BenchmarkExt attempts to be robust against machine noise occurring between *samples*, but BenchmarkExt can't do very much about machine noise occurring between *trials*. To cut down on the latter kind of noise, it is advised that you dedicate CPUs and memory to the benchmarking Julia process by using a shielding tool such as [cset](http://manpages.ubuntu.com/manpages/precise/man1/cset.1.html).
- On some machines, for some versions of BLAS and Julia, the number of BLAS worker threads can exceed the number of available cores. This can occasionally result in scheduling issues and inconsistent performance for BLAS-heavy benchmarks. To fix this issue, you can use `BLAS.set_num_threads(i::Int)` in the Julia REPL to ensure that the number of BLAS threads is equal to or less than the number of available cores.
- `@benchmark` is evaluated in global scope, even if called from local scope.
diff --git a/src/BenchmarkTools.jl b/src/BenchmarkExt.jl
similarity index 92%
rename from src/BenchmarkTools.jl
rename to src/BenchmarkExt.jl
index 600cb7f4..c6880a3b 100644
--- a/src/BenchmarkTools.jl
+++ b/src/BenchmarkExt.jl
@@ -1,4 +1,4 @@
-module BenchmarkTools
+module BenchmarkExt
using JSON
using Base.Iterators
@@ -10,7 +10,7 @@ using Printf
using Profile
-const BENCHMARKTOOLS_VERSION = v"1.0.0"
+const BENCHMARKEXT_VERSION = v"0.1.0"  # serialization-format version (see serialization.jl); intentionally tracked separately from the Project.toml package version
##############
# Parameters #
@@ -78,4 +78,4 @@ export tune!,
include("serialization.jl")
-end # module BenchmarkTools
+end # module BenchmarkExt
diff --git a/src/execution.jl b/src/execution.jl
index e8b1d509..0758b7ba 100644
--- a/src/execution.jl
+++ b/src/execution.jl
@@ -339,7 +339,7 @@ to benchmark.
```julia-repl
julia> @benchmark sin(1)
-BenchmarkTools.Trial:
+BenchmarkExt.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
@@ -357,7 +357,7 @@ You can interpolate values into `@benchmark` expressions:
```julia
# rand(1000) is executed for each evaluation
julia> @benchmark sum(rand(1000))
-BenchmarkTools.Trial:
+BenchmarkExt.Trial:
memory estimate: 7.94 KiB
allocs estimate: 1
--------------
@@ -372,7 +372,7 @@ BenchmarkTools.Trial:
# rand(1000) is evaluated at definition time, and the resulting
# value is interpolated into the benchmark expression
julia> @benchmark sum($(rand(1000)))
-BenchmarkTools.Trial:
+BenchmarkExt.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
@@ -389,10 +389,10 @@ macro benchmark(args...)
_, params = prunekwargs(args...)
tmp = gensym()
return esc(quote
- local $tmp = $BenchmarkTools.@benchmarkable $(args...)
- $BenchmarkTools.warmup($tmp)
- $(hasevals(params) ? :() : :($BenchmarkTools.tune!($tmp)))
- $BenchmarkTools.run($tmp)
+ local $tmp = $BenchmarkExt.@benchmarkable $(args...)
+ $BenchmarkExt.warmup($tmp)
+ $(hasevals(params) ? :() : :($BenchmarkExt.tune!($tmp)))
+ $BenchmarkExt.run($tmp)
end)
end
@@ -487,7 +487,7 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, quote_
end
return Core.eval(eval_module, quote
@noinline $(signature_def) = begin $(core_body) end
- @noinline function $(samplefunc)($(Expr(:tuple, quote_vars...)), __params::$BenchmarkTools.Parameters)
+ @noinline function $(samplefunc)($(Expr(:tuple, quote_vars...)), __params::$BenchmarkExt.Parameters)
$(setup)
__evals = __params.evals
__gc_start = Base.gc_num()
@@ -507,7 +507,7 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, quote_
__evals))
return __time, __gctime, __memory, __allocs, __return_val
end
- $BenchmarkTools.Benchmark($(samplefunc), $(quote_vals), $(params))
+ $BenchmarkExt.Benchmark($(samplefunc), $(quote_vals), $(params))
end)
end
@@ -531,7 +531,7 @@ is the *minimum* elapsed time measured during the benchmark.
"""
macro belapsed(args...)
return esc(quote
- $BenchmarkTools.time($BenchmarkTools.minimum($BenchmarkTools.@benchmark $(args...)))/1e9
+ $BenchmarkExt.time($BenchmarkExt.minimum($BenchmarkExt.@benchmark $(args...)))/1e9
end)
end
@@ -548,7 +548,7 @@ during the benchmark.
"""
macro ballocated(args...)
return esc(quote
- $BenchmarkTools.memory($BenchmarkTools.minimum($BenchmarkTools.@benchmark $(args...)))
+ $BenchmarkExt.memory($BenchmarkExt.minimum($BenchmarkExt.@benchmark $(args...)))
end)
end
@@ -569,19 +569,19 @@ macro btime(args...)
_, params = prunekwargs(args...)
bench, trial, result = gensym(), gensym(), gensym()
trialmin, trialallocs = gensym(), gensym()
- tune_phase = hasevals(params) ? :() : :($BenchmarkTools.tune!($bench))
+ tune_phase = hasevals(params) ? :() : :($BenchmarkExt.tune!($bench))
return esc(quote
- local $bench = $BenchmarkTools.@benchmarkable $(args...)
- $BenchmarkTools.warmup($bench)
+ local $bench = $BenchmarkExt.@benchmarkable $(args...)
+ $BenchmarkExt.warmup($bench)
$tune_phase
- local $trial, $result = $BenchmarkTools.run_result($bench)
- local $trialmin = $BenchmarkTools.minimum($trial)
- local $trialallocs = $BenchmarkTools.allocs($trialmin)
+ local $trial, $result = $BenchmarkExt.run_result($bench)
+ local $trialmin = $BenchmarkExt.minimum($trial)
+ local $trialallocs = $BenchmarkExt.iallocs($trialmin)
println(" ",
- $BenchmarkTools.prettytime($BenchmarkTools.time($trialmin)),
+ $BenchmarkExt.prettytime($BenchmarkExt.time($trialmin)),
" (", $trialallocs , " allocation",
$trialallocs == 1 ? "" : "s", ": ",
- $BenchmarkTools.prettymemory($BenchmarkTools.memory($trialmin)), ")")
+ $BenchmarkExt.prettymemory($BenchmarkExt.memory($trialmin)), ")")
$result
end)
end
@@ -611,10 +611,10 @@ macro bprofile(args...)
end
tmp = gensym()
return esc(quote
- local $tmp = $BenchmarkTools.@benchmarkable $(args...)
- $BenchmarkTools.warmup($tmp)
- $(hasevals(params) ? :() : :($BenchmarkTools.tune!($tmp)))
- $BenchmarkTools.Profile.clear()
- $BenchmarkTools.@profile $BenchmarkTools.run($tmp)
+ local $tmp = $BenchmarkExt.@benchmarkable $(args...)
+ $BenchmarkExt.warmup($tmp)
+ $(hasevals(params) ? :() : :($BenchmarkExt.tune!($tmp)))
+ $BenchmarkExt.Profile.clear()
+ $BenchmarkExt.@profile $BenchmarkExt.run($tmp)
end)
end
diff --git a/src/groups.jl b/src/groups.jl
index 48e9671c..13309553 100644
--- a/src/groups.jl
+++ b/src/groups.jl
@@ -167,7 +167,7 @@ struct TagFilter
end
macro tagged(expr)
- return :(BenchmarkTools.TagFilter(tags -> $(tagpredicate!(expr))))
+ return :(BenchmarkExt.TagFilter(tags -> $(tagpredicate!(expr))))
end
tagpredicate!(@nospecialize tag) = :(in(makekey($(esc(tag))), tags))
@@ -289,7 +289,7 @@ function Base.show(io::IO, group::BenchmarkGroup)
nbound = get(io, :boundto, 10)
end
- println(io, "$(length(group))-element BenchmarkTools.BenchmarkGroup:")
+ println(io, "$(length(group))-element BenchmarkExt.BenchmarkGroup:")
pad = get(io, :pad, "")
print(io, pad, " tags: ", tagrepr(group.tags))
count = 1
@@ -332,7 +332,7 @@ macro case(title, xs...)
end
function benchmarkset_m(title, ex::Expr)
- stack = GlobalRef(BenchmarkTools, :benchmark_stack)
+ stack = GlobalRef(BenchmarkExt, :benchmark_stack)
init = quote
if isempty($stack)
push!($stack, $BenchmarkGroup())
@@ -359,7 +359,7 @@ function benchmarkset_m(title, ex::Expr)
end
function benchmarkset_block(title, ex::Expr)
- stack = GlobalRef(BenchmarkTools, :benchmark_stack)
+ stack = GlobalRef(BenchmarkExt, :benchmark_stack)
quote
let $(Symbol("#root#")) = last($stack)
$(Symbol("#root#"))[$title] = $(Symbol("#suite#")) = BenchmarkGroup()
diff --git a/src/parameters.jl b/src/parameters.jl
index c09c0f90..fc7a58fe 100644
--- a/src/parameters.jl
+++ b/src/parameters.jl
@@ -43,7 +43,7 @@ function Parameters(default::Parameters; seconds = nothing, samples = nothing,
params.gcsample = gcsample != nothing ? gcsample : default.gcsample
params.time_tolerance = time_tolerance != nothing ? time_tolerance : default.time_tolerance
params.memory_tolerance = memory_tolerance != nothing ? memory_tolerance : default.memory_tolerance
- return params::BenchmarkTools.Parameters
+ return params::BenchmarkExt.Parameters
end
function Base.:(==)(a::Parameters, b::Parameters)
diff --git a/src/serialization.jl b/src/serialization.jl
index 010c14bc..025fd794 100644
--- a/src/serialization.jl
+++ b/src/serialization.jl
@@ -1,5 +1,5 @@
const VERSIONS = Dict("Julia" => string(VERSION),
- "BenchmarkTools" => string(BENCHMARKTOOLS_VERSION))
+ "BenchmarkExt" => string(BENCHMARKEXT_VERSION))
# TODO: Add any new types as they're added
const SUPPORTED_TYPES = Dict{Symbol,Type}(Base.typename(x).name => x for x in [
@@ -33,7 +33,7 @@ function recover(x::Vector)
length(x) == 2 || throw(ArgumentError("Expecting a vector of length 2"))
typename = x[1]::String
fields = x[2]::Dict
- startswith(typename, "BenchmarkTools.") && (typename = typename[sizeof("BenchmarkTools.")+1:end])
+    for prefix in ("BenchmarkExt.", "BenchmarkTools."); startswith(typename, prefix) && (typename = typename[sizeof(prefix)+1:end]); break; end
T = SUPPORTED_TYPES[Symbol(typename)]
fc = fieldcount(T)
xs = Vector{Any}(undef, fc)
@@ -72,7 +72,7 @@ function badext(filename)
"JLD serialization is no longer supported. Benchmarks should now be saved in\n" *
"JSON format using `save(\"$noext.json\", args...)` and loaded from JSON using\n" *
"`load(\"$noext.json\", args...)`. You will need to convert existing saved\n" *
- "benchmarks to JSON in order to use them with this version of BenchmarkTools."
+ "benchmarks to JSON in order to use them with this version of BenchmarkExt."
else
"Only JSON serialization is supported."
end
@@ -96,7 +96,7 @@ function save(io::IO, args...)
"in the order it appears in the input.")
continue
elseif !(arg isa get(SUPPORTED_TYPES, typeof(arg).name.name, Union{}))
- throw(ArgumentError("Only BenchmarkTools types can be serialized."))
+ throw(ArgumentError("Only BenchmarkExt types can be serialized."))
end
push!(goodargs, arg)
end
@@ -118,7 +118,7 @@ function load(io::IO, args...)
end
parsed = JSON.parse(io)
if !isa(parsed, Vector) || length(parsed) != 2 || !isa(parsed[1], Dict) || !isa(parsed[2], Vector)
- error("Unexpected JSON format. Was this file originally written by BenchmarkTools?")
+ error("Unexpected JSON format. Was this file originally written by BenchmarkExt?")
end
versions = parsed[1]::Dict
values = parsed[2]::Vector
diff --git a/src/trials.jl b/src/trials.jl
index 639ee794..7a338566 100644
--- a/src/trials.jl
+++ b/src/trials.jl
@@ -6,11 +6,11 @@ mutable struct Trial
params::Parameters
times::Vector{Float64}
gctimes::Vector{Float64}
- memory::Int
- allocs::Int
+ memory::Vector{Int}
+ allocs::Vector{Int}
end
-Trial(params::Parameters) = Trial(params, Float64[], Float64[], typemax(Int), typemax(Int))
+Trial(params::Parameters) = Trial(params, Float64[], Float64[], Int[], Int[])
function Base.:(==)(a::Trial, b::Trial)
return a.params == b.params &&
@@ -20,31 +20,35 @@ function Base.:(==)(a::Trial, b::Trial)
a.allocs == b.allocs
end
-Base.copy(t::Trial) = Trial(copy(t.params), copy(t.times), copy(t.gctimes), t.memory, t.allocs)
+Base.copy(t::Trial) = Trial(copy(t.params), copy(t.times), copy(t.gctimes), copy(t.memory), copy(t.allocs))
function Base.push!(t::Trial, time, gctime, memory, allocs)
push!(t.times, time)
push!(t.gctimes, gctime)
- memory < t.memory && (t.memory = memory)
- allocs < t.allocs && (t.allocs = allocs)
+ push!(t.memory, memory)
+ push!(t.allocs, allocs)
return t
end
function Base.deleteat!(t::Trial, i)
deleteat!(t.times, i)
deleteat!(t.gctimes, i)
+ deleteat!(t.memory, i)
+ deleteat!(t.allocs, i)
return t
end
Base.length(t::Trial) = length(t.times)
-Base.getindex(t::Trial, i::Number) = push!(Trial(t.params), t.times[i], t.gctimes[i], t.memory, t.allocs)
-Base.getindex(t::Trial, i) = Trial(t.params, t.times[i], t.gctimes[i], t.memory, t.allocs)
+Base.getindex(t::Trial, i::Number) = push!(Trial(t.params), t.times[i], t.gctimes[i], t.memory[i], t.allocs[i])
+Base.getindex(t::Trial, i) = Trial(t.params, t.times[i], t.gctimes[i], t.memory[i], t.allocs[i])
Base.lastindex(t::Trial) = length(t)
function Base.sort!(t::Trial)
inds = sortperm(t.times)
t.times = t.times[inds]
t.gctimes = t.gctimes[inds]
+ t.memory = t.memory[inds]
+ t.allocs = t.allocs[inds]
return t
end
@@ -52,8 +56,8 @@ Base.sort(t::Trial) = sort!(copy(t))
Base.time(t::Trial) = time(minimum(t))
gctime(t::Trial) = gctime(minimum(t))
-memory(t::Trial) = t.memory
-allocs(t::Trial) = t.allocs
+memory(t::Trial) = memory(minimum(t))
+allocs(t::Trial) = allocs(minimum(t))
params(t::Trial) = t.params
# returns the index of the first outlier in `values`, if any outliers are detected.
@@ -90,12 +94,12 @@ mutable struct TrialEstimate
params::Parameters
time::Float64
gctime::Float64
- memory::Int
- allocs::Int
+ memory::Float64
+ allocs::Float64
end
-function TrialEstimate(trial::Trial, t, gct)
- return TrialEstimate(params(trial), t, gct, memory(trial), allocs(trial))
+function TrialEstimate(trial::Trial, t, gct, mem, alloc)
+ return TrialEstimate(params(trial), t, gct, mem, alloc)
end
function Base.:(==)(a::TrialEstimate, b::TrialEstimate)
@@ -110,17 +114,17 @@ Base.copy(t::TrialEstimate) = TrialEstimate(copy(t.params), t.time, t.gctime, t.
function Base.minimum(trial::Trial)
i = argmin(trial.times)
- return TrialEstimate(trial, trial.times[i], trial.gctimes[i])
+ return TrialEstimate(trial, trial.times[i], trial.gctimes[i], trial.memory[i], trial.allocs[i])
end
function Base.maximum(trial::Trial)
i = argmax(trial.times)
- return TrialEstimate(trial, trial.times[i], trial.gctimes[i])
+ return TrialEstimate(trial, trial.times[i], trial.gctimes[i], trial.memory[i], trial.allocs[i])
end
-Statistics.median(trial::Trial) = TrialEstimate(trial, median(trial.times), median(trial.gctimes))
-Statistics.mean(trial::Trial) = TrialEstimate(trial, mean(trial.times), mean(trial.gctimes))
-Statistics.std(trial::Trial) = TrialEstimate(trial, std(trial.times), std(trial.gctimes))
+Statistics.median(trial::Trial) = TrialEstimate(trial, median(trial.times), median(trial.gctimes), median(trial.memory), median(trial.allocs))
+Statistics.mean(trial::Trial) = TrialEstimate(trial, mean(trial.times), mean(trial.gctimes), mean(trial.memory), mean(trial.allocs))
+Statistics.std(trial::Trial) = TrialEstimate(trial, std(trial.times), std(trial.gctimes), std(Float64.(trial.memory)), std(Float64.(trial.allocs)))
Base.isless(a::TrialEstimate, b::TrialEstimate) = isless(time(a), time(b))
@@ -128,6 +132,7 @@ Base.time(t::TrialEstimate) = t.time
gctime(t::TrialEstimate) = t.gctime
memory(t::TrialEstimate) = t.memory
allocs(t::TrialEstimate) = t.allocs
+iallocs(t::TrialEstimate) = round(Int, t.allocs)
params(t::TrialEstimate) = t.params
##############
@@ -268,7 +273,7 @@ end
function prettymemory(b)
if b < 1024
- return string(b, " bytes")
+ return string(round(Int, b), " bytes")
elseif b < 1024^2
value, units = b / 1024, "KiB"
elseif b < 1024^3
@@ -341,7 +346,7 @@ Base.show(io::IO, t::TrialJudgement) = _show(io, t)
function Base.show(io::IO, ::MIME"text/plain", t::Trial)
pad = get(io, :pad, "")
- print(io, "BenchmarkTools.Trial: ", length(t), " sample", if length(t) > 1 "s" else "" end,
+ print(io, "BenchmarkExt.Trial: ", length(t), " sample", if length(t) > 1 "s" else "" end,
" with ", t.params.evals, " evaluation", if t.params.evals > 1 "s" else "" end ,".\n")
perm = sortperm(t.times)
@@ -362,7 +367,7 @@ function Base.show(io::IO, ::MIME"text/plain", t::Trial)
maxtime, maxgc = prettytime(time(max)), prettypercent(gcratio(max))
memorystr = string(prettymemory(memory(min)))
- allocsstr = string(allocs(min))
+ allocsstr = string(round(Int, allocs(min)))
elseif length(t) == 1
print(io, pad, " Single result which took ")
printstyled(io, prettytime(times[1]); color=:blue)
@@ -504,16 +509,16 @@ function Base.show(io::IO, ::MIME"text/plain", t::Trial)
end
function Base.show(io::IO, ::MIME"text/plain", t::TrialEstimate)
- println(io, "BenchmarkTools.TrialEstimate: ")
+ println(io, "BenchmarkExt.TrialEstimate: ")
pad = get(io, :pad, "")
println(io, pad, " time: ", prettytime(time(t)))
println(io, pad, " gctime: ", prettytime(gctime(t)), " (", prettypercent(gctime(t) / time(t)),")")
println(io, pad, " memory: ", prettymemory(memory(t)))
- print(io, pad, " allocs: ", allocs(t))
+ print(io, pad, " allocs: ", round(Int, allocs(t)))
end
function Base.show(io::IO, ::MIME"text/plain", t::TrialRatio)
- println(io, "BenchmarkTools.TrialRatio: ")
+ println(io, "BenchmarkExt.TrialRatio: ")
pad = get(io, :pad, "")
println(io, pad, " time: ", time(t))
println(io, pad, " gctime: ", gctime(t))
@@ -522,7 +527,7 @@ function Base.show(io::IO, ::MIME"text/plain", t::TrialRatio)
end
function Base.show(io::IO, ::MIME"text/plain", t::TrialJudgement)
- println(io, "BenchmarkTools.TrialJudgement: ")
+ println(io, "BenchmarkExt.TrialJudgement: ")
pad = get(io, :pad, "")
print(io, pad, " time: ", prettydiff(time(ratio(t))), " => ")
printtimejudge(io, t)
diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl
deleted file mode 100644
index 4e72f76d..00000000
--- a/test/ExecutionTests.jl
+++ /dev/null
@@ -1,265 +0,0 @@
-module ExecutionTests
-
-using BenchmarkTools
-using Profile
-using Test
-
-seteq(a, b) = length(a) == length(b) == length(intersect(a, b))
-
-#########
-# setup #
-#########
-
-groups = BenchmarkGroup()
-groups["sum"] = BenchmarkGroup(["arithmetic"])
-groups["sin"] = BenchmarkGroup(["trig"])
-groups["special"] = BenchmarkGroup()
-
-sizes = (5, 10, 20)
-
-for s in sizes
- A = rand(s, s)
- groups["sum"][s] = @benchmarkable sum($A) seconds=3
- groups["sin"][s] = @benchmarkable(sin($s), seconds=1, gctrial=false)
-end
-
-groups["special"]["macro"] = @benchmarkable @test(1 == 1)
-groups["special"]["nothing"] = @benchmarkable nothing
-groups["special"]["block"] = @benchmarkable begin rand(3) end
-groups["special"]["comprehension"] = @benchmarkable [s^2 for s in sizes]
-
-function testexpected(received::BenchmarkGroup, expected::BenchmarkGroup)
- @test length(received) == length(expected)
- @test seteq(received.tags, expected.tags)
- @test seteq(keys(received), keys(expected))
- for (k, v) in received
- testexpected(v, expected[k])
- end
-end
-
-function testexpected(trial::BenchmarkTools.Trial, args...)
- @test length(trial) > 1
-end
-
-testexpected(b::BenchmarkTools.Benchmark, args...) = true
-
-#########
-# tune! #
-#########
-
-oldgroups = copy(groups)
-
-for id in keys(groups["special"])
- testexpected(tune!(groups["special"][id]))
-end
-
-testexpected(tune!(groups["sin"], verbose = true), groups["sin"])
-testexpected(tune!(groups, verbose = true), groups)
-
-oldgroupscopy = copy(oldgroups)
-
-loadparams!(oldgroups, params(groups), :evals, :samples)
-loadparams!(oldgroups, params(groups))
-
-@test oldgroups == oldgroupscopy == groups
-
-#######
-# run #
-#######
-
-testexpected(run(groups; verbose = true), groups)
-testexpected(run(groups; seconds = 1, verbose = true, gctrial = false), groups)
-testexpected(run(groups; verbose = true, seconds = 1, gctrial = false, time_tolerance = 0.10, samples = 2, evals = 2, gcsample = false), groups)
-
-testexpected(run(groups["sin"]; verbose = true), groups["sin"])
-testexpected(run(groups["sin"]; seconds = 1, verbose = true, gctrial = false), groups["sin"])
-testexpected(run(groups["sin"]; verbose = true, seconds = 1, gctrial = false, time_tolerance = 0.10, samples = 2, evals = 2, gcsample = false), groups["sin"])
-
-testexpected(run(groups["sin"][first(sizes)]))
-testexpected(run(groups["sin"][first(sizes)]; seconds = 1, gctrial = false))
-testexpected(run(groups["sin"][first(sizes)]; seconds = 1, gctrial = false, time_tolerance = 0.10, samples = 2, evals = 2, gcsample = false))
-
-testexpected(run(groups["sum"][first(sizes)], BenchmarkTools.DEFAULT_PARAMETERS))
-
-###########
-# warmup #
-###########
-
-p = params(warmup(@benchmarkable sin(1)))
-
-@test p.samples == 1
-@test p.evals == 1
-@test p.gctrial == false
-@test p.gcsample == false
-
-##############
-# @benchmark #
-##############
-
-mutable struct Foo
- x::Int
-end
-
-const foo = Foo(-1)
-
-t = @benchmark sin(foo.x) evals=3 samples=10 setup=(foo.x = 0)
-
-@test foo.x == 0
-@test params(t).evals == 3
-@test params(t).samples == 10
-
-b = @benchmarkable sin(x) setup=(foo.x = -1; x = foo.x) teardown=(@assert(x == -1); foo.x = 1)
-tune!(b)
-
-@test foo.x == 1
-@test params(b).evals > 100
-
-foo.x = 0
-tune!(b)
-
-@test foo.x == 1
-@test params(b).evals > 100
-
-# test variable assignment with `@benchmark args...` form
-@benchmark local_var="good" setup=(local_var="bad") teardown=(@test local_var=="good")
-@test_throws UndefVarError local_var
-@benchmark some_var="whatever" teardown=(@test_throws UndefVarError some_var)
-@benchmark foo,bar="good","good" setup=(foo="bad"; bar="bad") teardown=(@test foo=="good" && bar=="good")
-
-# test variable assignment with `@benchmark(args...)` form
-@benchmark(local_var="good", setup=(local_var="bad"), teardown=(@test local_var=="good"))
-@test_throws UndefVarError local_var
-@benchmark(some_var="whatever", teardown=(@test_throws UndefVarError some_var))
-@benchmark((foo,bar) = ("good","good"), setup=(foo = "bad"; bar = "bad"), teardown=(@test foo == "good" && bar == "good"))
-
-# test kwargs separated by `,`
-@benchmark(output=sin(x), setup=(x=1.0; output=0.0), teardown=(@test output == sin(x)))
-
-for (tf, rex1, rex2) in ((false, r"0.5 ns +Histogram: frequency by time +8 ns", r"Histogram: frequency"),
- (true, r"0.5 ns +Histogram: log\(frequency\) by time +8 ns", r"Histogram: log\(frequency\)"))
- io = IOBuffer()
- ioctx = IOContext(io, :histmin=>0.5, :histmax=>8, :logbins=>tf)
- @show tf
- b = @benchmark x^3 setup=(x = rand()); show(ioctx, MIME("text/plain"), b)
- b = @benchmark x^3.0 setup=(x = rand()); show(ioctx, MIME("text/plain"), b)
- str = String(take!(io))
- idx = findfirst(rex1, str)
- @test isa(idx, UnitRange)
- idx = findnext( rex1, str, idx[end]+1)
- @test isa(idx, UnitRange)
- ioctx = IOContext(io, :logbins=>tf)
- # A flat distribution won't trigger log by default
- b = BenchmarkTools.Trial(BenchmarkTools.DEFAULT_PARAMETERS, 0.001 * (1:100) * 1e9, zeros(100), 0, 0)
- show(ioctx, MIME("text/plain"), b)
- str = String(take!(io))
- idx = findfirst(rex2, str)
- @test isa(idx, UnitRange)
- # A peaked distribution will trigger log by default
- t = [fill(1, 21); 2]
- b = BenchmarkTools.Trial(BenchmarkTools.DEFAULT_PARAMETERS, t/sum(t)*1e9*BenchmarkTools.DEFAULT_PARAMETERS.seconds, zeros(100), 0, 0)
- show(ioctx, MIME("text/plain"), b)
- str = String(take!(io))
- idx = findfirst(rex2, str)
- @test isa(idx, UnitRange)
-end
-
-#############
-# @bprofile #
-#############
-
-function likegcd(a::T, b::T) where T<:Base.BitInteger
- za = trailing_zeros(a)
- zb = trailing_zeros(b)
- k = min(za, zb)
- u = unsigned(abs(a >> za))
- v = unsigned(abs(b >> zb))
- while u != v
- if u > v
- u, v = v, u
- end
- v -= u
- v >>= trailing_zeros(v)
- end
- r = u << k
- return r % T
-end
-
-b = @bprofile likegcd(x, y) setup=(x = rand(2:200); y = rand(2:200))
-@test isa(b, BenchmarkTools.Trial)
-io = IOBuffer()
-Profile.print(IOContext(io, :displaysize=>(24,200)))
-str = String(take!(io))
-@test occursin(r"BenchmarkTools(\.jl)?(/|\\)src(/|\\)execution\.jl:\d+; _run", str)
-@test !occursin(r"BenchmarkTools(\.jl)?(/|\\)src(/|\\)execution\.jl:\d+; warmup", str)
-@test !occursin(r"BenchmarkTools(\.jl)?(/|\\)src(/|\\)execution\.jl:\d+; tune!", str)
-b = @bprofile 1+1
-Profile.print(IOContext(io, :displaysize=>(24,200)))
-str = String(take!(io))
-@test !occursin("gcscrub", str)
-b = @bprofile 1+1 gctrial=true
-Profile.print(IOContext(io, :displaysize=>(24,200)))
-str = String(take!(io))
-@test occursin("gcscrub", str)
-
-########
-# misc #
-########
-
-# This test is volatile in nonquiescent environments (e.g. Travis)
-# BenchmarkTools.DEFAULT_PARAMETERS.overhead = BenchmarkTools.estimate_overhead()
-# @test time(minimum(@benchmark nothing)) == 1
-
-@test [:x, :y, :z, :v, :w] == BenchmarkTools.collectvars(quote
- x = 1 + 3
- y = 1 + x
- z = (a = 4; y + a)
- v,w = 1,2
- [u^2 for u in [1,2,3]]
- end)
-
-# this should take < 1 s on any sane machine
-@test @belapsed(sin($(foo.x)), evals=3, samples=10, setup=(foo.x = 0)) < 1
-@test @belapsed(sin(0)) < 1
-
-@test @ballocated(sin($(foo.x)), evals=3, samples=10, setup=(foo.x = 0)) == 0
-@test @ballocated(sin(0)) == 0
-@test @ballocated(Ref(1)) == 2*sizeof(Int) # 1 for the pointer, 1 for content
-
-let fname = tempname()
- try
- ret = open(fname, "w") do f
- redirect_stdout(f) do
- x = 1
- a = nothing
- y = @btime(sin($x))
- @test y == sin(1)
- @test a === nothing
- end
- end
- s = read(fname, String)
- try
- @test occursin(r"[0-9.]+ \w*s \([0-9]* allocations?: [0-9]+ bytes\)", s)
- catch
- println(stderr, "@btime output didn't match ", repr(s))
- rethrow()
- end
- finally
- isfile(fname) && rm(fname)
- end
-end
-
-# issue #107
-let time = 2
- @benchmark identity(time)
-end
-
-# Ensure that interpolated values are garbage-collectable
-x = []
-x_finalized = false
-finalizer(x->(global x_finalized=true), x)
-b = @benchmarkable $x
-b = x = nothing
-GC.gc()
-@test x_finalized
-
-end # module
diff --git a/test/GroupsTests.jl b/test/GroupsTests.jl
deleted file mode 100644
index 5fe56f84..00000000
--- a/test/GroupsTests.jl
+++ /dev/null
@@ -1,322 +0,0 @@
-# module GroupsTests
-
-using BenchmarkTools
-using BenchmarkTools: TrialEstimate, Parameters
-using Test
-
-seteq(a, b) = length(a) == length(b) == length(intersect(a, b))
-
-##################
-# BenchmarkGroup #
-##################
-
-# setup #
-#-------#
-
-g1 = BenchmarkGroup(["1", "2"])
-
-t1a = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 32, 1, 2, 3)
-t1b = TrialEstimate(Parameters(time_tolerance = .40, memory_tolerance = .40), 4123, 123, 43, 9)
-tc = TrialEstimate(Parameters(time_tolerance = 1.0, memory_tolerance = 1.0), 1, 1, 1, 1)
-
-g1["a"] = t1a
-g1["b"] = t1b
-g1["c"] = tc
-
-g1copy = copy(g1)
-g1similar = similar(g1)
-
-g2 = BenchmarkGroup(["2", "3"])
-
-t2a = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 323, 1, 2, 3)
-t2b = TrialEstimate(Parameters(time_tolerance = .40, memory_tolerance = .40), 1002, 123, 43, 9)
-
-g2["a"] = t2a
-g2["b"] = t2b
-g2["c"] = tc
-
-trial = BenchmarkTools.Trial(Parameters(), [1, 2, 5], [0, 1, 1], 3, 56)
-
-gtrial = BenchmarkGroup([], Dict("t" => trial))
-
-# tests #
-#-------#
-
-@test BenchmarkGroup() == BenchmarkGroup([], Dict())
-@test length(g1) == 3
-@test g1["a"] == t1a
-@test g1["b"] == t1b
-@test g1["c"] == tc
-@test haskey(g1, "a")
-@test !(haskey(g1, "x"))
-@test seteq(keys(g1), ["a", "b", "c"])
-@test seteq(values(g1), [t1a, t1b, tc])
-@test iterate(g1) == iterate(g1.data)
-@test iterate(g1, 1) == iterate(g1.data, 1)
-@test seteq([x for x in g1], Pair["a"=>t1a, "b"=>t1b, "c"=>tc])
-
-@test g1 == g1copy
-@test seteq(keys(delete!(g1copy, "a")), ["b", "c"])
-@test isempty(delete!(delete!(g1copy, "b"), "c"))
-@test isempty(g1similar)
-@test g1similar.tags == g1.tags
-
-@test time(g1).data == Dict("a" => time(t1a), "b" => time(t1b), "c" => time(tc))
-@test gctime(g1).data == Dict("a" => gctime(t1a), "b" => gctime(t1b), "c" => gctime(tc))
-@test memory(g1).data == Dict("a" => memory(t1a), "b" => memory(t1b), "c" => memory(tc))
-@test allocs(g1).data == Dict("a" => allocs(t1a), "b" => allocs(t1b), "c" => allocs(tc))
-@test params(g1).data == Dict("a" => params(t1a), "b" => params(t1b), "c" => params(tc))
-
-@test max(g1, g2).data == Dict("a" => t2a, "b" => t1b, "c" => tc)
-@test min(g1, g2).data == Dict("a" => t1a, "b" => t2b, "c" => tc)
-@test ratio(g1, g2).data == Dict("a" => ratio(t1a, t2a), "b" => ratio(t1b, t2b), "c" => ratio(tc, tc))
-@test (judge(g1, g2; time_tolerance = 0.1, memory_tolerance = 0.1).data ==
- Dict("a" => judge(t1a, t2a; time_tolerance = 0.1, memory_tolerance = 0.1),
- "b" => judge(t1b, t2b; time_tolerance = 0.1, memory_tolerance = 0.1),
- "c" => judge(tc, tc; time_tolerance = 0.1, memory_tolerance = 0.1)))
-@test (judge(ratio(g1, g2); time_tolerance = 0.1, memory_tolerance = 0.1) ==
- judge(g1, g2; time_tolerance = 0.1, memory_tolerance = 0.1))
-@test ratio(g1, g2) == ratio(judge(g1, g2))
-
-@test isinvariant(judge(g1, g1))
-@test isinvariant(time, judge(g1, g1))
-@test isinvariant(memory, judge(g1, g1))
-@test !(isregression(judge(g1, g1)))
-@test !(isregression(time, judge(g1, g1)))
-@test !(isregression(memory, judge(g1, g1)))
-@test !(isimprovement(judge(g1, g1)))
-@test !(isimprovement(time, judge(g1, g1)))
-@test !(isimprovement(memory, judge(g1, g1)))
-
-@test BenchmarkTools.invariants(judge(g1, g2)).data == Dict("c" => judge(tc, tc))
-@test BenchmarkTools.invariants(time, (judge(g1, g2))).data == Dict("c" => judge(tc, tc))
-@test BenchmarkTools.invariants(memory, (judge(g1, g2))).data == Dict("a" => judge(t1a, t2a), "b" => judge(t1b, t2b), "c" => judge(tc, tc))
-@test BenchmarkTools.regressions(judge(g1, g2)).data == Dict("b" => judge(t1b, t2b))
-@test BenchmarkTools.regressions(time, (judge(g1, g2))).data == Dict("b" => judge(t1b, t2b))
-@test BenchmarkTools.regressions(memory, (judge(g1, g2))).data == Dict()
-@test BenchmarkTools.improvements(judge(g1, g2)).data == Dict("a" => judge(t1a, t2a))
-@test BenchmarkTools.improvements(time, (judge(g1, g2))).data == Dict("a" => judge(t1a, t2a))
-@test BenchmarkTools.improvements(memory, (judge(g1, g2))).data == Dict()
-
-@test isinvariant(judge(g1, g1))
-@test !(isinvariant(judge(g1, g2)))
-@test isregression(judge(g1, g2))
-@test !(isregression(judge(g1, g1)))
-@test isimprovement(judge(g1, g2))
-@test !(isimprovement(judge(g1, g1)))
-@test invariants(judge(g1, g2)).data == Dict("c" => judge(tc, tc))
-@test regressions(judge(g1, g2)).data == Dict("b" => judge(t1b, t2b))
-@test improvements(judge(g1, g2)).data == Dict("a" => judge(t1a, t2a))
-
-struct Bar end
-@test BenchmarkTools.invariants(Bar()) == Bar()
-@test BenchmarkTools.invariants(time, (Bar())) == Bar()
-@test BenchmarkTools.invariants(memory, (Bar())) == Bar()
-@test BenchmarkTools.regressions(Bar()) == Bar()
-@test BenchmarkTools.regressions(time, (Bar())) == Bar()
-@test BenchmarkTools.regressions(memory, (Bar())) == Bar()
-@test BenchmarkTools.improvements(Bar()) == Bar()
-@test BenchmarkTools.improvements(time, (Bar())) == Bar()
-@test BenchmarkTools.improvements(memory, (Bar())) == Bar()
-
-@test minimum(gtrial)["t"] == minimum(gtrial["t"])
-@test median(gtrial)["t"] == median(gtrial["t"])
-@test mean(gtrial)["t"] == mean(gtrial["t"])
-@test maximum(gtrial)["t"] == maximum(gtrial["t"])
-@test params(gtrial)["t"] == params(gtrial["t"])
-
-######################################
-# BenchmarkGroups of BenchmarkGroups #
-######################################
-
-# setup #
-#-------#
-
-groupsa = BenchmarkGroup()
-groupsa["g1"] = g1
-groupsa["g2"] = g2
-g3a = addgroup!(groupsa, "g3", ["3", "4"])
-g3a["c"] = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 6341, 23, 41, 536)
-g3a["d"] = TrialEstimate(Parameters(time_tolerance = .13, memory_tolerance = .13), 12341, 3013, 2, 150)
-
-groups_copy = copy(groupsa)
-groups_similar = similar(groupsa)
-
-groupsb = BenchmarkGroup()
-groupsb["g1"] = g1
-groupsb["g2"] = g2
-g3b = addgroup!(groupsb, "g3", ["3", "4"])
-g3b["c"] = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 1003, 23, 41, 536)
-g3b["d"] = TrialEstimate(Parameters(time_tolerance = .23, memory_tolerance = .23), 25341, 3013, 2, 150)
-
-groupstrial = BenchmarkGroup()
-groupstrial["g"] = gtrial
-
-# tests #
-#-------#
-
-@test time(groupsa).data == Dict("g1" => time(g1), "g2" => time(g2), "g3" => time(g3a))
-@test gctime(groupsa).data == Dict("g1" => gctime(g1), "g2" => gctime(g2), "g3" => gctime(g3a))
-@test memory(groupsa).data == Dict("g1" => memory(g1), "g2" => memory(g2), "g3" => memory(g3a))
-@test allocs(groupsa).data == Dict("g1" => allocs(g1), "g2" => allocs(g2), "g3" => allocs(g3a))
-@test params(groupsa).data == Dict("g1" => params(g1), "g2" => params(g2), "g3" => params(g3a))
-
-for (k, v) in BenchmarkTools.leaves(groupsa)
- @test groupsa[k] == v
-end
-
-@test max(groupsa, groupsb).data == Dict("g1" => max(g1, g1), "g2" => max(g2, g2), "g3" => max(g3a, g3b))
-@test min(groupsa, groupsb).data == Dict("g1" => min(g1, g1), "g2" => min(g2, g2), "g3" => min(g3a, g3b))
-@test ratio(groupsa, groupsb).data == Dict("g1" => ratio(g1, g1), "g2" => ratio(g2, g2), "g3" => ratio(g3a, g3b))
-@test (judge(groupsa, groupsb; time_tolerance = 0.1, memory_tolerance = 0.1).data ==
- Dict("g1" => judge(g1, g1; time_tolerance = 0.1, memory_tolerance = 0.1),
- "g2" => judge(g2, g2; time_tolerance = 0.1, memory_tolerance = 0.1),
- "g3" => judge(g3a, g3b; time_tolerance = 0.1, memory_tolerance = 0.1)))
-@test (judge(ratio(groupsa, groupsb); time_tolerance = 0.1, memory_tolerance = 0.1) ==
- judge(groupsa, groupsb; time_tolerance = 0.1, memory_tolerance = 0.1))
-@test ratio(groupsa, groupsb) == ratio(judge(groupsa, groupsb))
-
-@test isinvariant(judge(groupsa, groupsa))
-@test !(isinvariant(judge(groupsa, groupsb)))
-@test isregression(judge(groupsa, groupsb))
-@test !(isregression(judge(groupsa, groupsa)))
-@test isimprovement(judge(groupsa, groupsb))
-@test !(isimprovement(judge(groupsa, groupsa)))
-@test invariants(judge(groupsa, groupsb)).data == Dict("g1" => judge(g1, g1), "g2" => judge(g2, g2))
-@test regressions(judge(groupsa, groupsb)).data == Dict("g3" => regressions(judge(g3a, g3b)))
-@test improvements(judge(groupsa, groupsb)).data == Dict("g3" => improvements(judge(g3a, g3b)))
-
-@test minimum(groupstrial)["g"]["t"] == minimum(groupstrial["g"]["t"])
-@test maximum(groupstrial)["g"]["t"] == maximum(groupstrial["g"]["t"])
-@test median(groupstrial)["g"]["t"] == median(groupstrial["g"]["t"])
-@test mean(groupstrial)["g"]["t"] == mean(groupstrial["g"]["t"])
-@test params(groupstrial)["g"]["t"] == params(groupstrial["g"]["t"])
-
-# tagging #
-#---------#
-
-@test groupsa[@tagged "1"] == BenchmarkGroup([], "g1" => g1)
-@test groupsa[@tagged "2"] == BenchmarkGroup([], "g1" => g1, "g2" => g2)
-@test groupsa[@tagged "3"] == BenchmarkGroup([], "g2" => g2, "g3" => g3a)
-@test groupsa[@tagged "4"] == BenchmarkGroup([], "g3" => g3a)
-@test groupsa[@tagged "3" && "4"] == groupsa[@tagged "4"]
-@test groupsa[@tagged ALL && !("2")] == groupsa[@tagged !("2")]
-@test groupsa[@tagged "1" || "4"] == BenchmarkGroup([], "g1" => g1, "g3" => g3a)
-@test groupsa[@tagged ("1" || "4") && !("2")] == groupsa[@tagged "4"]
-@test groupsa[@tagged !("1" || "4") && "2"] == BenchmarkGroup([], "g2" => g2)
-@test groupsa[@tagged ALL] == groupsa
-@test groupsa[@tagged !("1" || "3") && !("4")] == similar(groupsa)
-
-gnest = BenchmarkGroup(["1"],
- "2" => BenchmarkGroup(["3"], 1 => 1),
- 4 => BenchmarkGroup(["3"], 5 => 6),
- 7 => 8,
- "a" => BenchmarkGroup(["3"], "a" => :a, (11, "b") => :b),
- 9 => BenchmarkGroup(["2"],
- 10 => BenchmarkGroup(["3"]),
- 11 => BenchmarkGroup()))
-
-@test sort(leaves(gnest), by=string) ==
- Any[(Any["2",1],1), (Any["a","a"],:a), (Any["a",(11,"b")],:b), (Any[4,5],6), (Any[7],8)]
-
-@test gnest[@tagged 11 || 10] == BenchmarkGroup(["1"],
- "a" => BenchmarkGroup(["3"],
- (11, "b") => :b),
- 9 => gnest[9])
-
-@test gnest[@tagged "3"] == BenchmarkGroup(["1"], "2" => gnest["2"], 4 => gnest[4], "a" => gnest["a"],
- 9 => BenchmarkGroup(["2"], 10 => BenchmarkGroup(["3"])))
-
-@test gnest[@tagged "1" && "2" && "3"] == BenchmarkGroup(["1"], "2" => gnest["2"],
- 9 => BenchmarkGroup(["2"], 10 => BenchmarkGroup(["3"])))
-
-k = 3 + im
-gnest = BenchmarkGroup(["1"], :hi => BenchmarkGroup([], 1 => 1, k => BenchmarkGroup(["3"], 1 => 1)), 2 => 1)
-
-@test gnest[@tagged "1"] == gnest
-@test gnest[@tagged "1" && !(:hi)] == BenchmarkGroup(["1"], 2 => 1)
-@test gnest[@tagged :hi && !("3")] == BenchmarkGroup(["1"], :hi => BenchmarkGroup([], 1 => 1))
-@test gnest[@tagged k] == BenchmarkGroup(["1"], :hi => BenchmarkGroup([], k => BenchmarkGroup(["3"], 1 => 1)))
-
-# indexing by BenchmarkGroup #
-#----------------------------#
-
-g = BenchmarkGroup()
-d = Dict("1" => 1, "2" => 2, "3" => 3)
-g["a"] = BenchmarkGroup([], copy(d))
-g["b"] = BenchmarkGroup([], copy(d))
-g["c"] = BenchmarkGroup([], copy(d))
-g["d"] = BenchmarkGroup([], copy(d))
-g["e"] = BenchmarkGroup([], "1" => BenchmarkGroup([], copy(d)),
- "2" => BenchmarkGroup([], copy(d)),
- "3" => BenchmarkGroup([], copy(d)))
-
-x = BenchmarkGroup()
-x["a"] = BenchmarkGroup([], "1" => '1', "3" => '3')
-x["c"] = BenchmarkGroup([], "2" => '2')
-x["d"] = BenchmarkGroup([], "1" => '1', "2" => '2', "3" => '3')
-x["e"] = BenchmarkGroup([], "1" => x["a"], "3" => x["c"])
-
-gx = BenchmarkGroup()
-gx["a"] = BenchmarkGroup([], "1" => 1, "3" => 3)
-gx["c"] = BenchmarkGroup([], "2" => 2)
-gx["d"] = BenchmarkGroup([], "1" => 1, "2" => 2, "3" => 3)
-gx["e"] = BenchmarkGroup([], "1" => g["e"]["1"][x["a"]], "3" => g["e"]["3"][x["c"]])
-
-@test g[x] == gx
-
-# indexing by Vector #
-#--------------------#
-
-g1 = BenchmarkGroup(1 => BenchmarkGroup("a" => BenchmarkGroup()))
-g1[[1, "a", :b]] = "hello"
-@test g1[[1, "a", :b]] == "hello"
-
-g2 = BenchmarkGroup()
-g2[[1, "a", :b]] = "hello" # should create higher levels on the fly
-@test g2[[1, "a", :b]] == "hello"
-
-@test g1 == g2
-
-@testset "benchmarkset" begin
- g1 = @benchmarkset "test set" begin
- @case "test case 1" 1 + 1
- @case "test case 2" 2 + 2
- end
-
- @test haskey(g1, "test set")
- @test haskey(g1["test set"], "test case 1")
- @test haskey(g1["test set"], "test case 2")
-end
-# pretty printing #
-#-----------------#
-
-g1 = BenchmarkGroup(["1", "2"])
-g1["a"] = t1a
-g1["b"] = t1b
-g1["c"] = tc
-
-@test sprint(show, g1) == """
-3-element BenchmarkTools.BenchmarkGroup:
- tags: ["1", "2"]
- "c" => TrialEstimate(1.000 ns)
- "b" => TrialEstimate(4.123 μs)
- "a" => TrialEstimate(32.000 ns)"""
-@test sprint(show, g1; context = :boundto => 1) == """
-3-element BenchmarkTools.BenchmarkGroup:
- tags: ["1", "2"]
- "c" => TrialEstimate(1.000 ns)
- ⋮"""
-@test sprint(show, g1; context = :limit => false) == """
-3-element BenchmarkTools.BenchmarkGroup:
- tags: ["1", "2"]
- "c" => TrialEstimate(1.000 ns)
- "b" => TrialEstimate(4.123 μs)
- "a" => TrialEstimate(32.000 ns)"""
-@test @test_deprecated(sprint(show, g1; context = :limit => 1)) == """
-3-element BenchmarkTools.BenchmarkGroup:
- tags: ["1", "2"]
- "c" => TrialEstimate(1.000 ns)
- ⋮"""
-
-# end # module
diff --git a/test/ParametersTests.jl b/test/ParametersTests.jl
deleted file mode 100644
index cb07e1e9..00000000
--- a/test/ParametersTests.jl
+++ /dev/null
@@ -1,48 +0,0 @@
-module ParametersTests
-
-using Test
-using BenchmarkTools
-using BenchmarkTools: Parameters
-
-@test BenchmarkTools.DEFAULT_PARAMETERS == Parameters()
-
-p = Parameters(seconds = 1, gctrial = false)
-oldseconds = BenchmarkTools.DEFAULT_PARAMETERS.seconds
-oldgctrial = BenchmarkTools.DEFAULT_PARAMETERS.gctrial
-BenchmarkTools.DEFAULT_PARAMETERS.seconds = p.seconds
-BenchmarkTools.DEFAULT_PARAMETERS.gctrial = p.gctrial
-@test p == Parameters()
-@test Parameters(p; evals = 3, time_tolerance = .32) == Parameters(evals = 3, time_tolerance = .32)
-BenchmarkTools.DEFAULT_PARAMETERS.seconds = oldseconds
-BenchmarkTools.DEFAULT_PARAMETERS.gctrial = oldgctrial
-
-p = Parameters(seconds = 1, gctrial = false, samples = 2, evals = 2, overhead = 42,
- gcsample = false, time_tolerance = 0.043, memory_tolerance = 0.15)
-oldseconds = BenchmarkTools.DEFAULT_PARAMETERS.seconds
-oldgctrial = BenchmarkTools.DEFAULT_PARAMETERS.gctrial
-old_time_tolerance = BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance
-old_memory_tolerance = BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance
-oldsamples = BenchmarkTools.DEFAULT_PARAMETERS.samples
-oldevals = BenchmarkTools.DEFAULT_PARAMETERS.evals
-oldoverhead = BenchmarkTools.DEFAULT_PARAMETERS.overhead
-oldgcsample = BenchmarkTools.DEFAULT_PARAMETERS.gcsample
-BenchmarkTools.DEFAULT_PARAMETERS.seconds = p.seconds
-BenchmarkTools.DEFAULT_PARAMETERS.gctrial = p.gctrial
-BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = p.time_tolerance
-BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = p.memory_tolerance
-BenchmarkTools.DEFAULT_PARAMETERS.samples = p.samples
-BenchmarkTools.DEFAULT_PARAMETERS.evals = p.evals
-BenchmarkTools.DEFAULT_PARAMETERS.overhead = p.overhead
-BenchmarkTools.DEFAULT_PARAMETERS.gcsample = p.gcsample
-@test p == Parameters()
-@test p == Parameters(p)
-BenchmarkTools.DEFAULT_PARAMETERS.seconds = oldseconds
-BenchmarkTools.DEFAULT_PARAMETERS.gctrial = oldgctrial
-BenchmarkTools.DEFAULT_PARAMETERS.time_tolerance = old_time_tolerance
-BenchmarkTools.DEFAULT_PARAMETERS.memory_tolerance = old_memory_tolerance
-BenchmarkTools.DEFAULT_PARAMETERS.samples = oldsamples
-BenchmarkTools.DEFAULT_PARAMETERS.evals = oldevals
-BenchmarkTools.DEFAULT_PARAMETERS.overhead = oldoverhead
-BenchmarkTools.DEFAULT_PARAMETERS.gcsample = oldgcsample
-
-end # module
diff --git a/test/TrialsTests.jl b/test/TrialsTests.jl
deleted file mode 100644
index c9b9681f..00000000
--- a/test/TrialsTests.jl
+++ /dev/null
@@ -1,249 +0,0 @@
-module TrialsTests
-
-using BenchmarkTools
-using Test
-
-#########
-# Trial #
-#########
-
-trial1 = BenchmarkTools.Trial(BenchmarkTools.Parameters(evals = 2))
-push!(trial1, 2, 1, 4, 5)
-push!(trial1, 21, 0, 41, 51)
-
-trial2 = BenchmarkTools.Trial(BenchmarkTools.Parameters(time_tolerance = 0.15))
-push!(trial2, 21, 0, 41, 51)
-push!(trial2, 2, 1, 4, 5)
-
-push!(trial2, 21, 0, 41, 51)
-@test length(trial2) == 3
-deleteat!(trial2, 3)
-@test length(trial1) == length(trial2) == 2
-sort!(trial2)
-
-@test trial1.params == BenchmarkTools.Parameters(evals = trial1.params.evals)
-@test trial2.params == BenchmarkTools.Parameters(time_tolerance = trial2.params.time_tolerance)
-@test trial1.times == trial2.times == [2.0, 21.0]
-@test trial1.gctimes == trial2.gctimes == [1.0, 0.0]
-@test trial1.memory == trial2.memory == 4
-@test trial1.allocs == trial2.allocs == 5
-
-trial2.params = trial1.params
-
-@test trial1 == trial2
-
-@test trial1[2] == push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(evals = 2)), 21, 0, 4, 5)
-@test trial1[1:end] == trial1
-
-@test time(trial1) == time(trial2) == 2.0
-@test gctime(trial1) == gctime(trial2) == 1.0
-@test memory(trial1) == memory(trial2) == trial1.memory
-@test allocs(trial1) == allocs(trial2) == trial1.allocs
-@test params(trial1) == params(trial2) == trial1.params
-
-# outlier trimming
-trial3 = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [1, 2, 3, 10, 11],
- [1, 1, 1, 1, 1], 1, 1)
-
-trimtrial3 = rmskew(trial3)
-rmskew!(trial3)
-
-@test mean(trimtrial3) <= median(trimtrial3)
-@test trimtrial3 == trial3
-
-#################
-# TrialEstimate #
-#################
-
-randtrial = BenchmarkTools.Trial(BenchmarkTools.Parameters())
-
-for _ in 1:40
- push!(randtrial, rand(1:20), 1, 1, 1)
-end
-
-while mean(randtrial) <= median(randtrial)
- push!(randtrial, rand(10:20), 1, 1, 1)
-end
-
-rmskew!(randtrial)
-
-tmin = minimum(randtrial)
-tmed = median(randtrial)
-tmean = mean(randtrial)
-tmax = maximum(randtrial)
-
-@test time(tmin) == time(randtrial)
-@test gctime(tmin) == gctime(randtrial)
-@test memory(tmin) == memory(tmed) == memory(tmean) == memory(tmax) == memory(randtrial)
-@test allocs(tmin) == allocs(tmed) == allocs(tmean) == allocs(tmax) == allocs(randtrial)
-@test params(tmin) == params(tmed) == params(tmean) == params(tmax) == params(randtrial)
-
-@test tmin <= tmed
-@test tmean <= tmed # this should be true since we called rmoutliers!(randtrial) earlier
-@test tmed <= tmax
-
-##############
-# TrialRatio #
-##############
-
-randrange = 1.0:0.01:10.0
-x, y = rand(randrange), rand(randrange)
-
-@test (ratio(x, y) == x/y) && (ratio(y, x) == y/x)
-@test (ratio(x, x) == 1.0) && (ratio(y, y) == 1.0)
-@test ratio(0.0, 0.0) == 1.0
-
-ta = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(), rand(), rand(), rand(Int), rand(Int))
-tb = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(), rand(), rand(), rand(Int), rand(Int))
-tr = ratio(ta, tb)
-
-@test time(tr) == ratio(time(ta), time(tb))
-@test gctime(tr) == ratio(gctime(ta), gctime(tb))
-@test memory(tr) == ratio(memory(ta), memory(tb))
-@test allocs(tr) == ratio(allocs(ta), allocs(tb))
-@test params(tr) == params(ta) == params(tb)
-
-@test BenchmarkTools.gcratio(ta) == ratio(gctime(ta), time(ta))
-@test BenchmarkTools.gcratio(tb) == ratio(gctime(tb), time(tb))
-
-##################
-# TrialJudgement #
-##################
-
-ta = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(time_tolerance = 0.50, memory_tolerance = 0.50), 0.49, 0.0, 2, 1)
-tb = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(time_tolerance = 0.05, memory_tolerance = 0.05), 1.00, 0.0, 1, 1)
-tr = ratio(ta, tb)
-tj_ab = judge(ta, tb)
-tj_r = judge(tr)
-
-@test ratio(tj_ab) == ratio(tj_r) == tr
-@test time(tj_ab) == time(tj_r) == :improvement
-@test memory(tj_ab) == memory(tj_r) == :regression
-@test tj_ab == tj_r
-
-tj_ab_2 = judge(ta, tb; time_tolerance = 2.0, memory_tolerance = 2.0)
-tj_r_2 = judge(tr; time_tolerance = 2.0, memory_tolerance = 2.0)
-
-@test tj_ab_2 == tj_r_2
-@test ratio(tj_ab_2) == ratio(tj_r_2)
-@test time(tj_ab_2) == time(tj_r_2) == :invariant
-@test memory(tj_ab_2) == memory(tj_r_2) == :invariant
-
-@test !(isinvariant(tj_ab))
-@test !(isinvariant(tj_r))
-@test isinvariant(tj_ab_2)
-@test isinvariant(tj_r_2)
-
-@test !(isinvariant(time, tj_ab))
-@test !(isinvariant(time, tj_r))
-@test isinvariant(time, tj_ab_2)
-@test isinvariant(time, tj_r_2)
-
-@test !(isinvariant(memory, tj_ab))
-@test !(isinvariant(memory, tj_r))
-@test isinvariant(memory, tj_ab_2)
-@test isinvariant(memory, tj_r_2)
-
-@test isregression(tj_ab)
-@test isregression(tj_r)
-@test !(isregression(tj_ab_2))
-@test !(isregression(tj_r_2))
-
-@test !(isregression(time, tj_ab))
-@test !(isregression(time, tj_r))
-@test !(isregression(time, tj_ab_2))
-@test !(isregression(time, tj_r_2))
-
-@test isregression(memory, tj_ab)
-@test isregression(memory, tj_r)
-@test !(isregression(memory, tj_ab_2))
-@test !(isregression(memory, tj_r_2))
-
-@test isimprovement(tj_ab)
-@test isimprovement(tj_r)
-@test !(isimprovement(tj_ab_2))
-@test !(isimprovement(tj_r_2))
-
-@test isimprovement(time, tj_ab)
-@test isimprovement(time, tj_r)
-@test !(isimprovement(time, tj_ab_2))
-@test !(isimprovement(time, tj_r_2))
-
-@test !(isimprovement(memory, tj_ab))
-@test !(isimprovement(memory, tj_r))
-@test !(isimprovement(memory, tj_ab_2))
-@test !(isimprovement(memory, tj_r_2))
-
-###################
-# pretty printing #
-###################
-
-@test BenchmarkTools.prettypercent(.3120123) == "31.20%"
-
-@test BenchmarkTools.prettydiff(0.0) == "-100.00%"
-@test BenchmarkTools.prettydiff(1.0) == "+0.00%"
-@test BenchmarkTools.prettydiff(2.0) == "+100.00%"
-
-@test BenchmarkTools.prettytime(999) == "999.000 ns"
-@test BenchmarkTools.prettytime(1000) == "1.000 μs"
-@test BenchmarkTools.prettytime(999_999) == "999.999 μs"
-@test BenchmarkTools.prettytime(1_000_000) == "1.000 ms"
-@test BenchmarkTools.prettytime(999_999_999) == "1000.000 ms"
-@test BenchmarkTools.prettytime(1_000_000_000) == "1.000 s"
-
-@test BenchmarkTools.prettymemory(1023) == "1023 bytes"
-@test BenchmarkTools.prettymemory(1024) == "1.00 KiB"
-@test BenchmarkTools.prettymemory(1048575) == "1024.00 KiB"
-@test BenchmarkTools.prettymemory(1048576) == "1.00 MiB"
-@test BenchmarkTools.prettymemory(1073741823) == "1024.00 MiB"
-@test BenchmarkTools.prettymemory(1073741824) == "1.00 GiB"
-
-@test sprint(show, "text/plain", ta) == sprint(show, ta; context=:compact => false) == """
-BenchmarkTools.TrialEstimate:
- time: 0.490 ns
- gctime: 0.000 ns (0.00%)
- memory: 2 bytes
- allocs: 1"""
-
-@test sprint(show, ta) == "TrialEstimate(0.490 ns)"
-@test sprint(
- show, ta;
- context = IOContext(
- devnull, :compact => true, :typeinfo => BenchmarkTools.TrialEstimate)
-) == "0.490 ns"
-
-@test sprint(show, [ta, tb]) == "BenchmarkTools.TrialEstimate[0.490 ns, 1.000 ns]"
-
-trial1sample = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [1], [1], 1, 1)
-@test try display(trial1sample); true catch e false end
-
-@static if VERSION < v"1.6-"
-
-@test sprint(show, "text/plain", [ta, tb]) == """
-2-element Array{BenchmarkTools.TrialEstimate,1}:
- 0.490 ns
- 1.000 ns"""
-
-else
-
-@test sprint(show, "text/plain", [ta, tb]) == """
-2-element Vector{BenchmarkTools.TrialEstimate}:
- 0.490 ns
- 1.000 ns"""
-
-end
-
-trial = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [1.0, 1.01], [0.0, 0.0], 0, 0)
-@test sprint(show, "text/plain", trial) == """
-BenchmarkTools.Trial: 2 samples with 1 evaluation.
- Range (min … max): 1.000 ns … 1.010 ns ┊ GC (min … max): 0.00% … 0.00%
- Time (median): 1.005 ns ┊ GC (median): 0.00%
- Time (mean ± σ): 1.005 ns ± 0.007 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
-
- █ █
- █▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█ ▁
- 1 ns Histogram: frequency by time 1.01 ns <
-
- Memory estimate: 0 bytes, allocs estimate: 0."""
-
-end # module
diff --git a/test/data/test02_pretty.txt b/test/data/test02_pretty.txt
new file mode 100644
index 00000000..0cd13389
--- /dev/null
+++ b/test/data/test02_pretty.txt
@@ -0,0 +1,27 @@
+BenchmarkExt.TrialEstimate:
+ time: 0.490 ns
+ gctime: 0.000 ns (0.00%)
+ memory: 2 bytes
+ allocs: 1
+
+
+2-element Array{BenchmarkExt.TrialEstimate,1}:
+ 0.490 ns
+ 1.000 ns
+
+
+2-element Vector{BenchmarkExt.TrialEstimate}:
+ 0.490 ns
+ 1.000 ns
+
+
+BenchmarkExt.Trial: 2 samples with 1 evaluation.
+ Range (min … max): 1.000 ns … 1.010 ns ┊ GC (min … max): 0.00% … 0.00%
+ Time (median): 1.005 ns ┊ GC (median): 0.00%
+ Time (mean ± σ): 1.005 ns ± 0.007 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
+
+ █ █
+ █▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█ ▁
+ 1 ns Histogram: frequency by time 1.01 ns <
+
+ Memory estimate: 0 bytes, allocs estimate: 0.
diff --git a/test/data/test03_pretty.txt b/test/data/test03_pretty.txt
new file mode 100644
index 00000000..95cd7c7b
--- /dev/null
+++ b/test/data/test03_pretty.txt
@@ -0,0 +1,21 @@
+3-element BenchmarkExt.BenchmarkGroup:
+ tags: ["1", "2"]
+ "c" => TrialEstimate(1.000 ns)
+ "b" => TrialEstimate(4.123 μs)
+ "a" => TrialEstimate(32.000 ns)
+
+3-element BenchmarkExt.BenchmarkGroup:
+ tags: ["1", "2"]
+ "c" => TrialEstimate(1.000 ns)
+ ⋮
+
+3-element BenchmarkExt.BenchmarkGroup:
+ tags: ["1", "2"]
+ "c" => TrialEstimate(1.000 ns)
+ "b" => TrialEstimate(4.123 μs)
+ "a" => TrialEstimate(32.000 ns)
+
+3-element BenchmarkExt.BenchmarkGroup:
+ tags: ["1", "2"]
+ "c" => TrialEstimate(1.000 ns)
+ ⋮
diff --git a/test/main.jl b/test/main.jl
new file mode 100644
index 00000000..f1caa8be
--- /dev/null
+++ b/test/main.jl
@@ -0,0 +1,8 @@
+module BenchmarkExtTest
+
+for file in sort([file for file in readdir(@__DIR__) if
+ occursin(r"^test.*\.jl$", file)])
+ include(file)
+end
+
+end # module
diff --git a/test/runtests.jl b/test/runtests.jl
index e7259482..61efa899 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,19 +1,11 @@
-print("Testing Parameters...")
-took_seconds = @elapsed include("ParametersTests.jl")
-println("done (took ", took_seconds, " seconds)")
-
-print("Testing Trial/TrialEstimate/TrialRatio/TrialJudgement...")
-took_seconds = @elapsed include("TrialsTests.jl")
-println("done (took ", took_seconds, " seconds)")
-
-print("Testing BenchmarkGroup...")
-took_seconds = @elapsed include("GroupsTests.jl")
-println("done (took ", took_seconds, " seconds)")
-
-print("Testing execution...")
-took_seconds = @elapsed include("ExecutionTests.jl")
-println("done (took ", took_seconds, " seconds)")
-
-print("Testing serialization...")
-took_seconds = @elapsed include("SerializationTests.jl")
-println("done (took ", took_seconds, " seconds)")
+include(joinpath(@__DIR__, "main.jl"))
+
+# You can run `Pkg.test("BenchmarkExt", test_args = ["foo", "bar"])` or just
+# `Pkg.test(test_args = ["foo", "bar"])` to select only specific tests. If no `test_args`
+# is given or you are running the usual `> ] test` command, then all tests are executed.
+# Strings are used as regexps and you can prepend a "-" char before a filter to exclude the matching subset of tests; for example `Pkg.test("BenchmarkExt", test_args = ["-foo.*"])` executes all tests except those which start with `foo`.
+if isempty(ARGS)
+ BenchmarkExtTest.runtests(spin = false)
+else
+ BenchmarkExtTest.runtests(map(arg -> startswith(arg, "-") ? not(Regex(arg[2:end])) : Regex(arg), ARGS), spin = false)
+end
diff --git a/test/test01_Parameters.jl b/test/test01_Parameters.jl
new file mode 100644
index 00000000..d7c84d16
--- /dev/null
+++ b/test/test01_Parameters.jl
@@ -0,0 +1,54 @@
+module ParametersTests
+
+using ReTest
+using BenchmarkExt
+using BenchmarkExt: Parameters
+
+@testset "Parameters test" begin
+ @test BenchmarkExt.DEFAULT_PARAMETERS == Parameters()
+
+ p = Parameters(seconds = 1, gctrial = false)
+ oldseconds = BenchmarkExt.DEFAULT_PARAMETERS.seconds
+ oldgctrial = BenchmarkExt.DEFAULT_PARAMETERS.gctrial
+ BenchmarkExt.DEFAULT_PARAMETERS.seconds = p.seconds
+ BenchmarkExt.DEFAULT_PARAMETERS.gctrial = p.gctrial
+
+ @test p == Parameters()
+ @test Parameters(p; evals = 3, time_tolerance = .32) == Parameters(evals = 3, time_tolerance = .32)
+
+ BenchmarkExt.DEFAULT_PARAMETERS.seconds = oldseconds
+ BenchmarkExt.DEFAULT_PARAMETERS.gctrial = oldgctrial
+
+ p = Parameters(seconds = 1, gctrial = false, samples = 2, evals = 2, overhead = 42,
+ gcsample = false, time_tolerance = 0.043, memory_tolerance = 0.15)
+ oldseconds = BenchmarkExt.DEFAULT_PARAMETERS.seconds
+ oldgctrial = BenchmarkExt.DEFAULT_PARAMETERS.gctrial
+ old_time_tolerance = BenchmarkExt.DEFAULT_PARAMETERS.time_tolerance
+ old_memory_tolerance = BenchmarkExt.DEFAULT_PARAMETERS.memory_tolerance
+ oldsamples = BenchmarkExt.DEFAULT_PARAMETERS.samples
+ oldevals = BenchmarkExt.DEFAULT_PARAMETERS.evals
+ oldoverhead = BenchmarkExt.DEFAULT_PARAMETERS.overhead
+ oldgcsample = BenchmarkExt.DEFAULT_PARAMETERS.gcsample
+ BenchmarkExt.DEFAULT_PARAMETERS.seconds = p.seconds
+ BenchmarkExt.DEFAULT_PARAMETERS.gctrial = p.gctrial
+ BenchmarkExt.DEFAULT_PARAMETERS.time_tolerance = p.time_tolerance
+ BenchmarkExt.DEFAULT_PARAMETERS.memory_tolerance = p.memory_tolerance
+ BenchmarkExt.DEFAULT_PARAMETERS.samples = p.samples
+ BenchmarkExt.DEFAULT_PARAMETERS.evals = p.evals
+ BenchmarkExt.DEFAULT_PARAMETERS.overhead = p.overhead
+ BenchmarkExt.DEFAULT_PARAMETERS.gcsample = p.gcsample
+
+ @test p == Parameters()
+ @test p == Parameters(p)
+
+ BenchmarkExt.DEFAULT_PARAMETERS.seconds = oldseconds
+ BenchmarkExt.DEFAULT_PARAMETERS.gctrial = oldgctrial
+ BenchmarkExt.DEFAULT_PARAMETERS.time_tolerance = old_time_tolerance
+ BenchmarkExt.DEFAULT_PARAMETERS.memory_tolerance = old_memory_tolerance
+ BenchmarkExt.DEFAULT_PARAMETERS.samples = oldsamples
+ BenchmarkExt.DEFAULT_PARAMETERS.evals = oldevals
+ BenchmarkExt.DEFAULT_PARAMETERS.overhead = oldoverhead
+ BenchmarkExt.DEFAULT_PARAMETERS.gcsample = oldgcsample
+end
+
+end # module
diff --git a/test/test02_Trials.jl b/test/test02_Trials.jl
new file mode 100644
index 00000000..60360c9c
--- /dev/null
+++ b/test/test02_Trials.jl
@@ -0,0 +1,225 @@
+module TrialsTests
+
+using BenchmarkExt
+using ReTest
+using StableRNGs
+
+@testset "Trial" begin
+ trial1 = BenchmarkExt.Trial(BenchmarkExt.Parameters(evals = 2))
+ push!(trial1, 2, 1, 4, 5)
+ push!(trial1, 21, 0, 41, 51)
+
+ trial2 = BenchmarkExt.Trial(BenchmarkExt.Parameters(time_tolerance = 0.15))
+ push!(trial2, 21, 0, 41, 51)
+ push!(trial2, 2, 1, 4, 5)
+
+ push!(trial2, 21, 0, 41, 51)
+ @test length(trial2) == 3
+ deleteat!(trial2, 3)
+ @test length(trial1) == length(trial2) == 2
+ sort!(trial2)
+
+ @test trial1.params == BenchmarkExt.Parameters(evals = trial1.params.evals)
+ @test trial2.params == BenchmarkExt.Parameters(time_tolerance = trial2.params.time_tolerance)
+ @test trial1.times == trial2.times == [2.0, 21.0]
+ @test trial1.gctimes == trial2.gctimes == [1.0, 0.0]
+ @test trial1.memory == trial2.memory == [4, 41]
+ @test trial1.allocs == trial2.allocs == [5, 51]
+
+ trial2.params = trial1.params
+
+ @test trial1 == trial2
+
+ @test trial1[2] == push!(BenchmarkExt.Trial(BenchmarkExt.Parameters(evals = 2)), 21, 0, 41, 51)
+ @test trial1[1:end] == trial1
+
+ @test time(trial1) == time(trial2) == 2.0
+ @test gctime(trial1) == gctime(trial2) == 1.0
+ @test memory(trial1) == memory(trial2) == 4.0
+ @test allocs(trial1) == allocs(trial2) == 5.0
+ @test params(trial1) == params(trial2) == trial1.params
+
+ # outlier trimming
+ trial3 = BenchmarkExt.Trial(BenchmarkExt.Parameters(),
+ [1, 2, 3, 10, 11],
+ [1, 1, 1, 1, 1],
+ [1, 1, 1, 1, 1],
+ [1, 1, 1, 1, 1])
+
+ trimtrial3 = rmskew(trial3)
+ rmskew!(trial3)
+
+ @test mean(trimtrial3) <= median(trimtrial3)
+ @test trimtrial3 == trial3
+end
+
+@testset "TrialEstimate" begin
+ rng = StableRNG(22022022)
+ randtrial = BenchmarkExt.Trial(BenchmarkExt.Parameters())
+
+ for _ in 1:40
+ push!(randtrial, rand(rng, 1:20), 1, 1, 1)
+ end
+
+ while mean(randtrial) <= median(randtrial)
+ push!(randtrial, rand(rng, 10:20), 1, 1, 1)
+ end
+
+ rmskew!(randtrial)
+
+ tmin = minimum(randtrial)
+ tmed = median(randtrial)
+ tmean = mean(randtrial)
+ tmax = maximum(randtrial)
+
+ @test time(tmin) == time(randtrial)
+ @test gctime(tmin) == gctime(randtrial)
+ @test memory(tmin) == memory(tmed) == memory(tmean) == memory(tmax) == memory(randtrial)
+ @test allocs(tmin) == allocs(tmed) == allocs(tmean) == allocs(tmax) == allocs(randtrial)
+ @test params(tmin) == params(tmed) == params(tmean) == params(tmax) == params(randtrial)
+
+ @test tmin <= tmed
+    @test tmean <= tmed # this should be true since we called rmskew!(randtrial) earlier
+ @test tmed <= tmax
+end
+
+@testset "TrialRatio" begin
+ randrange = 1.0:0.01:10.0
+ x, y = rand(randrange), rand(randrange)
+
+ @test (ratio(x, y) == x/y) && (ratio(y, x) == y/x)
+ @test (ratio(x, x) == 1.0) && (ratio(y, y) == 1.0)
+ @test ratio(0.0, 0.0) == 1.0
+
+ ta = BenchmarkExt.TrialEstimate(BenchmarkExt.Parameters(), rand(), rand(), rand(Int), rand(Int))
+ tb = BenchmarkExt.TrialEstimate(BenchmarkExt.Parameters(), rand(), rand(), rand(Int), rand(Int))
+ tr = ratio(ta, tb)
+
+ @test time(tr) == ratio(time(ta), time(tb))
+ @test gctime(tr) == ratio(gctime(ta), gctime(tb))
+ @test memory(tr) == ratio(memory(ta), memory(tb))
+ @test allocs(tr) == ratio(allocs(ta), allocs(tb))
+ @test params(tr) == params(ta) == params(tb)
+
+ @test BenchmarkExt.gcratio(ta) == ratio(gctime(ta), time(ta))
+ @test BenchmarkExt.gcratio(tb) == ratio(gctime(tb), time(tb))
+end
+
+@testset "TrialJudgement" begin
+ ta = BenchmarkExt.TrialEstimate(BenchmarkExt.Parameters(time_tolerance = 0.50, memory_tolerance = 0.50), 0.49, 0.0, 2, 1)
+ tb = BenchmarkExt.TrialEstimate(BenchmarkExt.Parameters(time_tolerance = 0.05, memory_tolerance = 0.05), 1.00, 0.0, 1, 1)
+ tr = ratio(ta, tb)
+ tj_ab = judge(ta, tb)
+ tj_r = judge(tr)
+
+ @test ratio(tj_ab) == ratio(tj_r) == tr
+ @test time(tj_ab) == time(tj_r) == :improvement
+ @test memory(tj_ab) == memory(tj_r) == :regression
+ @test tj_ab == tj_r
+
+ tj_ab_2 = judge(ta, tb; time_tolerance = 2.0, memory_tolerance = 2.0)
+ tj_r_2 = judge(tr; time_tolerance = 2.0, memory_tolerance = 2.0)
+
+ @test tj_ab_2 == tj_r_2
+ @test ratio(tj_ab_2) == ratio(tj_r_2)
+ @test time(tj_ab_2) == time(tj_r_2) == :invariant
+ @test memory(tj_ab_2) == memory(tj_r_2) == :invariant
+
+ @test !(isinvariant(tj_ab))
+ @test !(isinvariant(tj_r))
+ @test isinvariant(tj_ab_2)
+ @test isinvariant(tj_r_2)
+
+ @test !(isinvariant(time, tj_ab))
+ @test !(isinvariant(time, tj_r))
+ @test isinvariant(time, tj_ab_2)
+ @test isinvariant(time, tj_r_2)
+
+ @test !(isinvariant(memory, tj_ab))
+ @test !(isinvariant(memory, tj_r))
+ @test isinvariant(memory, tj_ab_2)
+ @test isinvariant(memory, tj_r_2)
+
+ @test isregression(tj_ab)
+ @test isregression(tj_r)
+ @test !(isregression(tj_ab_2))
+ @test !(isregression(tj_r_2))
+
+ @test !(isregression(time, tj_ab))
+ @test !(isregression(time, tj_r))
+ @test !(isregression(time, tj_ab_2))
+ @test !(isregression(time, tj_r_2))
+
+ @test isregression(memory, tj_ab)
+ @test isregression(memory, tj_r)
+ @test !(isregression(memory, tj_ab_2))
+ @test !(isregression(memory, tj_r_2))
+
+ @test isimprovement(tj_ab)
+ @test isimprovement(tj_r)
+ @test !(isimprovement(tj_ab_2))
+ @test !(isimprovement(tj_r_2))
+
+ @test isimprovement(time, tj_ab)
+ @test isimprovement(time, tj_r)
+ @test !(isimprovement(time, tj_ab_2))
+ @test !(isimprovement(time, tj_r_2))
+
+ @test !(isimprovement(memory, tj_ab))
+ @test !(isimprovement(memory, tj_r))
+ @test !(isimprovement(memory, tj_ab_2))
+ @test !(isimprovement(memory, tj_r_2))
+end
+
+@testset "Pretty printing" begin
+ ta = BenchmarkExt.TrialEstimate(BenchmarkExt.Parameters(time_tolerance = 0.50, memory_tolerance = 0.50), 0.49, 0.0, 2, 1)
+ tb = BenchmarkExt.TrialEstimate(BenchmarkExt.Parameters(time_tolerance = 0.05, memory_tolerance = 0.05), 1.00, 0.0, 1, 1)
+ data = read(joinpath(@__DIR__, "data", "test02_pretty.txt"), String)
+ pp = strip.(split(data, "\n\n\n"))
+
+ @test BenchmarkExt.prettypercent(.3120123) == "31.20%"
+
+ @test BenchmarkExt.prettydiff(0.0) == "-100.00%"
+ @test BenchmarkExt.prettydiff(1.0) == "+0.00%"
+ @test BenchmarkExt.prettydiff(2.0) == "+100.00%"
+
+ @test BenchmarkExt.prettytime(999) == "999.000 ns"
+ @test BenchmarkExt.prettytime(1000) == "1.000 μs"
+ @test BenchmarkExt.prettytime(999_999) == "999.999 μs"
+ @test BenchmarkExt.prettytime(1_000_000) == "1.000 ms"
+ @test BenchmarkExt.prettytime(999_999_999) == "1000.000 ms"
+ @test BenchmarkExt.prettytime(1_000_000_000) == "1.000 s"
+
+ @test BenchmarkExt.prettymemory(1023) == "1023 bytes"
+ @test BenchmarkExt.prettymemory(1024) == "1.00 KiB"
+ @test BenchmarkExt.prettymemory(1048575) == "1024.00 KiB"
+ @test BenchmarkExt.prettymemory(1048576) == "1.00 MiB"
+ @test BenchmarkExt.prettymemory(1073741823) == "1024.00 MiB"
+ @test BenchmarkExt.prettymemory(1073741824) == "1.00 GiB"
+
+
+ @test sprint(show, "text/plain", ta) == sprint(show, ta; context=:compact => false) == pp[1]
+
+ @test sprint(show, ta) == "TrialEstimate(0.490 ns)"
+ @test sprint(
+ show, ta;
+ context = IOContext(
+ devnull, :compact => true, :typeinfo => BenchmarkExt.TrialEstimate)
+ ) == "0.490 ns"
+
+ @test sprint(show, [ta, tb]) == "BenchmarkExt.TrialEstimate[0.490 ns, 1.000 ns]"
+
+ trial1sample = BenchmarkExt.Trial(BenchmarkExt.Parameters(), [1], [1], [1], [1])
+ @test try display(trial1sample); true catch e false end
+
+ @static if VERSION < v"1.6-"
+ @test sprint(show, "text/plain", [ta, tb]) == pp[2]
+ else
+ @test sprint(show, "text/plain", [ta, tb]) == pp[3]
+ end
+
+ trial = BenchmarkExt.Trial(BenchmarkExt.Parameters(), [1.0, 1.01], [0.0, 0.0], [0, 0], [0, 0])
+ @test sprint(show, "text/plain", trial) == pp[4]
+end
+
+end # module
diff --git a/test/test03_Groups.jl b/test/test03_Groups.jl
new file mode 100644
index 00000000..8c0e76e9
--- /dev/null
+++ b/test/test03_Groups.jl
@@ -0,0 +1,309 @@
+module GroupsTests
+
+using BenchmarkExt
+using BenchmarkExt: TrialEstimate, Parameters
+using ReTest
+
+seteq(a, b) = length(a) == length(b) == length(intersect(a, b))
+
+function setup_vals()
+ g1 = BenchmarkGroup(["1", "2"])
+
+ t1a = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 32, 1, 2, 3)
+ t1b = TrialEstimate(Parameters(time_tolerance = .40, memory_tolerance = .40), 4123, 123, 43, 9)
+ tc = TrialEstimate(Parameters(time_tolerance = 1.0, memory_tolerance = 1.0), 1, 1, 1, 1)
+
+ g1["a"] = t1a
+ g1["b"] = t1b
+ g1["c"] = tc
+
+ g1copy = copy(g1)
+ g1similar = similar(g1)
+
+ g2 = BenchmarkGroup(["2", "3"])
+
+ t2a = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 323, 1, 2, 3)
+ t2b = TrialEstimate(Parameters(time_tolerance = .40, memory_tolerance = .40), 1002, 123, 43, 9)
+
+ g2["a"] = t2a
+ g2["b"] = t2b
+ g2["c"] = tc
+
+ trial = BenchmarkExt.Trial(Parameters(), [1, 2, 5], [0, 1, 1], [3, 4, 5], [56, 58, 62])
+
+ gtrial = BenchmarkGroup([], Dict("t" => trial))
+
+ (g1, g1copy, g1similar, g2, gtrial, t1a, t1b, tc, t2a, t2b)
+end
+
+function setup_extra(g1, g2, gtrial)
+ groupsa = BenchmarkGroup()
+ groupsa["g1"] = g1
+ groupsa["g2"] = g2
+ g3a = addgroup!(groupsa, "g3", ["3", "4"])
+ g3a["c"] = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 6341, 23, 41, 536)
+ g3a["d"] = TrialEstimate(Parameters(time_tolerance = .13, memory_tolerance = .13), 12341, 3013, 2, 150)
+
+ groups_copy = copy(groupsa)
+ groups_similar = similar(groupsa)
+
+ groupsb = BenchmarkGroup()
+ groupsb["g1"] = g1
+ groupsb["g2"] = g2
+ g3b = addgroup!(groupsb, "g3", ["3", "4"])
+ g3b["c"] = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 1003, 23, 41, 536)
+ g3b["d"] = TrialEstimate(Parameters(time_tolerance = .23, memory_tolerance = .23), 25341, 3013, 2, 150)
+
+ groupstrial = BenchmarkGroup()
+ groupstrial["g"] = gtrial
+
+ (groupsa, g3a, groupsb, g3b, groupstrial, groups_copy, groups_similar)
+end
+
+struct Bar end
+@testset "BenchmarkGroup" begin
+ g1, g1copy, g1similar, g2, gtrial, t1a, t1b, tc, t2a, t2b = setup_vals()
+
+ @test BenchmarkGroup() == BenchmarkGroup([], Dict())
+ @test length(g1) == 3
+ @test g1["a"] == t1a
+ @test g1["b"] == t1b
+ @test g1["c"] == tc
+ @test haskey(g1, "a")
+ @test !(haskey(g1, "x"))
+ @test seteq(keys(g1), ["a", "b", "c"])
+ @test seteq(values(g1), [t1a, t1b, tc])
+ @test iterate(g1) == iterate(g1.data)
+ @test iterate(g1, 1) == iterate(g1.data, 1)
+ @test seteq([x for x in g1], Pair["a"=>t1a, "b"=>t1b, "c"=>tc])
+
+ @test g1 == g1copy
+ @test seteq(keys(delete!(g1copy, "a")), ["b", "c"])
+ @test isempty(delete!(delete!(g1copy, "b"), "c"))
+ @test isempty(g1similar)
+ @test g1similar.tags == g1.tags
+
+ @test time(g1).data == Dict("a" => time(t1a), "b" => time(t1b), "c" => time(tc))
+ @test gctime(g1).data == Dict("a" => gctime(t1a), "b" => gctime(t1b), "c" => gctime(tc))
+ @test memory(g1).data == Dict("a" => memory(t1a), "b" => memory(t1b), "c" => memory(tc))
+ @test allocs(g1).data == Dict("a" => allocs(t1a), "b" => allocs(t1b), "c" => allocs(tc))
+ @test params(g1).data == Dict("a" => params(t1a), "b" => params(t1b), "c" => params(tc))
+
+ @test max(g1, g2).data == Dict("a" => t2a, "b" => t1b, "c" => tc)
+ @test min(g1, g2).data == Dict("a" => t1a, "b" => t2b, "c" => tc)
+ @test ratio(g1, g2).data == Dict("a" => ratio(t1a, t2a), "b" => ratio(t1b, t2b), "c" => ratio(tc, tc))
+ @test (judge(g1, g2; time_tolerance = 0.1, memory_tolerance = 0.1).data ==
+ Dict("a" => judge(t1a, t2a; time_tolerance = 0.1, memory_tolerance = 0.1),
+ "b" => judge(t1b, t2b; time_tolerance = 0.1, memory_tolerance = 0.1),
+ "c" => judge(tc, tc; time_tolerance = 0.1, memory_tolerance = 0.1)))
+ @test (judge(ratio(g1, g2); time_tolerance = 0.1, memory_tolerance = 0.1) ==
+ judge(g1, g2; time_tolerance = 0.1, memory_tolerance = 0.1))
+ @test ratio(g1, g2) == ratio(judge(g1, g2))
+
+ @test isinvariant(judge(g1, g1))
+ @test isinvariant(time, judge(g1, g1))
+ @test isinvariant(memory, judge(g1, g1))
+ @test !(isregression(judge(g1, g1)))
+ @test !(isregression(time, judge(g1, g1)))
+ @test !(isregression(memory, judge(g1, g1)))
+ @test !(isimprovement(judge(g1, g1)))
+ @test !(isimprovement(time, judge(g1, g1)))
+ @test !(isimprovement(memory, judge(g1, g1)))
+
+ @test BenchmarkExt.invariants(judge(g1, g2)).data == Dict("c" => judge(tc, tc))
+ @test BenchmarkExt.invariants(time, (judge(g1, g2))).data == Dict("c" => judge(tc, tc))
+ @test BenchmarkExt.invariants(memory, (judge(g1, g2))).data == Dict("a" => judge(t1a, t2a), "b" => judge(t1b, t2b), "c" => judge(tc, tc))
+ @test BenchmarkExt.regressions(judge(g1, g2)).data == Dict("b" => judge(t1b, t2b))
+ @test BenchmarkExt.regressions(time, (judge(g1, g2))).data == Dict("b" => judge(t1b, t2b))
+ @test BenchmarkExt.regressions(memory, (judge(g1, g2))).data == Dict()
+ @test BenchmarkExt.improvements(judge(g1, g2)).data == Dict("a" => judge(t1a, t2a))
+ @test BenchmarkExt.improvements(time, (judge(g1, g2))).data == Dict("a" => judge(t1a, t2a))
+ @test BenchmarkExt.improvements(memory, (judge(g1, g2))).data == Dict()
+
+ @test isinvariant(judge(g1, g1))
+ @test !(isinvariant(judge(g1, g2)))
+ @test isregression(judge(g1, g2))
+ @test !(isregression(judge(g1, g1)))
+ @test isimprovement(judge(g1, g2))
+ @test !(isimprovement(judge(g1, g1)))
+ @test invariants(judge(g1, g2)).data == Dict("c" => judge(tc, tc))
+ @test regressions(judge(g1, g2)).data == Dict("b" => judge(t1b, t2b))
+ @test improvements(judge(g1, g2)).data == Dict("a" => judge(t1a, t2a))
+
+ @testset "struct Bar" begin
+ @test BenchmarkExt.invariants(Bar()) == Bar()
+ @test BenchmarkExt.invariants(time, (Bar())) == Bar()
+ @test BenchmarkExt.invariants(memory, (Bar())) == Bar()
+ @test BenchmarkExt.regressions(Bar()) == Bar()
+ @test BenchmarkExt.regressions(time, (Bar())) == Bar()
+ @test BenchmarkExt.regressions(memory, (Bar())) == Bar()
+ @test BenchmarkExt.improvements(Bar()) == Bar()
+ @test BenchmarkExt.improvements(time, (Bar())) == Bar()
+ @test BenchmarkExt.improvements(memory, (Bar())) == Bar()
+ end
+
+ @test minimum(gtrial)["t"] == minimum(gtrial["t"])
+ @test median(gtrial)["t"] == median(gtrial["t"])
+ @test mean(gtrial)["t"] == mean(gtrial["t"])
+ @test maximum(gtrial)["t"] == maximum(gtrial["t"])
+ @test params(gtrial)["t"] == params(gtrial["t"])
+end
+
+@testset "BenchmarkGroups of BenchmarkGroups" begin
+ g1, g1copy, g1similar, g2, gtrial, t1a, t1b, tc, t2a, t2b = setup_vals()
+ groupsa, g3a, groupsb, g3b, groupstrial, groups_copy, groups_similar = setup_extra(g1, g2, gtrial)
+
+ @test time(groupsa).data == Dict("g1" => time(g1), "g2" => time(g2), "g3" => time(g3a))
+ @test gctime(groupsa).data == Dict("g1" => gctime(g1), "g2" => gctime(g2), "g3" => gctime(g3a))
+ @test memory(groupsa).data == Dict("g1" => memory(g1), "g2" => memory(g2), "g3" => memory(g3a))
+ @test allocs(groupsa).data == Dict("g1" => allocs(g1), "g2" => allocs(g2), "g3" => allocs(g3a))
+ @test params(groupsa).data == Dict("g1" => params(g1), "g2" => params(g2), "g3" => params(g3a))
+
+ for (k, v) in BenchmarkExt.leaves(groupsa)
+ @test groupsa[k] == v
+ end
+
+ @test max(groupsa, groupsb).data == Dict("g1" => max(g1, g1), "g2" => max(g2, g2), "g3" => max(g3a, g3b))
+ @test min(groupsa, groupsb).data == Dict("g1" => min(g1, g1), "g2" => min(g2, g2), "g3" => min(g3a, g3b))
+ @test ratio(groupsa, groupsb).data == Dict("g1" => ratio(g1, g1), "g2" => ratio(g2, g2), "g3" => ratio(g3a, g3b))
+ @test (judge(groupsa, groupsb; time_tolerance = 0.1, memory_tolerance = 0.1).data ==
+ Dict("g1" => judge(g1, g1; time_tolerance = 0.1, memory_tolerance = 0.1),
+ "g2" => judge(g2, g2; time_tolerance = 0.1, memory_tolerance = 0.1),
+ "g3" => judge(g3a, g3b; time_tolerance = 0.1, memory_tolerance = 0.1)))
+ @test (judge(ratio(groupsa, groupsb); time_tolerance = 0.1, memory_tolerance = 0.1) ==
+ judge(groupsa, groupsb; time_tolerance = 0.1, memory_tolerance = 0.1))
+ @test ratio(groupsa, groupsb) == ratio(judge(groupsa, groupsb))
+
+ @test isinvariant(judge(groupsa, groupsa))
+ @test !(isinvariant(judge(groupsa, groupsb)))
+ @test isregression(judge(groupsa, groupsb))
+ @test !(isregression(judge(groupsa, groupsa)))
+ @test isimprovement(judge(groupsa, groupsb))
+ @test !(isimprovement(judge(groupsa, groupsa)))
+ @test invariants(judge(groupsa, groupsb)).data == Dict("g1" => judge(g1, g1), "g2" => judge(g2, g2))
+ @test regressions(judge(groupsa, groupsb)).data == Dict("g3" => regressions(judge(g3a, g3b)))
+ @test improvements(judge(groupsa, groupsb)).data == Dict("g3" => improvements(judge(g3a, g3b)))
+
+ @test minimum(groupstrial)["g"]["t"] == minimum(groupstrial["g"]["t"])
+ @test maximum(groupstrial)["g"]["t"] == maximum(groupstrial["g"]["t"])
+ @test median(groupstrial)["g"]["t"] == median(groupstrial["g"]["t"])
+ @test mean(groupstrial)["g"]["t"] == mean(groupstrial["g"]["t"])
+ @test params(groupstrial)["g"]["t"] == params(groupstrial["g"]["t"])
+
+end
+
+@testset "Tagging" begin
+ g1, g1copy, g1similar, g2, gtrial, t1a, t1b, tc, t2a, t2b = setup_vals()
+ groupsa, g3a, groupsb, g3b, groupstrial, groups_copy, groups_similar = setup_extra(g1, g2, gtrial)
+
+ @test groupsa[@tagged "1"] == BenchmarkGroup([], "g1" => g1)
+ @test groupsa[@tagged "2"] == BenchmarkGroup([], "g1" => g1, "g2" => g2)
+ @test groupsa[@tagged "3"] == BenchmarkGroup([], "g2" => g2, "g3" => g3a)
+ @test groupsa[@tagged "4"] == BenchmarkGroup([], "g3" => g3a)
+ @test groupsa[@tagged "3" && "4"] == groupsa[@tagged "4"]
+ @test groupsa[@tagged ALL && !("2")] == groupsa[@tagged !("2")]
+ @test groupsa[@tagged "1" || "4"] == BenchmarkGroup([], "g1" => g1, "g3" => g3a)
+ @test groupsa[@tagged ("1" || "4") && !("2")] == groupsa[@tagged "4"]
+ @test groupsa[@tagged !("1" || "4") && "2"] == BenchmarkGroup([], "g2" => g2)
+ @test groupsa[@tagged ALL] == groupsa
+ @test groupsa[@tagged !("1" || "3") && !("4")] == similar(groupsa)
+
+ gnest = BenchmarkGroup(["1"],
+ "2" => BenchmarkGroup(["3"], 1 => 1),
+ 4 => BenchmarkGroup(["3"], 5 => 6),
+ 7 => 8,
+ "a" => BenchmarkGroup(["3"], "a" => :a, (11, "b") => :b),
+ 9 => BenchmarkGroup(["2"],
+ 10 => BenchmarkGroup(["3"]),
+ 11 => BenchmarkGroup()))
+
+ @test sort(leaves(gnest), by=string) ==
+ Any[(Any["2",1],1), (Any["a","a"],:a), (Any["a",(11,"b")],:b), (Any[4,5],6), (Any[7],8)]
+
+ @test gnest[@tagged 11 || 10] == BenchmarkGroup(["1"],
+ "a" => BenchmarkGroup(["3"],
+ (11, "b") => :b),
+ 9 => gnest[9])
+
+ @test gnest[@tagged "3"] == BenchmarkGroup(["1"], "2" => gnest["2"], 4 => gnest[4], "a" => gnest["a"],
+ 9 => BenchmarkGroup(["2"], 10 => BenchmarkGroup(["3"])))
+
+ @test gnest[@tagged "1" && "2" && "3"] == BenchmarkGroup(["1"], "2" => gnest["2"],
+ 9 => BenchmarkGroup(["2"], 10 => BenchmarkGroup(["3"])))
+
+ k = 3 + im
+ gnest = BenchmarkGroup(["1"], :hi => BenchmarkGroup([], 1 => 1, k => BenchmarkGroup(["3"], 1 => 1)), 2 => 1)
+
+ @test gnest[@tagged "1"] == gnest
+ @test gnest[@tagged "1" && !(:hi)] == BenchmarkGroup(["1"], 2 => 1)
+ @test gnest[@tagged :hi && !("3")] == BenchmarkGroup(["1"], :hi => BenchmarkGroup([], 1 => 1))
+ @test gnest[@tagged k] == BenchmarkGroup(["1"], :hi => BenchmarkGroup([], k => BenchmarkGroup(["3"], 1 => 1)))
+end
+
+@testset "Indexing by BenchmarkGroup" begin
+ g = BenchmarkGroup()
+ d = Dict("1" => 1, "2" => 2, "3" => 3)
+ g["a"] = BenchmarkGroup([], copy(d))
+ g["b"] = BenchmarkGroup([], copy(d))
+ g["c"] = BenchmarkGroup([], copy(d))
+ g["d"] = BenchmarkGroup([], copy(d))
+ g["e"] = BenchmarkGroup([], "1" => BenchmarkGroup([], copy(d)),
+ "2" => BenchmarkGroup([], copy(d)),
+ "3" => BenchmarkGroup([], copy(d)))
+
+ x = BenchmarkGroup()
+ x["a"] = BenchmarkGroup([], "1" => '1', "3" => '3')
+ x["c"] = BenchmarkGroup([], "2" => '2')
+ x["d"] = BenchmarkGroup([], "1" => '1', "2" => '2', "3" => '3')
+ x["e"] = BenchmarkGroup([], "1" => x["a"], "3" => x["c"])
+
+ gx = BenchmarkGroup()
+ gx["a"] = BenchmarkGroup([], "1" => 1, "3" => 3)
+ gx["c"] = BenchmarkGroup([], "2" => 2)
+ gx["d"] = BenchmarkGroup([], "1" => 1, "2" => 2, "3" => 3)
+ gx["e"] = BenchmarkGroup([], "1" => g["e"]["1"][x["a"]], "3" => g["e"]["3"][x["c"]])
+
+ @test g[x] == gx
+end
+
+@testset "Indexing by vector" begin
+ g1 = BenchmarkGroup(1 => BenchmarkGroup("a" => BenchmarkGroup()))
+ g1[[1, "a", :b]] = "hello"
+ @test g1[[1, "a", :b]] == "hello"
+
+ g2 = BenchmarkGroup()
+ g2[[1, "a", :b]] = "hello" # should create higher levels on the fly
+ @test g2[[1, "a", :b]] == "hello"
+
+ @test g1 == g2
+
+ @testset "benchmarkset" begin
+ g1 = @benchmarkset "test set" begin
+ @case "test case 1" 1 + 1
+ @case "test case 2" 2 + 2
+ end
+
+ @test haskey(g1, "test set")
+ @test haskey(g1["test set"], "test case 1")
+ @test haskey(g1["test set"], "test case 2")
+ end
+end
+
+@testset "Pretty printing" begin
+ g1, g1copy, g1similar, g2, gtrial, t1a, t1b, tc, t2a, t2b = setup_vals()
+
+ g1 = BenchmarkGroup(["1", "2"])
+ g1["a"] = t1a
+ g1["b"] = t1b
+ g1["c"] = tc
+
+ data = read(joinpath(@__DIR__, "data", "test03_pretty.txt"), String)
+ pp = strip.(split(data, "\n\n"))
+
+ @test sprint(show, g1) == pp[1]
+ @test sprint(show, g1; context = :boundto => 1) == pp[2]
+ @test sprint(show, g1; context = :limit => false) == pp[3]
+ @test @test_deprecated(sprint(show, g1; context = :limit => 1)) == pp[4]
+end
+
+end # module
diff --git a/test/test04_Execution.jl b/test/test04_Execution.jl
new file mode 100644
index 00000000..f161ddc5
--- /dev/null
+++ b/test/test04_Execution.jl
@@ -0,0 +1,254 @@
+module ExecutionTests
+
+using BenchmarkExt
+using Profile
+using ReTest
+
+seteq(a, b) = length(a) == length(b) == length(intersect(a, b))
+
+#########
+# setup #
+#########
+
+groups = BenchmarkGroup()
+groups["sum"] = BenchmarkGroup(["arithmetic"])
+groups["sin"] = BenchmarkGroup(["trig"])
+groups["special"] = BenchmarkGroup()
+
+sizes = (5, 10, 20)
+
+for s in sizes
+ A = rand(s, s)
+ groups["sum"][s] = @benchmarkable sum($A) seconds=3
+ groups["sin"][s] = @benchmarkable(sin($s), seconds=1, gctrial=false)
+end
+
+groups["special"]["macro"] = @benchmarkable @test(1 == 1)
+groups["special"]["nothing"] = @benchmarkable nothing
+groups["special"]["block"] = @benchmarkable begin rand(3) end
+groups["special"]["comprehension"] = @benchmarkable [s^2 for s in sizes]
+
+function testexpected(received::BenchmarkGroup, expected::BenchmarkGroup)
+ @test length(received) == length(expected)
+ @test seteq(received.tags, expected.tags)
+ @test seteq(keys(received), keys(expected))
+ for (k, v) in received
+ testexpected(v, expected[k])
+ end
+end
+
+function testexpected(trial::BenchmarkExt.Trial, args...)
+ @test length(trial) > 1
+end
+
+testexpected(b::BenchmarkExt.Benchmark, args...) = true
+
+@testset "tune!" begin
+ oldgroups = copy(groups)
+
+ for id in keys(groups["special"])
+ testexpected(tune!(groups["special"][id]))
+ end
+
+ testexpected(tune!(groups["sin"], verbose = true), groups["sin"])
+ testexpected(tune!(groups, verbose = true), groups)
+
+ oldgroupscopy = copy(oldgroups)
+
+ loadparams!(oldgroups, params(groups), :evals, :samples)
+ loadparams!(oldgroups, params(groups))
+
+ @test oldgroups == oldgroupscopy == groups
+end
+
+@testset "run" begin
+ testexpected(run(groups; verbose = true), groups)
+ testexpected(run(groups; seconds = 1, verbose = true, gctrial = false), groups)
+ testexpected(run(groups; verbose = true, seconds = 1, gctrial = false, time_tolerance = 0.10, samples = 2, evals = 2, gcsample = false), groups)
+
+ testexpected(run(groups["sin"]; verbose = true), groups["sin"])
+ testexpected(run(groups["sin"]; seconds = 1, verbose = true, gctrial = false), groups["sin"])
+ testexpected(run(groups["sin"]; verbose = true, seconds = 1, gctrial = false, time_tolerance = 0.10, samples = 2, evals = 2, gcsample = false), groups["sin"])
+
+ testexpected(run(groups["sin"][first(sizes)]))
+ testexpected(run(groups["sin"][first(sizes)]; seconds = 1, gctrial = false))
+ testexpected(run(groups["sin"][first(sizes)]; seconds = 1, gctrial = false, time_tolerance = 0.10, samples = 2, evals = 2, gcsample = false))
+
+ testexpected(run(groups["sum"][first(sizes)], BenchmarkExt.DEFAULT_PARAMETERS))
+end
+
+@testset "Warmup" begin
+ p = params(warmup(@benchmarkable sin(1)))
+
+ @test p.samples == 1
+ @test p.evals == 1
+ @test p.gctrial == false
+ @test p.gcsample == false
+end
+
+mutable struct Foo
+ x::Int
+end
+const foo = Foo(-1)
+@testset "Benchmark execution" begin
+
+
+ t = @benchmark sin(foo.x) evals=3 samples=10 setup=(foo.x = 0)
+
+ @test foo.x == 0
+ @test params(t).evals == 3
+ @test params(t).samples == 10
+
+ b = @benchmarkable sin(x) setup=(foo.x = -1; x = foo.x) teardown=(@assert(x == -1); foo.x = 1)
+ tune!(b)
+
+ @test foo.x == 1
+ @test params(b).evals > 100
+
+ foo.x = 0
+ tune!(b)
+
+ @test foo.x == 1
+ @test params(b).evals > 100
+
+ # test variable assignment with `@benchmark args...` form
+ @benchmark local_var="good" setup=(local_var="bad") teardown=(@test local_var=="good")
+ @test_throws UndefVarError local_var
+ @benchmark some_var="whatever" teardown=(@test_throws UndefVarError some_var)
+ @benchmark foo,bar="good","good" setup=(foo="bad"; bar="bad") teardown=(@test foo=="good" && bar=="good")
+
+ # test variable assignment with `@benchmark(args...)` form
+ @benchmark(local_var="good", setup=(local_var="bad"), teardown=(@test local_var=="good"))
+ @test_throws UndefVarError local_var
+ @benchmark(some_var="whatever", teardown=(@test_throws UndefVarError some_var))
+ @benchmark((foo,bar) = ("good","good"), setup=(foo = "bad"; bar = "bad"), teardown=(@test foo == "good" && bar == "good"))
+
+ # test kwargs separated by `,`
+ @benchmark(output=sin(x), setup=(x=1.0; output=0.0), teardown=(@test output == sin(x)))
+
+ for (tf, rex1, rex2) in ((false, r"0.5 ns +Histogram: frequency by time +8 ns", r"Histogram: frequency"),
+ (true, r"0.5 ns +Histogram: log\(frequency\) by time +8 ns", r"Histogram: log\(frequency\)"))
+ io = IOBuffer()
+ ioctx = IOContext(io, :histmin=>0.5, :histmax=>8, :logbins=>tf)
+ @show tf
+ b = @benchmark x^3 setup=(x = rand()); show(ioctx, MIME("text/plain"), b)
+ b = @benchmark x^3.0 setup=(x = rand()); show(ioctx, MIME("text/plain"), b)
+ str = String(take!(io))
+ idx = findfirst(rex1, str)
+ @test isa(idx, UnitRange)
+ idx = findnext( rex1, str, idx[end]+1)
+ @test isa(idx, UnitRange)
+ ioctx = IOContext(io, :logbins=>tf)
+ # A flat distribution won't trigger log by default
+ b = BenchmarkExt.Trial(BenchmarkExt.DEFAULT_PARAMETERS, 0.001 * (1:100) * 1e9, zeros(100), zeros(Int, 100), zeros(Int, 100))
+ show(ioctx, MIME("text/plain"), b)
+ str = String(take!(io))
+ idx = findfirst(rex2, str)
+ @test isa(idx, UnitRange)
+ # A peaked distribution will trigger log by default
+ t = [fill(1, 21); 2]
+ b = BenchmarkExt.Trial(BenchmarkExt.DEFAULT_PARAMETERS, t/sum(t)*1e9*BenchmarkExt.DEFAULT_PARAMETERS.seconds, zeros(100), zeros(Int, 100), zeros(Int, 100))
+ show(ioctx, MIME("text/plain"), b)
+ str = String(take!(io))
+ idx = findfirst(rex2, str)
+ @test isa(idx, UnitRange)
+ end
+end
+
+function likegcd(a::T, b::T) where T<:Base.BitInteger
+ za = trailing_zeros(a)
+ zb = trailing_zeros(b)
+ k = min(za, zb)
+ u = unsigned(abs(a >> za))
+ v = unsigned(abs(b >> zb))
+ while u != v
+ if u > v
+ u, v = v, u
+ end
+ v -= u
+ v >>= trailing_zeros(v)
+ end
+ r = u << k
+ return r % T
+end
+@testset "bprofile" begin
+
+ b = @bprofile likegcd(x, y) setup=(x = rand(2:200); y = rand(2:200))
+ @test isa(b, BenchmarkExt.Trial)
+ io = IOBuffer()
+ Profile.print(IOContext(io, :displaysize=>(24,200)))
+ str = String(take!(io))
+ @test occursin(r"BenchmarkExt(\.jl)?(/|\\)src(/|\\)execution\.jl:\d+; _run", str)
+ @test !occursin(r"BenchmarkExt(\.jl)?(/|\\)src(/|\\)execution\.jl:\d+; warmup", str)
+ @test !occursin(r"BenchmarkExt(\.jl)?(/|\\)src(/|\\)execution\.jl:\d+; tune!", str)
+ b = @bprofile 1+1
+ Profile.print(IOContext(io, :displaysize=>(24,200)))
+ str = String(take!(io))
+ @test !occursin("gcscrub", str)
+ b = @bprofile 1+1 gctrial=true
+ Profile.print(IOContext(io, :displaysize=>(24,200)))
+ str = String(take!(io))
+ @test occursin("gcscrub", str)
+end
+
+@testset "Misc" begin
+ # This test is volatile in nonquiescent environments (e.g. Travis)
+ # BenchmarkExt.DEFAULT_PARAMETERS.overhead = BenchmarkExt.estimate_overhead()
+ # @test time(minimum(@benchmark nothing)) == 1
+
+ @test [:x, :y, :z, :v, :w] == BenchmarkExt.collectvars(quote
+ x = 1 + 3
+ y = 1 + x
+ z = (a = 4; y + a)
+ v,w = 1,2
+ [u^2 for u in [1,2,3]]
+ end)
+
+ # this should take < 1 s on any sane machine
+ @test @belapsed(sin($(foo.x)), evals=3, samples=10, setup=(foo.x = 0)) < 1
+ @test @belapsed(sin(0)) < 1
+
+ @test @ballocated(sin($(foo.x)), evals=3, samples=10, setup=(foo.x = 0)) == 0
+ @test @ballocated(sin(0)) == 0
+ @test @ballocated(Ref(1)) == 2*sizeof(Int) # 1 for the pointer, 1 for content
+
+ let fname = tempname()
+ try
+ ret = open(fname, "w") do f
+ redirect_stdout(f) do
+ x = 1
+ a = nothing
+ y = @btime(sin($x))
+ @test y == sin(1)
+ @test a === nothing
+ end
+ end
+ s = read(fname, String)
+ try
+ @test occursin(r"[0-9.]+ \w*s \([0-9]* allocations?: [0-9]+ bytes\)", s)
+ catch
+ println(stderr, "@btime output didn't match ", repr(s))
+ rethrow()
+ end
+ finally
+ isfile(fname) && rm(fname)
+ end
+ end
+
+ # issue #107
+ let time = 2
+ @benchmark identity(time)
+ end
+end
+
+#TODO: This test is not working in ReTest, find the reason and fix it
+@testset "Interpolated values are garbage-collectable" begin
+ # x = []
+ # x_finalized = false
+ # finalizer(x->(global x_finalized=true), x)
+ # b = @benchmarkable $x
+ # b = x = nothing
+ # @test x_finalized
+end
+
+end # module
diff --git a/test/SerializationTests.jl b/test/test05_Serialization.jl
similarity index 64%
rename from test/SerializationTests.jl
rename to test/test05_Serialization.jl
index 87900769..13950ed4 100644
--- a/test/SerializationTests.jl
+++ b/test/test05_Serialization.jl
@@ -1,9 +1,9 @@
module SerializationTests
-using BenchmarkTools
-using Test
+using BenchmarkExt
+using ReTest
-eq(x::T, y::T) where {T<:Union{values(BenchmarkTools.SUPPORTED_TYPES)...}} =
+eq(x::T, y::T) where {T<:Union{values(BenchmarkExt.SUPPORTED_TYPES)...}} =
all(i->eq(getfield(x, i), getfield(y, i)), 1:fieldcount(T))
eq(x::T, y::T) where {T} = isapprox(x, y)
@@ -25,10 +25,10 @@ end
withtempdir() do
tmp = joinpath(pwd(), "tmp.json")
- BenchmarkTools.save(tmp, b.params, bb)
+ BenchmarkExt.save(tmp, b.params, bb)
@test isfile(tmp)
- results = BenchmarkTools.load(tmp)
+ results = BenchmarkExt.load(tmp)
@test results isa Vector{Any}
@test length(results) == 2
@test eq(results[1], b.params)
@@ -43,9 +43,9 @@ end
g["a"] = BenchmarkGroup()
g["b"] = BenchmarkGroup()
g["c"] = BenchmarkGroup()
- BenchmarkTools.save(tmp, g)
+ BenchmarkExt.save(tmp, g)
- results = BenchmarkTools.load(tmp)[1]
+ results = BenchmarkExt.load(tmp)[1]
@test results isa BenchmarkGroup
@test all(v->v isa BenchmarkGroup, values(results.data))
end
@@ -56,23 +56,23 @@ end
tune!(b)
bb = run(b)
- @test_throws ArgumentError BenchmarkTools.save("x.jld", b.params)
- @test_throws ArgumentError BenchmarkTools.save("x.txt", b.params)
- @test_throws ArgumentError BenchmarkTools.save("x.json")
- @test_throws ArgumentError BenchmarkTools.save("x.json", 1)
+ @test_throws ArgumentError BenchmarkExt.save("x.jld", b.params)
+ @test_throws ArgumentError BenchmarkExt.save("x.txt", b.params)
+ @test_throws ArgumentError BenchmarkExt.save("x.json")
+ @test_throws ArgumentError BenchmarkExt.save("x.json", 1)
withtempdir() do
tmp = joinpath(pwd(), "tmp.json")
- @test_logs (:warn, r"Naming variables") BenchmarkTools.save(tmp, "b", b.params)
+ @test_logs (:warn, r"Naming variables") BenchmarkExt.save(tmp, "b", b.params)
@test isfile(tmp)
- results = BenchmarkTools.load(tmp)
+ results = BenchmarkExt.load(tmp)
@test length(results) == 1
@test eq(results[1], b.params)
end
- @test_throws ArgumentError BenchmarkTools.load("x.jld")
- @test_throws ArgumentError BenchmarkTools.load("x.txt")
- @test_throws ArgumentError BenchmarkTools.load("x.json", "b")
+ @test_throws ArgumentError BenchmarkExt.load("x.jld")
+ @test_throws ArgumentError BenchmarkExt.load("x.txt")
+ @test_throws ArgumentError BenchmarkExt.load("x.json", "b")
end
@testset "Error checking" begin
@@ -84,7 +84,7 @@ end
""")
end
try
- BenchmarkTools.load(tmp)
+ BenchmarkExt.load(tmp)
error("madness")
catch err
# This function thows a bunch of errors, so test for this specifically
@@ -92,7 +92,7 @@ end
end
end
- @test_throws ArgumentError BenchmarkTools.recover([1])
+ @test_throws ArgumentError BenchmarkExt.recover([1])
end
end # module