From dd69ff0cd642deceda65fa45ab0c4eba2ad9e69c Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 15 Dec 2017 19:32:27 -0500 Subject: [PATCH 1/8] Add the `Timers` module to measure real-time and cpu-time This code is based on the `google/benchmark` repository. - `realtime` is a monotonic high-resolution clock provided by the kernel. In most cases this is equivalent to `time_ns` in `Base`. - `cputime` is the time that the process was actually running on a processor. This excludes time spent in the kernel and time spent descheduled. The `Timers` module includes a fair amount of platform-specific code so the implementation is split between a generic *unix* implementation based on the high-resolution clock provided through `clock_gettime`. On *darwin* prior to 10.12.0 the kernel does not provide `clock_gettime`, so we have to fall back onto `getrusage`. We intentionally don't do error handling for the timer calls, which may or may not be a wise choice. --- LICENSE.md | 3 ++- src/BenchmarkTools.jl | 10 ++++++++- src/timers/darwin.jl | 52 +++++++++++++++++++++++++++++++++++++++++++ src/timers/timers.jl | 37 ++++++++++++++++++++++++++++++ src/timers/unix.jl | 48 +++++++++++++++++++++++++++++++++++++++ src/timers/windows.jl | 43 +++++++++++++++++++++++++++++++++++ 6 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 src/timers/darwin.jl create mode 100644 src/timers/timers.jl create mode 100644 src/timers/unix.jl create mode 100644 src/timers/windows.jl diff --git a/LICENSE.md b/LICENSE.md index 87cba3b7..0aa37b31 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,7 @@ The BenchmarkTools.jl package is licensed under the MIT "Expat" License: -> Copyright (c) 2015: Jarrett Revels. 
+> Copyright (c) 2015-2018: Jarrett Revels and other contributors: +> https://github.com/JuliaCI/BenchmarkTools.jl/contributors > > Permission is hereby granted, free of charge, to any person obtaining > a copy of this software and associated documentation files (the diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl index 284f11a2..ecf5a403 100644 --- a/src/BenchmarkTools.jl +++ b/src/BenchmarkTools.jl @@ -12,6 +12,12 @@ end const BENCHMARKTOOLS_VERSION = v"0.2.2" +########## +# Timers # +########## + +include("timers/timers.jl") + ############## # Parameters # ############## @@ -26,7 +32,9 @@ export loadparams! include("trials.jl") -export gctime, +export realtime, + cputime, + gctime, memory, allocs, params, diff --git a/src/timers/darwin.jl b/src/timers/darwin.jl new file mode 100644 index 00000000..c126f2d0 --- /dev/null +++ b/src/timers/darwin.jl @@ -0,0 +1,52 @@ +## +# This file is a part of BenchmarkTools.jl. License is MIT +# +# Based upon https://github.com/google/benchmark, which is licensed under Apache v2: +# https://github.com/google/benchmark/blob/master/LICENSE +# +# In compliance with the Apache v2 license, here are the original copyright notices: +# Copyright 2015 Google Inc. All rights reserved. 
+## + +const RUSAGE_SELF = Cint(0) +const RUSAGE_CHILDREN = Cint(-1) + +struct TimeVal + tv_sec::Clong + tv_usec::Clong +end + +struct RUsage + ru_utime::TimeVal + ru_stime::TimeVal + ru_maxrss::Clong + ru_ixrss::Clong + ru_idrss::Clong + ru_isrss::Clong + ru_minflt::Clong + ru_majflt::Clong + ru_nswap::Clong + ru_inblock::Clong + ru_outblock::Clong + ru_msgsnd::Clong + ru_msgrcv::Clong + ru_nsignals::Clong + ru_nvcsw::Clong + ru_nivcsw::Clong +end + +@inline function maketime(ru::RUsage) + user = ru.ru_utime.tv_sec * 1e9 + ru.ru_utime.tv_usec *1e3 + kernel = ru.ru_stime.tv_sec * 1e9 + ru.ru_stime.tv_usec *1e3 + return user+kernel +end + +@inline function realtime() + Float64(Base.time_ns()) +end + +@inline function cputime() + ru = Ref{RUsage}() + ccall(:getrusage, Cint, (Cint, Ref{RUsage}), RUSAGE_SELF, ru) + return maketime(ru[]) +end diff --git a/src/timers/timers.jl b/src/timers/timers.jl new file mode 100644 index 00000000..d1643090 --- /dev/null +++ b/src/timers/timers.jl @@ -0,0 +1,37 @@ +# This file is a part of BenchmarkTools.jl. License is MIT + +module Timers +import Compat + +""" + realtime() + +Monotonic runtime counter + +Returns time in ns as Float64. +""" +function realtime end + +""" + cputime() + +Process specific CPU time clock. + +Returns time in ns as Float64. +""" +function cputime end + +function _applever() + return VersionNumber(readchomp(`sw_vers -productVersion`)) +end + +if Compat.Sys.isapple() && _applever() < v"10.12.0" + include("darwin.jl") +elseif Compat.Sys.isunix() + include("unix.jl") +elseif Compat.Sys.iswindows() + include("windows.jl") +else + error("$(Sys.KERNEL) is not supported please file an issue") +end +end # module \ No newline at end of file diff --git a/src/timers/unix.jl b/src/timers/unix.jl new file mode 100644 index 00000000..5cb19e5f --- /dev/null +++ b/src/timers/unix.jl @@ -0,0 +1,48 @@ +## +# This file is a part of BenchmarkTools.jl. 
License is MIT +# +# Based upon https://github.com/google/benchmark, which is licensed under Apache v2: +# https://github.com/google/benchmark/blob/master/LICENSE +# +# In compliance with the Apache v2 license, here are the original copyright notices: +# Copyright 2015 Google Inc. All rights reserved. +## + +struct TimeSpec + tv_sec :: UInt64 # time_t + tv_nsec :: UInt64 +end + +maketime(ts) = ts.tv_sec * 1e9 + ts.tv_nsec + +# From bits/times.h on a Linux system +# Check if those are the same on BSD +if Compat.Sys.islinux() + const CLOCK_MONOTONIC = Cint(1) + const CLOCK_PROCESS_CPUTIME_ID = Cint(2) +elseif Sys.KERNEL == :FreeBSD # atleast on FreeBSD 11.1 + const CLOCK_MONOTONIC = Cint(4) + const CLOCK_PROCESS_CPUTIME_ID = Cint(14) +elseif Compat.Sys.isapple() # Version 10.12 required + const CLOCK_MONOTONIC = Cint(6) + const CLOCK_PROCESS_CPUTIME_ID = Cint(12) +else + error(""" + BenchmarkTools doesn't currently support your operating system. + Please file an issue, your kernel is $(Sys.KERNEL) + """) +end + +@inline function clock_gettime(cid) + ts = Ref{TimeSpec}() + ccall(:clock_gettime, Cint, (Cint, Ref{TimeSpec}), cid, ts) + return ts[] +end + +@inline function realtime() + maketime(clock_gettime(CLOCK_MONOTONIC)) +end + +@inline function cputime() + maketime(clock_gettime(CLOCK_PROCESS_CPUTIME_ID)) +end diff --git a/src/timers/windows.jl b/src/timers/windows.jl new file mode 100644 index 00000000..80dba472 --- /dev/null +++ b/src/timers/windows.jl @@ -0,0 +1,43 @@ +## +# This file is a part of BenchmarkTools.jl. License is MIT +# +# Based upon https://github.com/google/benchmark, which is licensed under Apache v2: +# https://github.com/google/benchmark/blob/master/LICENSE +# +# In compliance with the Apache v2 license, here are the original copyright notices: +# Copyright 2015 Google Inc. All rights reserved. 
+## + +""" + FileTime + +See https://msdn.microsoft.com/en-us/library/windows/desktop/ms724284(v=vs.85).aspx +""" +struct FileTime + dwLowDateTime::UInt32 + dwHighDateTime::UInt32 +end + +const HANDLE = Ptr{Void} + +@inline function maketime(kernel_time::FileTime, user_time::FileTime) + kernel = (kernel_time.dwHighDateTime % UInt64) << 32 | kernel_time.dwLowDateTime + user = ( user_time.dwHighDateTime % UInt64) << 32 | user_time.dwLowDateTime + (kernel + user) * 1e2 +end + +@inline function realtime() + return Float64(Base.time_ns()) +end + +@inline function cputime() + proc = ccall(:GetCurrentProcess, HANDLE, ()) + creation_time = Ref{FileTime}() + exit_time = Ref{FileTime}() + kernel_time = Ref{FileTime}() + user_time = Ref{FileTime}() + + ccall(:GetProcessTimes, Cint, (HANDLE, Ref{FileTime}, Ref{FileTime}, Ref{FileTime}, Ref{FileTime}), + proc, creation_time, exit_time, kernel_time, user_time) + return maketime(kernel_time[], user_time[]) +end From beea80c25fb8badd10751be59f1fc1e9bda6a5e6 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 19 Jan 2018 11:52:00 -0500 Subject: [PATCH 2/8] Measure both real-time and cpu-time Renames `times` and `time` to `realtime` and `realtimes` throughout and adds `cputime` and `cputimes`. Real-time is the actual monotonic run time of the code and cpu-time is the time spend in userspace running on processor. 
``` julia> @btime sin(1) 7.572 ns [100.00% CPU, 0.00% GC] (0 allocations: 0 bytes) 0.8414709848078965 julia> @btime sleep(1) 1.002 s [0.01% CPU, 0.00% GC] (5 allocations: 192 bytes) ``` --- src/BenchmarkTools.jl | 10 ++++ src/execution.jl | 17 ++++-- src/groups.jl | 3 +- src/parameters.jl | 8 ++- src/serialization.jl | 2 +- src/trials.jl | 128 ++++++++++++++++++++++++----------------- test/ExecutionTests.jl | 4 +- test/GroupsTests.jl | 26 +++++---- test/TrialsTests.jl | 58 +++++++++++-------- 9 files changed, 152 insertions(+), 104 deletions(-) diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl index ecf5a403..534d5db4 100644 --- a/src/BenchmarkTools.jl +++ b/src/BenchmarkTools.jl @@ -80,4 +80,14 @@ export tune!, include("serialization.jl") +################# +# Deprecations # +################# +import Base: time +@deprecate time(t::Trial) realtime(t) +@deprecate time(t::TrialJudgement) realtime(t) +@deprecate time(t::TrialEstimate) realtime(t) +@deprecate time(t::TrialRatio) realtime(t) +@deprecate time(group::BenchmarkGroup) realtime(group) + end # module BenchmarkTools diff --git a/src/execution.jl b/src/execution.jl index c28e3480..cf09b54c 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -314,21 +314,24 @@ function generate_benchmark_definition(eval_module, out_vars, setup_vars, core, $(setup) __evals = __params.evals __gc_start = Base.gc_num() - __start_time = time_ns() + __start_realtime = $BenchmarkTools.Timers.realtime() + __start_cputime = $BenchmarkTools.Timers.cputime() __return_val = $(invocation) for __iter in 2:__evals $(invocation) end - __sample_time = time_ns() - __start_time + __sample_realtime = $BenchmarkTools.Timers.realtime() - __start_realtime + __sample_cputime = $BenchmarkTools.Timers.cputime() - __start_cputime __gcdiff = Base.GC_Diff(Base.gc_num(), __gc_start) $(teardown) - __time = max((__sample_time / __evals) - __params.overhead, 0.001) + __realtime = max((__sample_realtime / __evals) - __params.overhead, 0.001) + 
__cputime = max((__sample_cputime / __evals) - __params.overhead, 0.001) __gctime = max((__gcdiff.total_time / __evals) - __params.overhead, 0.0) __memory = Int(fld(__gcdiff.allocd, __evals)) __allocs = Int(fld(__gcdiff.malloc + __gcdiff.realloc + __gcdiff.poolalloc + __gcdiff.bigalloc, __evals)) - return __time, __gctime, __memory, __allocs, __return_val + return __realtime, __cputime, __gctime, __memory, __allocs, __return_val end function $BenchmarkTools.sample(b::$BenchmarkTools.Benchmark{$(id)}, p::$BenchmarkTools.Parameters = b.params) @@ -379,7 +382,7 @@ is the *minimum* elapsed time measured during the benchmark. macro belapsed(args...) b = Expr(:macrocall, Symbol("@benchmark"), map(esc, args)...) return esc(quote - $BenchmarkTools.time($BenchmarkTools.minimum($BenchmarkTools.@benchmark $(args...)))/1e9 + $BenchmarkTools.realtime($BenchmarkTools.minimum($BenchmarkTools.@benchmark $(args...)))/1e9 end) end @@ -409,7 +412,9 @@ macro btime(args...) $trialmin = $BenchmarkTools.minimum($trial) $trialallocs = $BenchmarkTools.allocs($trialmin) println(" ", - $BenchmarkTools.prettytime($BenchmarkTools.time($trialmin)), + $BenchmarkTools.prettytime($BenchmarkTools.realtime($trialmin)), " [", + $BenchmarkTools.prettypercent($BenchmarkTools.cpuratio($trialmin)), " CPU, ", + $BenchmarkTools.prettypercent($BenchmarkTools.gcratio($trialmin)), " GC]", " (", $trialallocs , " allocation", $trialallocs == 1 ? "" : "s", ": ", $BenchmarkTools.prettymemory($BenchmarkTools.memory($trialmin)), ")") diff --git a/src/groups.jl b/src/groups.jl index 1c3db32c..37fda3b9 100644 --- a/src/groups.jl +++ b/src/groups.jl @@ -81,7 +81,8 @@ Base.median(group::BenchmarkGroup) = mapvals(median, group) Base.min(groups::BenchmarkGroup...) = mapvals(min, groups...) Base.max(groups::BenchmarkGroup...) = mapvals(max, groups...) 
-Base.time(group::BenchmarkGroup) = mapvals(time, group) +realtime(group::BenchmarkGroup) = mapvals(realtime, group) +cputime(group::BenchmarkGroup) = mapvals(cputime, group) gctime(group::BenchmarkGroup) = mapvals(gctime, group) memory(group::BenchmarkGroup) = mapvals(memory, group) allocs(group::BenchmarkGroup) = mapvals(allocs, group) diff --git a/src/parameters.jl b/src/parameters.jl index b31e39a7..015bb95c 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -75,12 +75,14 @@ end @noinline nullfunc() = nothing @noinline function overhead_sample(evals) - start_time = time_ns() + start_realtime = Timers.realtime() + start_cputime = Timers.cputime() for _ in 1:evals nullfunc() end - sample_time = time_ns() - start_time - return (sample_time / evals) + sample_realtime = Timers.realtime() - start_realtime + sample_cputime = Timers.cputime() - start_cputime + return (sample_realtime / evals, sample_cputime / evals) end function estimate_overhead() diff --git a/src/serialization.jl b/src/serialization.jl index 4ea66dcf..f568017e 100644 --- a/src/serialization.jl +++ b/src/serialization.jl @@ -1,5 +1,5 @@ if VERSION >= v"0.7.0-DEV.2437" - using Base.Meta.parse + using Base.Meta: parse end const VERSIONS = Dict("Julia" => string(VERSION), diff --git a/src/trials.jl b/src/trials.jl index 0509e4c7..2a571904 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -4,26 +4,29 @@ mutable struct Trial params::Parameters - times::Vector{Float64} + realtimes::Vector{Float64} + cputimes::Vector{Float64} gctimes::Vector{Float64} memory::Int allocs::Int end -Trial(params::Parameters) = Trial(params, Float64[], Float64[], typemax(Int), typemax(Int)) +Trial(params::Parameters) = Trial(params, Float64[], Float64[], Float64[], typemax(Int), typemax(Int)) @compat function Base.:(==)(a::Trial, b::Trial) return a.params == b.params && - a.times == b.times && + a.realtimes == b.realtimes && + a.cputimes == b.cputimes && a.gctimes == b.gctimes && a.memory == b.memory && a.allocs == 
b.allocs end -Base.copy(t::Trial) = Trial(copy(t.params), copy(t.times), copy(t.gctimes), t.memory, t.allocs) +Base.copy(t::Trial) = Trial(copy(t.params), copy(t.realtimes), copy(t.cputimes), copy(t.gctimes), t.memory, t.allocs) -function Base.push!(t::Trial, time, gctime, memory, allocs) - push!(t.times, time) +function Base.push!(t::Trial, realtime, cputime, gctime, memory, allocs) + push!(t.realtimes, realtime) + push!(t.cputimes, cputime) push!(t.gctimes, gctime) memory < t.memory && (t.memory = memory) allocs < t.allocs && (t.allocs = allocs) @@ -31,26 +34,29 @@ function Base.push!(t::Trial, time, gctime, memory, allocs) end function Base.deleteat!(t::Trial, i) - deleteat!(t.times, i) + deleteat!(t.realtimes, i) + deleteat!(t.cputimes, i) deleteat!(t.gctimes, i) return t end -Base.length(t::Trial) = length(t.times) -Base.getindex(t::Trial, i::Number) = push!(Trial(t.params), t.times[i], t.gctimes[i], t.memory, t.allocs) -Base.getindex(t::Trial, i) = Trial(t.params, t.times[i], t.gctimes[i], t.memory, t.allocs) +Base.length(t::Trial) = length(t.realtimes) +Base.getindex(t::Trial, i::Number) = push!(Trial(t.params), t.realtimes[i], t.cputimes[i], t.gctimes[i], t.memory, t.allocs) +Base.getindex(t::Trial, i) = Trial(t.params, t.realtimes[i], t.cputimes[i], t.gctimes[i], t.memory, t.allocs) Base.endof(t::Trial) = length(t) function Base.sort!(t::Trial) - inds = sortperm(t.times) - t.times = t.times[inds] + inds = sortperm(t.realtimes) + t.realtimes = t.realtimes[inds] + t.cputimes = t.cputimes[inds] t.gctimes = t.gctimes[inds] return t end Base.sort(t::Trial) = sort!(copy(t)) -Base.time(t::Trial) = time(minimum(t)) +realtime(t::Trial) = realtime(minimum(t)) +cputime(t::Trial) = cputime(minimum(t)) gctime(t::Trial) = gctime(minimum(t)) memory(t::Trial) = t.memory allocs(t::Trial) = t.allocs @@ -66,7 +72,7 @@ function skewcutoff(values) return length(current_values) + 1 end -skewcutoff(t::Trial) = skewcutoff(t.times) +skewcutoff(t::Trial) = skewcutoff(t.realtimes) 
function rmskew!(t::Trial) sort!(t) @@ -88,19 +94,21 @@ trim(t::Trial, percentage = 0.1) = t[1:max(1, floor(Int, length(t) - (length(t) mutable struct TrialEstimate params::Parameters - time::Float64 + realtime::Float64 + cputime::Float64 gctime::Float64 memory::Int allocs::Int end -function TrialEstimate(trial::Trial, t, gct) - return TrialEstimate(params(trial), t, gct, memory(trial), allocs(trial)) +function TrialEstimate(trial::Trial, realtime, cputime, gctime) + return TrialEstimate(params(trial), realtime, cputime, gctime, memory(trial), allocs(trial)) end @compat function Base.:(==)(a::TrialEstimate, b::TrialEstimate) return a.params == b.params && - a.time == b.time && + a.realtime == b.realtime && + a.cputime == b.cputime && a.gctime == b.gctime && a.memory == b.memory && a.allocs == b.allocs @@ -109,21 +117,22 @@ end -Base.copy(t::TrialEstimate) = TrialEstimate(copy(t.params), t.time, t.gctime, t.memory, t.allocs) +Base.copy(t::TrialEstimate) = TrialEstimate(copy(t.params), t.realtime, t.cputime, t.gctime, t.memory, t.allocs) function Base.minimum(trial::Trial) - i = indmin(trial.times) - return TrialEstimate(trial, trial.times[i], trial.gctimes[i]) + i = indmin(trial.realtimes) + return TrialEstimate(trial, trial.realtimes[i], trial.cputimes[i], trial.gctimes[i]) end function Base.maximum(trial::Trial) - i = indmax(trial.times) - return TrialEstimate(trial, trial.times[i], trial.gctimes[i]) + i = indmax(trial.realtimes) + return TrialEstimate(trial, trial.realtimes[i], trial.cputimes[i], trial.gctimes[i]) end -Base.median(trial::Trial) = TrialEstimate(trial, median(trial.times), median(trial.gctimes)) -Base.mean(trial::Trial) = TrialEstimate(trial, mean(trial.times), mean(trial.gctimes)) +Base.median(trial::Trial) = TrialEstimate(trial, median(trial.realtimes), median(trial.cputimes), median(trial.gctimes)) +Base.mean(trial::Trial) = TrialEstimate(trial, mean(trial.realtimes), mean(trial.cputimes), mean(trial.gctimes)) -Base.isless(a::TrialEstimate, b::TrialEstimate) = isless(time(a), time(b)) +Base.isless(a::TrialEstimate, b::TrialEstimate) = isless(realtime(a), 
realtime(b)) -Base.time(t::TrialEstimate) = t.time +realtime(t::TrialEstimate) = t.realtime +cputime(t::TrialEstimate) = t.cputime gctime(t::TrialEstimate) = t.gctime memory(t::TrialEstimate) = t.memory allocs(t::TrialEstimate) = t.allocs @@ -135,7 +144,8 @@ params(t::TrialEstimate) = t.params mutable struct TrialRatio params::Parameters - time::Float64 + realtime::Float64 + cputime::Float64 gctime::Float64 memory::Float64 allocs::Float64 @@ -143,15 +153,17 @@ end @compat function Base.:(==)(a::TrialRatio, b::TrialRatio) return a.params == b.params && - a.time == b.time && + a.realtime == b.realtime && + a.cputime == b.cputime && a.gctime == b.gctime && a.memory == b.memory && a.allocs == b.allocs end -Base.copy(t::TrialRatio) = TrialRatio(copy(t.params), t.time, t.gctime, t.memory, t.allocs) +Base.copy(t::TrialRatio) = TrialRatio(copy(t.params), t.realtime, t.cputime, t.gctime, t.memory, t.allocs) -Base.time(t::TrialRatio) = t.time +realtime(t::TrialRatio) = t.realtime +cputime(t::TrialRatio) = t.cputime gctime(t::TrialRatio) = t.gctime memory(t::TrialRatio) = t.memory allocs(t::TrialRatio) = t.allocs @@ -168,11 +180,13 @@ function ratio(a::TrialEstimate, b::TrialEstimate) ttol = max(params(a).time_tolerance, params(b).time_tolerance) mtol = max(params(a).memory_tolerance, params(b).memory_tolerance) p = Parameters(params(a); time_tolerance = ttol, memory_tolerance = mtol) - return TrialRatio(p, ratio(time(a), time(b)), ratio(gctime(a), gctime(b)), - ratio(memory(a), memory(b)), ratio(allocs(a), allocs(b))) + return TrialRatio(p, ratio(realtime(a), realtime(b)), ratio(cputime(a), cputime(b)), + ratio(gctime(a), gctime(b)), ratio(memory(a), memory(b)), + ratio(allocs(a), allocs(b))) end -gcratio(t::TrialEstimate) = ratio(gctime(t), time(t)) +gcratio(t::TrialEstimate) = ratio(gctime(t), realtime(t)) +cpuratio(t::TrialEstimate) = ratio(cputime(t), realtime(t)) ################## # TrialJudgement # @@ -180,25 +194,28 @@ gcratio(t::TrialEstimate) = ratio(gctime(t), 
time(t)) struct TrialJudgement ratio::TrialRatio - time::Symbol + realtime::Symbol + cputime::Symbol memory::Symbol end function TrialJudgement(r::TrialRatio) ttol = params(r).time_tolerance mtol = params(r).memory_tolerance - return TrialJudgement(r, judge(time(r), ttol), judge(memory(r), mtol)) + return TrialJudgement(r, judge(realtime(r), ttol), judge(cputime(r), ttol), judge(memory(r), mtol)) end @compat function Base.:(==)(a::TrialJudgement, b::TrialJudgement) return a.ratio == b.ratio && - a.time == b.time && + a.realtime == b.realtime && + a.cputime == b.cputime && a.memory == b.memory end -Base.copy(t::TrialJudgement) = TrialJudgement(copy(t.params), t.time, t.memory) +Base.copy(t::TrialJudgement) = TrialJudgement(copy(t.ratio), t.realtime, t.cputime, t.memory) -Base.time(t::TrialJudgement) = t.time +realtime(t::TrialJudgement) = t.realtime +cputime(t::TrialJudgement) = t.cputime memory(t::TrialJudgement) = t.memory ratio(t::TrialJudgement) = t.ratio params(t::TrialJudgement) = params(ratio(t)) @@ -221,9 +238,9 @@ function judge(ratio::Real, tolerance::Float64) end end -isimprovement(t::TrialJudgement) = time(t) == :improvement || memory(t) == :improvement -isregression(t::TrialJudgement) = time(t) == :regression || memory(t) == :regression -isinvariant(t::TrialJudgement) = time(t) == :invariant && memory(t) == :invariant +isimprovement(t::TrialJudgement) = realtime(t) == :improvement || cputime(t) == :improvement || memory(t) == :improvement +isregression(t::TrialJudgement) = realtime(t) == :regression || cputime(t) == :regression || memory(t) == :regression +isinvariant(t::TrialJudgement) = realtime(t) == :invariant && cputime(t) == :invariant && memory(t) == :invariant ################### # Pretty Printing # ################### @@ -262,10 +279,10 @@ function prettymemory(b) return string(@sprintf("%.2f", value), " ", units) end -Base.show(io::IO, t::Trial) = print(io, "Trial(", prettytime(time(t)), ")") -Base.show(io::IO, t::TrialEstimate) = print(io, "TrialEstimate(", 
prettytime(time(t)), ")") -Base.show(io::IO, t::TrialRatio) = print(io, "TrialRatio(", prettypercent(time(t)), ")") -Base.show(io::IO, t::TrialJudgement) = print(io, "TrialJudgement(", prettydiff(time(ratio(t))), " => ", time(t), ")") +Base.show(io::IO, t::Trial) = print(io, "Trial(", prettytime(realtime(t)), ")") +Base.show(io::IO, t::TrialEstimate) = print(io, "TrialEstimate(", prettytime(realtime(t)), ")") +Base.show(io::IO, t::TrialRatio) = print(io, "TrialRatio(", prettypercent(realtime(t)), ")") +Base.show(io::IO, t::TrialJudgement) = print(io, "TrialJudgement(", prettydiff(realtime(ratio(t))), " => ", realtime(t), ")") @compat function Base.show(io::IO, ::MIME"text/plain", t::Trial) if length(t) > 0 @@ -275,10 +292,10 @@ Base.show(io::IO, t::TrialJudgement) = print(io, "TrialJudgement(", prettydiff(t avg = mean(t) memorystr = string(prettymemory(memory(min))) allocsstr = string(allocs(min)) - minstr = string(prettytime(time(min)), " (", prettypercent(gcratio(min)), " GC)") - maxstr = string(prettytime(time(med)), " (", prettypercent(gcratio(med)), " GC)") - medstr = string(prettytime(time(avg)), " (", prettypercent(gcratio(avg)), " GC)") - meanstr = string(prettytime(time(max)), " (", prettypercent(gcratio(max)), " GC)") + minstr = string(prettytime(realtime(min)), " (", prettypercent(cpuratio(min)) ," CPU, ", prettypercent(gcratio(min)), " GC)") + maxstr = string(prettytime(realtime(med)), " (", prettypercent(cpuratio(med)) ," CPU, ", prettypercent(gcratio(med)), " GC)") + medstr = string(prettytime(realtime(avg)), " (", prettypercent(cpuratio(avg)) ," CPU, ", prettypercent(gcratio(avg)), " GC)") + meanstr = string(prettytime(realtime(max)), " (", prettypercent(cpuratio(max)) ," CPU, ", prettypercent(gcratio(max)), " GC)") else memorystr = "N/A" allocsstr = "N/A" @@ -302,15 +319,17 @@ end @compat function Base.show(io::IO, ::MIME"text/plain", t::TrialEstimate) println(io, "BenchmarkTools.TrialEstimate: ") - println(io, " time: ", prettytime(time(t))) - 
println(io, " gctime: ", prettytime(gctime(t)), " (", prettypercent(gctime(t) / time(t)),")") + println(io, " realtime: ", prettytime(realtime(t))) + println(io, " cputime: ", prettytime(cputime(t)), " (", prettypercent(cputime(t) / realtime(t)),")") + println(io, " gctime: ", prettytime(gctime(t)), " (", prettypercent(gctime(t) / realtime(t)),")") println(io, " memory: ", prettymemory(memory(t))) print(io, " allocs: ", allocs(t)) end @compat function Base.show(io::IO, ::MIME"text/plain", t::TrialRatio) println(io, "BenchmarkTools.TrialRatio: ") - println(io, " time: ", time(t)) + println(io, " realtime: ", realtime(t)) + println(io, " cputime: ", cputime(t)) println(io, " gctime: ", gctime(t)) println(io, " memory: ", memory(t)) print(io, " allocs: ", allocs(t)) @@ -318,6 +337,7 @@ end @compat function Base.show(io::IO, ::MIME"text/plain", t::TrialJudgement) println(io, "BenchmarkTools.TrialJudgement: ") - println(io, " time: ", prettydiff(time(ratio(t))), " => ", time(t), " (", prettypercent(params(t).time_tolerance), " tolerance)") - print(io, " memory: ", prettydiff(memory(ratio(t))), " => ", memory(t), " (", prettypercent(params(t).memory_tolerance), " tolerance)") + println(io, " realtime: ", prettydiff(realtime(ratio(t))), " => ", realtime(t), " (", prettypercent(params(t).time_tolerance), " tolerance)") + println(io, " cputime: ", prettydiff(cputime(ratio(t))), " => ", cputime(t), " (", prettypercent(params(t).time_tolerance), " tolerance)") + print(io, " memory: ", prettydiff(memory(ratio(t))), " => ", memory(t), " (", prettypercent(params(t).memory_tolerance), " tolerance)") end diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index d79f3f73..9f7f7924 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -147,7 +147,7 @@ tune!(b) # This test is volatile in nonquiescent environments (e.g. 
Travis) # BenchmarkTools.DEFAULT_PARAMETERS.overhead = BenchmarkTools.estimate_overhead() -# @test time(minimum(@benchmark nothing)) == 1 +# @test realtime(minimum(@benchmark nothing)) == 1 @test [:x, :y, :z, :v, :w] == BenchmarkTools.collectvars(quote x = 1 + 3 @@ -174,7 +174,7 @@ let fname = tempname() end s = read(fname, String) try - @test ismatch(r"[0-9.]+ \w*s \([0-9]* allocations?: [0-9]+ bytes\)", s) + @test ismatch(r"[0-9.]+ \w*s \[[0-9.]+\% CPU, [0-9.]+\% GC\] \([0-9]* allocations?: [0-9]+ bytes\)", s) catch println(STDERR, "@btime output didn't match ", repr(s)) rethrow() diff --git a/test/GroupsTests.jl b/test/GroupsTests.jl index c46f186a..574362fe 100644 --- a/test/GroupsTests.jl +++ b/test/GroupsTests.jl @@ -16,9 +16,9 @@ seteq(a, b) = length(a) == length(b) == length(intersect(a, b)) g1 = BenchmarkGroup(["1", "2"]) -t1a = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 32, 1, 2, 3) -t1b = TrialEstimate(Parameters(time_tolerance = .40, memory_tolerance = .40), 4123, 123, 43, 9) -tc = TrialEstimate(Parameters(time_tolerance = 1.0, memory_tolerance = 1.0), 1, 1, 1, 1) +t1a = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 32, 32, 1, 2, 3) +t1b = TrialEstimate(Parameters(time_tolerance = .40, memory_tolerance = .40), 4123, 4123, 123, 43, 9) +tc = TrialEstimate(Parameters(time_tolerance = 1.0, memory_tolerance = 1.0), 1, 1, 1, 1, 1) g1["a"] = t1a g1["b"] = t1b @@ -29,14 +29,14 @@ g1similar = similar(g1) g2 = BenchmarkGroup(["2", "3"]) -t2a = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 323, 1, 2, 3) -t2b = TrialEstimate(Parameters(time_tolerance = .40, memory_tolerance = .40), 1002, 123, 43, 9) +t2a = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 323, 323, 1, 2, 3) +t2b = TrialEstimate(Parameters(time_tolerance = .40, memory_tolerance = .40), 1002, 1002, 123, 43, 9) g2["a"] = t2a g2["b"] = t2b g2["c"] = tc -trial = BenchmarkTools.Trial(Parameters(), [1, 
2, 5], [0, 1, 1], 3, 56) +trial = BenchmarkTools.Trial(Parameters(), [1, 2, 5], [1, 2, 5], [0, 1, 1], 3, 56) gtrial = BenchmarkGroup([], Dict("t" => trial)) @@ -63,7 +63,8 @@ gtrial = BenchmarkGroup([], Dict("t" => trial)) @test isempty(g1similar) @test g1similar.tags == g1.tags -@test time(g1).data == Dict("a" => time(t1a), "b" => time(t1b), "c" => time(tc)) +@test realtime(g1).data == Dict("a" => realtime(t1a), "b" => realtime(t1b), "c" => realtime(tc)) +@test cputime(g1).data == Dict("a" => cputime(t1a), "b" => cputime(t1b), "c" => cputime(tc)) @test gctime(g1).data == Dict("a" => gctime(t1a), "b" => gctime(t1b), "c" => gctime(tc)) @test memory(g1).data == Dict("a" => memory(t1a), "b" => memory(t1b), "c" => memory(tc)) @test allocs(g1).data == Dict("a" => allocs(t1a), "b" => allocs(t1b), "c" => allocs(tc)) @@ -107,8 +108,8 @@ groupsa = BenchmarkGroup() groupsa["g1"] = g1 groupsa["g2"] = g2 g3a = addgroup!(groupsa, "g3", ["3", "4"]) -g3a["c"] = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 6341, 23, 41, 536) -g3a["d"] = TrialEstimate(Parameters(time_tolerance = .13, memory_tolerance = .13), 12341, 3013, 2, 150) +g3a["c"] = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 6341, 6341, 23, 41, 536) +g3a["d"] = TrialEstimate(Parameters(time_tolerance = .13, memory_tolerance = .13), 12341, 12341, 3013, 2, 150) groups_copy = copy(groupsa) groups_similar = similar(groupsa) @@ -117,8 +118,8 @@ groupsb = BenchmarkGroup() groupsb["g1"] = g1 groupsb["g2"] = g2 g3b = addgroup!(groupsb, "g3", ["3", "4"]) -g3b["c"] = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 1003, 23, 41, 536) -g3b["d"] = TrialEstimate(Parameters(time_tolerance = .23, memory_tolerance = .23), 25341, 3013, 2, 150) +g3b["c"] = TrialEstimate(Parameters(time_tolerance = .05, memory_tolerance = .05), 1003, 1003, 23, 41, 536) +g3b["d"] = TrialEstimate(Parameters(time_tolerance = .23, memory_tolerance = .23), 25341, 25341, 3013, 2, 150) 
groupstrial = BenchmarkGroup() groupstrial["g"] = gtrial @@ -126,7 +127,8 @@ groupstrial["g"] = gtrial # tests # #-------# -@test time(groupsa).data == Dict("g1" => time(g1), "g2" => time(g2), "g3" => time(g3a)) +@test realtime(groupsa).data == Dict("g1" => realtime(g1), "g2" => realtime(g2), "g3" => realtime(g3a)) +@test cputime(groupsa).data == Dict("g1" => cputime(g1), "g2" => cputime(g2), "g3" => cputime(g3a)) @test gctime(groupsa).data == Dict("g1" => gctime(g1), "g2" => gctime(g2), "g3" => gctime(g3a)) @test memory(groupsa).data == Dict("g1" => memory(g1), "g2" => memory(g2), "g3" => memory(g3a)) @test allocs(groupsa).data == Dict("g1" => allocs(g1), "g2" => allocs(g2), "g3" => allocs(g3a)) diff --git a/test/TrialsTests.jl b/test/TrialsTests.jl index 27cac295..dddffd26 100644 --- a/test/TrialsTests.jl +++ b/test/TrialsTests.jl @@ -9,14 +9,14 @@ using Compat.Test ######### trial1 = BenchmarkTools.Trial(BenchmarkTools.Parameters(evals = 2)) -push!(trial1, 2, 1, 4, 5) -push!(trial1, 21, 0, 41, 51) +push!(trial1, 2, 2, 1, 4, 5) +push!(trial1, 21, 21, 0, 41, 51) trial2 = BenchmarkTools.Trial(BenchmarkTools.Parameters(time_tolerance = 0.15)) -push!(trial2, 21, 0, 41, 51) -push!(trial2, 2, 1, 4, 5) +push!(trial2, 21, 21, 0, 41, 51) +push!(trial2, 2, 2, 1, 4, 5) -push!(trial2, 21, 0, 41, 51) +push!(trial2, 21, 21, 0, 41, 51) @test length(trial2) == 3 deleteat!(trial2, 3) @test length(trial1) == length(trial2) == 2 @@ -24,7 +24,8 @@ sort!(trial2) @test trial1.params == BenchmarkTools.Parameters(evals = trial1.params.evals) @test trial2.params == BenchmarkTools.Parameters(time_tolerance = trial2.params.time_tolerance) -@test trial1.times == trial2.times == [2.0, 21.0] +@test trial1.realtimes == trial2.realtimes == [2.0, 21.0] +@test trial1.cputimes == trial2.cputimes == [2.0, 21.0] @test trial1.gctimes == trial2.gctimes == [1.0, 0.0] @test trial1.memory == trial2.memory == 4 @test trial1.allocs == trial2.allocs == 5 @@ -33,18 +34,19 @@ trial2.params = trial1.params 
@test trial1 == trial2 -@test trial1[2] == push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(evals = 2)), 21, 0, 4, 5) +@test trial1[2] == push!(BenchmarkTools.Trial(BenchmarkTools.Parameters(evals = 2)), 21, 21, 0, 4, 5) @test trial1[1:end] == trial1 -@test time(trial1) == time(trial2) == 2.0 -@test gctime(trial1) == gctime(trial2) == 1.0 -@test memory(trial1) == memory(trial2) == trial1.memory -@test allocs(trial1) == allocs(trial2) == trial1.allocs -@test params(trial1) == params(trial2) == trial1.params +@test realtime(trial1) == realtime(trial2) == 2.0 +@test cputime(trial1) == cputime(trial2) == 2.0 +@test gctime(trial1) == gctime(trial2) == 1.0 +@test memory(trial1) == memory(trial2) == trial1.memory +@test allocs(trial1) == allocs(trial2) == trial1.allocs +@test params(trial1) == params(trial2) == trial1.params # outlier trimming trial3 = BenchmarkTools.Trial(BenchmarkTools.Parameters(), [1, 2, 3, 10, 11], - [1, 1, 1, 1, 1], 1, 1) + [1, 2, 3, 10, 11], [1, 1, 1, 1, 1], 1, 1) trimtrial3 = rmskew(trial3) rmskew!(trial3) @@ -59,11 +61,11 @@ rmskew!(trial3) randtrial = BenchmarkTools.Trial(BenchmarkTools.Parameters()) for _ in 1:40 - push!(randtrial, rand(1:20), 1, 1, 1) + push!(randtrial, rand(1:20), 1, 1, 1, 1) end while mean(randtrial) <= median(randtrial) - push!(randtrial, rand(10:20), 1, 1, 1) + push!(randtrial, rand(10:20), 1, 1, 1, 1) end rmskew!(randtrial) @@ -73,7 +75,7 @@ tmed = median(randtrial) tmean = mean(randtrial) tmax = maximum(randtrial) -@test time(tmin) == time(randtrial) +@test realtime(tmin) == realtime(randtrial) @test gctime(tmin) == gctime(randtrial) @test memory(tmin) == memory(tmed) == memory(tmean) == memory(tmax) == memory(randtrial) @test allocs(tmin) == allocs(tmed) == allocs(tmean) == allocs(tmax) == allocs(randtrial) @@ -94,31 +96,36 @@ x, y = rand(randrange), rand(randrange) @test (ratio(x, x) == 1.0) && (ratio(y, y) == 1.0) @test ratio(0.0, 0.0) == 1.0 -ta = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(), rand(), 
rand(), rand(Int), rand(Int)) -tb = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(), rand(), rand(), rand(Int), rand(Int)) +ta = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(), rand(), rand(), rand(), rand(Int), rand(Int)) +tb = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(), rand(), rand(), rand(), rand(Int), rand(Int)) tr = ratio(ta, tb) -@test time(tr) == ratio(time(ta), time(tb)) +@test realtime(tr) == ratio(realtime(ta), realtime(tb)) +@test cputime(tr) == ratio(cputime(ta), cputime(tb)) @test gctime(tr) == ratio(gctime(ta), gctime(tb)) @test memory(tr) == ratio(memory(ta), memory(tb)) @test allocs(tr) == ratio(allocs(ta), allocs(tb)) @test params(tr) == params(ta) == params(tb) -@test BenchmarkTools.gcratio(ta) == ratio(gctime(ta), time(ta)) -@test BenchmarkTools.gcratio(tb) == ratio(gctime(tb), time(tb)) +@test BenchmarkTools.gcratio(ta) == ratio(gctime(ta), realtime(ta)) +@test BenchmarkTools.gcratio(tb) == ratio(gctime(tb), realtime(tb)) + +@test BenchmarkTools.cpuratio(ta) == ratio(cputime(ta), realtime(ta)) +@test BenchmarkTools.cpuratio(tb) == ratio(cputime(tb), realtime(tb)) ################## # TrialJudgement # ################## -ta = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(time_tolerance = 0.50, memory_tolerance = 0.50), 0.49, 0.0, 2, 1) -tb = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(time_tolerance = 0.05, memory_tolerance = 0.05), 1.00, 0.0, 1, 1) +ta = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(time_tolerance = 0.50, memory_tolerance = 0.50), 0.49, 0.49, 0.0, 2, 1) +tb = BenchmarkTools.TrialEstimate(BenchmarkTools.Parameters(time_tolerance = 0.05, memory_tolerance = 0.05), 1.00, 1.00, 0.0, 1, 1) tr = ratio(ta, tb) tj_ab = judge(ta, tb) tj_r = judge(tr) @test ratio(tj_ab) == ratio(tj_r) == tr -@test time(tj_ab) == time(tj_r) == :improvement +@test realtime(tj_ab) == realtime(tj_r) == :improvement +@test cputime(tj_ab) == cputime(tj_r) == :improvement @test memory(tj_ab) == 
memory(tj_r) == :regression @test tj_ab == tj_r @@ -127,7 +134,8 @@ tj_r_2 = judge(tr; time_tolerance = 2.0, memory_tolerance = 2.0) @test tj_ab_2 == tj_r_2 @test ratio(tj_ab_2) == ratio(tj_r_2) -@test time(tj_ab_2) == time(tj_r_2) == :invariant +@test realtime(tj_ab_2) == realtime(tj_r_2) == :invariant +@test cputime(tj_ab_2) == cputime(tj_r_2) == :invariant @test memory(tj_ab_2) == memory(tj_r_2) == :invariant @test !(isinvariant(tj_ab)) From 8d0dc1706b56d235c00450575cf0f55b6160cea5 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 19 Jan 2018 11:58:33 -0500 Subject: [PATCH 3/8] update internal version to v0.3.0 due to API change --- src/BenchmarkTools.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl index 534d5db4..bab416f2 100644 --- a/src/BenchmarkTools.jl +++ b/src/BenchmarkTools.jl @@ -10,7 +10,7 @@ if VERSION >= v"0.7.0-DEV.3052" using Printf end -const BENCHMARKTOOLS_VERSION = v"0.2.2" +const BENCHMARKTOOLS_VERSION = v"0.3.0" ########## # Timers # From 2d18443f5022ddbe4d4469a51717312a12e0da19 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 19 Jan 2018 15:53:15 -0500 Subject: [PATCH 4/8] update manual --- README.md | 8 +- doc/manual.md | 224 ++++++++++++++++++++++++++--------------------- doc/reference.md | 8 +- 3 files changed, 136 insertions(+), 104 deletions(-) diff --git a/README.md b/README.md index cee53212..f66c5be2 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ The simplest usage is via the [`@btime` macro](https://github.com/JuliaCI/Benchm julia> using BenchmarkTools, Compat # you need to use both modules julia> @btime sin(1) - 15.081 ns (0 allocations: 0 bytes) + 7.317 ns [100.10% CPU, 0.00% GC] (0 allocations: 0 bytes) 0.8414709848078965 ``` @@ -47,13 +47,13 @@ If the expression you want to benchmark depends on external variables, you shoul julia> A = rand(3,3); julia> @btime inv($A); # we interpolate the global variable A with $A - 1.191 μs (10 
allocations: 2.31 KiB) + 545.840 ns [100.02% CPU, 0.00% GC] (5 allocations: 1.98 KiB) julia> @btime inv($(rand(3,3))); # interpolation: the rand(3,3) call occurs before benchmarking - 1.192 μs (10 allocations: 2.31 KiB) + 547.738 ns [100.03% CPU, 0.00% GC] (5 allocations: 1.98 KiB) julia> @btime inv(rand(3,3)); # the rand(3,3) call is included in the benchmark time - 1.295 μs (11 allocations: 2.47 KiB) + 621.614 ns [100.03% CPU, 0.00% GC] (6 allocations: 2.14 KiB) ``` As described the [manual](doc/manual.md), the BenchmarkTools package supports many other features, both for additional output and for more fine-grained control over the benchmarking process. diff --git a/doc/manual.md b/doc/manual.md index 5d70ee01..0b886248 100644 --- a/doc/manual.md +++ b/doc/manual.md @@ -44,6 +44,20 @@ Before we get too far, let's define some of the terminology used in this documen The reasoning behind our definition of "sample" may not be obvious to all readers. If the time to execute a benchmark is smaller than the resolution of your timing method, than a single evaluation of the benchmark will generally not produce a valid sample. In that case, one must approximate a valid sample by recording the total time `t` it takes to record `n` evaluations, and estimating the sample's time per evaluation as `t/n`. For example, if a sample takes 1 second for 1 million evaluations, the approximate time per evaluation for that sample is 1 microsecond. It's not obvious what the right number of evaluations per sample should be for any given benchmark, so BenchmarkTools provides a mechanism (the `tune!` method) to automatically figure it out for you. +## The difference between real-time and cpu-time +BenchmarkTools measures both real-time and cpu-time. Real-time is the time that has passed while running your program and cpu-time is the time +that a processor has spend executing your program. 
As an example when the operating system deschedules your program, real-time is still passing, +but cpu-time is paused, this is also the case when your program spends time in the kernel or is blocked waiting. As an example take a program that +is doing `sleep(1)`. + +```julia +julia> @btime sleep(1) + 1.001 s [0.01% CPU, 0.00% GC] (5 allocations: 192 bytes) +``` + +As we can see, when we call sleep our program is not using CPU cycles and thus cpu-time is `0.01%` of our actual runtime. Do to sligh differences +in accounting cpu-time can sometimes exceed `100%`. + # Benchmarking basics ### Defining and executing benchmarks @@ -56,13 +70,13 @@ BenchmarkTools.Trial: memory estimate: 0 bytes allocs estimate: 0 -------------- - minimum time: 13.610 ns (0.00% GC) - median time: 13.622 ns (0.00% GC) - mean time: 13.638 ns (0.00% GC) - maximum time: 21.084 ns (0.00% GC) + minimum time: 7.561 ns (100.15% CPU, 0.00% GC) + median time: 8.205 ns (100.10% CPU, 0.00% GC) + mean time: 9.499 ns (99.98% CPU, 0.00% GC) + maximum time: 37.434 ns (85.45% CPU, 0.00% GC) -------------- samples: 10000 - evals/sample: 998 + evals/sample: 999 ``` The `@benchmark` macro is essentially shorthand for defining a benchmark, auto-tuning the benchmark's configuration parameters, and running the benchmark. These three steps can be done explicitly using `@benchmarkable`, `tune!` and `run`: @@ -78,13 +92,13 @@ BenchmarkTools.Trial: memory estimate: 0 bytes allocs estimate: 0 -------------- - minimum time: 13.605 ns (0.00% GC) - median time: 13.618 ns (0.00% GC) - mean time: 13.631 ns (0.00% GC) - maximum time: 22.383 ns (0.00% GC) + minimum time: 7.803 ns (99.97% CPU, 0.00% GC) + median time: 7.843 ns (100.00% CPU, 0.00% GC) + mean time: 8.266 ns (100.00% CPU, 0.00% GC) + maximum time: 40.949 ns (100.07% CPU, 0.00% GC) -------------- samples: 10000 - evals/sample: 998 + evals/sample: 999 ``` Alternatively, you can use the `@btime` or `@belapsed` macros. 
@@ -96,11 +110,11 @@ returns the minimum time in seconds. ```julia julia> @btime sin(1) - 13.612 ns (0 allocations: 0 bytes) + 7.812 ns [100.01% CPU, 0.00% GC] (0 allocations: 0 bytes) 0.8414709848078965 julia> @belapsed sin(1) -1.3614228456913828e-8 +7.801801801801802e-9 ``` ### Benchmark `Parameters` @@ -143,10 +157,10 @@ BenchmarkTools.Trial: memory estimate: 7.94 KiB allocs estimate: 1 -------------- - minimum time: 1.566 μs (0.00% GC) - median time: 2.135 μs (0.00% GC) - mean time: 3.071 μs (25.06% GC) - maximum time: 296.818 μs (95.91% GC) + minimum time: 1.089 μs (100.17% CPU, 0.00% GC) + median time: 1.217 μs (100.12% CPU, 0.00% GC) + mean time: 2.109 μs (100.08% CPU, 23.86% GC) + maximum time: 2.878 ms (100.00% CPU, 99.83% GC) -------------- samples: 10000 evals/sample: 10 @@ -158,13 +172,13 @@ BenchmarkTools.Trial: memory estimate: 0 bytes allocs estimate: 0 -------------- - minimum time: 101.627 ns (0.00% GC) - median time: 101.909 ns (0.00% GC) - mean time: 103.834 ns (0.00% GC) - maximum time: 276.033 ns (0.00% GC) + minimum time: 69.214 ns (100.00% CPU, 0.00% GC) + median time: 69.240 ns (100.00% CPU, 0.00% GC) + mean time: 74.160 ns (99.99% CPU, 0.00% GC) + maximum time: 285.691 ns (28.64% CPU, 0.00% GC) -------------- samples: 10000 - evals/sample: 935 + evals/sample: 976 ``` A good rule of thumb is that **external variables should be explicitly interpolated into the benchmark expression**: @@ -178,27 +192,28 @@ BenchmarkTools.Trial: memory estimate: 7.95 KiB allocs estimate: 2 -------------- - minimum time: 13.154 μs (0.00% GC) - median time: 13.806 μs (0.00% GC) - mean time: 14.071 μs (0.00% GC) - maximum time: 337.462 μs (0.00% GC) + minimum time: 744.123 ns (100.05% CPU, 0.00% GC) + median time: 796.974 ns (100.04% CPU, 0.00% GC) + mean time: 1.014 μs (100.02% CPU, 14.78% GC) + maximum time: 249.740 μs (100.00% CPU, 99.39% GC) -------------- samples: 10000 - evals/sample: 1 + evals/sample: 114 # GOOD: A is a constant value in the benchmarking 
context julia> @benchmark [i*i for i in $A] +BenchmarkTools.Trial: BenchmarkTools.Trial: memory estimate: 7.95 KiB allocs estimate: 2 -------------- - minimum time: 929.375 ns (0.00% GC) - median time: 1.348 μs (0.00% GC) - mean time: 2.405 μs (36.64% GC) - maximum time: 91.481 μs (95.46% GC) + minimum time: 684.689 ns (100.05% CPU, 0.00% GC) + median time: 731.242 ns (100.04% CPU, 0.00% GC) + mean time: 890.597 ns (100.05% CPU, 14.26% GC) + maximum time: 190.899 μs (100.00% CPU, 99.30% GC) -------------- samples: 10000 - evals/sample: 32 + evals/sample: 151 ``` (Note that "KiB" is the SI prefix for a [kibibyte](https://en.wikipedia.org/wiki/Kibibyte): 1024 bytes.) @@ -259,12 +274,12 @@ BenchmarkTools.Trial: memory estimate: 0 bytes allocs estimate: 0 -------------- - minimum time: 5.739 ms (0.00% GC) - median time: 5.757 ms (0.00% GC) - mean time: 5.871 ms (0.00% GC) - maximum time: 62.138 ms (0.00% GC) + minimum time: 4.943 ms (100.00% CPU, 0.00% GC) + median time: 5.232 ms (99.85% CPU, 0.00% GC) + mean time: 5.299 ms (99.92% CPU, 0.00% GC) + maximum time: 6.979 ms (100.05% CPU, 0.00% GC) -------------- - samples: 805 + samples: 919 evals/sample: 1 ``` @@ -282,10 +297,10 @@ BenchmarkTools.Trial: memory estimate: 0 bytes allocs estimate: 0 -------------- - minimum time: 2.293 ns (0.00% GC) - median time: 2.302 ns (0.00% GC) - mean time: 2.330 ns (0.00% GC) - maximum time: 6.441 ns (0.00% GC) + minimum time: 1.799 ns (100.00% CPU, 0.00% GC) + median time: 1.909 ns (99.95% CPU, 0.00% GC) + mean time: 1.971 ns (99.86% CPU, 0.00% GC) + maximum time: 15.005 ns (13.95% CPU, 0.00% GC) -------------- samples: 10000 evals/sample: 1000 @@ -300,13 +315,13 @@ BenchmarkTools.Trial: memory estimate: 64 bytes allocs estimate: 1 -------------- - minimum time: 15.613 ns (0.00% GC) - median time: 17.825 ns (0.00% GC) - mean time: 23.358 ns (17.46% GC) - maximum time: 1.725 μs (95.12% GC) + minimum time: 7.364 ns (100.12% CPU, 0.00% GC) + median time: 11.302 ns (100.05% CPU, 0.00% 
GC) + mean time: 16.692 ns (100.04% CPU, 27.54% GC) + maximum time: 27.181 μs (100.00% CPU, 99.89% GC) -------------- samples: 10000 - evals/sample: 998 + evals/sample: 999 ``` The key point here is that these two benchmarks measure different things, even though their code is similar. In the first example, Julia was able to optimize away `view(a, 1:2, 1:2)` because it could prove that the value wasn't being returned and `a` wasn't being mutated. In the second example, the optimization is not performed because `view(a, 1:2, 1:2)` is a return value of the benchmark expression. @@ -331,32 +346,33 @@ Running a benchmark produces an instance of the `Trial` type: ```julia julia> t = @benchmark eig(rand(10, 10)) BenchmarkTools.Trial: - memory estimate: 9.30 KiB - allocs estimate: 28 + memory estimate: 16.25 KiB + allocs estimate: 20 -------------- - minimum time: 33.262 μs (0.00% GC) - median time: 38.618 μs (0.00% GC) - mean time: 39.981 μs (2.65% GC) - maximum time: 2.814 ms (95.07% GC) + minimum time: 20.802 μs (100.19% CPU, 0.00% GC) + median time: 24.214 μs (100.19% CPU, 0.00% GC) + mean time: 29.984 μs (100.12% CPU, 13.96% GC) + maximum time: 29.755 ms (99.91% CPU, 99.75% GC) -------------- samples: 10000 evals/sample: 1 julia> dump(t) # here's what's actually stored in a Trial - BenchmarkTools.Trial - params: BenchmarkTools.Parameters # Trials store the parameters of their parent process - seconds: Float64 5.0 - samples: Int64 10000 - evals: Int64 1 - overhead: Float64 0.0 - gctrial: Bool true - gcsample: Bool false - time_tolerance: Float64 0.05 - memory_tolerance: Float64 0.01 - times: Array{Float64}((10000,)) [33262.0, 33793.0, … 2.77342e6, 2.81368e6] # every sample is stored in the Trial - gctimes: Array{Float64}((10000,)) [0.0, 0.0, … 2.66614e6, 2.67486e6] - memory: Int64 9520 - allocs: Int64 28 +BenchmarkTools.Trial + params: BenchmarkTools.Parameters + seconds: Float64 5.0 + samples: Int64 10000 + evals: Int64 1 + overhead: Float64 0.0 + gctrial: Bool true + 
gcsample: Bool false + time_tolerance: Float64 0.05 + memory_tolerance: Float64 0.01 + realtimes: Array{Float64}((10000,)) [20802.0, 20831.0, 20854.0 … 702013.0, 9.4493e6, 2.97552e7] + cputimes: Array{Float64}((10000,)) [20841.0, 20872.0, 20888.0 … 702130.0, 9.44345e6, 2.97287e7] + gctimes: Array{Float64}((10000,)) [0.0, 0.0, 0.0 … 661810.0, 9.39459e6, 2.9682e7] + memory: Int64 16640 + allocs: Int64 20 ``` As you can see from the above, a couple of different timing estimates are pretty-printed with the `Trial`. You can calculate these estimates yourself using the `minimum`, `median`, `mean`, and `maximum` functions: @@ -364,31 +380,35 @@ As you can see from the above, a couple of different timing estimates are pretty ```julia julia> minimum(t) BenchmarkTools.TrialEstimate: - time: 33.262 μs + realtime: 20.802 μs + cputime: 20.841 μs (100.19%) gctime: 0.000 ns (0.00%) - memory: 9.30 KiB - allocs: 28 + memory: 16.25 KiB + allocs: 20 julia> median(t) BenchmarkTools.TrialEstimate: - time: 38.618 μs + realtime: 24.214 μs + cputime: 24.261 μs (100.19%) gctime: 0.000 ns (0.00%) - memory: 9.30 KiB - allocs: 28 + memory: 16.25 KiB + allocs: 20 julia> mean(t) BenchmarkTools.TrialEstimate: - time: 39.981 μs - gctime: 1.058 μs (2.65%) - memory: 9.30 KiB - allocs: 28 + realtime: 29.984 μs + cputime: 30.020 μs (100.12%) + gctime: 4.185 μs (13.96%) + memory: 16.25 KiB + allocs: 20 julia> maximum(t) BenchmarkTools.TrialEstimate: - time: 2.814 ms - gctime: 2.675 ms (95.07%) - memory: 9.30 KiB - allocs: 28 + realtime: 29.755 ms + cputime: 29.729 ms (99.91%) + gctime: 29.682 ms (99.75%) + memory: 16.25 KiB + allocs: 20 ``` ### Which estimator should I use? 
@@ -430,21 +450,24 @@ julia> tune!(b); julia> m1 = median(run(b)) BenchmarkTools.TrialEstimate: - time: 38.638 μs + realtime: 24.119 μs + cputime: 24.165 μs (100.19%) gctime: 0.000 ns (0.00%) - memory: 9.30 KiB - allocs: 28 + memory: 16.25 KiB + allocs: 20 julia> m2 = median(run(b)) BenchmarkTools.TrialEstimate: - time: 38.723 μs + realtime: 24.563 μs + cputime: 24.611 μs (100.20%) gctime: 0.000 ns (0.00%) - memory: 9.30 KiB - allocs: 28 + memory: 16.25 KiB + allocs: 20 julia> ratio(m1, m2) BenchmarkTools.TrialRatio: - time: 0.997792009916587 + realtime: 0.981924032080772 + cputime: 0.9818780220226728 gctime: 1.0 memory: 1.0 allocs: 1.0 @@ -455,35 +478,40 @@ Use the `judge` function to decide if one estimate represents a regression versu ```julia julia> m1 = median(@benchmark eig(rand(10, 10))) BenchmarkTools.TrialEstimate: - time: 38.745 μs + realtime: 24.541 μs + cputime: 24.595 μs (100.22%) gctime: 0.000 ns (0.00%) - memory: 9.30 KiB - allocs: 28 + memory: 14.16 KiB + allocs: 16 julia> m2 = median(@benchmark eig(rand(10, 10))) BenchmarkTools.TrialEstimate: - time: 38.611 μs + realtime: 25.065 μs + cputime: 25.127 μs (100.25%) gctime: 0.000 ns (0.00%) - memory: 9.30 KiB - allocs: 28 + memory: 16.25 KiB + allocs: 20 # percent change falls within noise tolerance for all fields julia> judge(m1, m2) BenchmarkTools.TrialJudgement: - time: +0.35% => invariant (5.00% tolerance) - memory: +0.00% => invariant (1.00% tolerance) + realtime: -2.09% => invariant (5.00% tolerance) + cputime: -2.12% => invariant (5.00% tolerance) + memory: -12.88% => improvement (1.00% tolerance) # changing time_tolerance causes it to be marked as a regression julia> judge(m1, m2; time_tolerance = 0.0001) BenchmarkTools.TrialJudgement: - time: +0.35% => regression (0.01% tolerance) - memory: +0.00% => invariant (1.00% tolerance) + realtime: -2.09% => improvement (0.01% tolerance) + cputime: -2.12% => improvement (0.01% tolerance) + memory: -12.88% => improvement (1.00% tolerance) # switch m1 & 
m2; from this perspective, the difference is an improvement julia> judge(m2, m1; time_tolerance = 0.0001) BenchmarkTools.TrialJudgement: - time: -0.35% => improvement (0.01% tolerance) - memory: +0.00% => invariant (1.00% tolerance) + realtime: +2.14% => regression (0.01% tolerance) + cputime: +2.16% => regression (0.01% tolerance) + memory: +14.79% => regression (1.00% tolerance) # you can pass in TrialRatios as well julia> judge(ratio(m1, m2)) == judge(m1, m2) diff --git a/doc/reference.md b/doc/reference.md index 874db7c5..2525369c 100644 --- a/doc/reference.md +++ b/doc/reference.md @@ -65,9 +65,13 @@ Not exported. A type that stores a classification of a `TrialRatio`'s time and m ## Accessor Functions -##### `time(x::Union{Trial, TrialEstimate, TrialRatio, TrialJudgement, BenchmarkGroup})` +##### `realtime(x::Union{Trial, TrialEstimate, TrialRatio, TrialJudgement, BenchmarkGroup})` -Returns the time value (in nanoseconds) associated with `x`. If `isa(x, TrialJudgement)`, the value will not be a number, but a `Symbol` (`:regression`, `:invariant`, or `:improvement`). If `isa(x, BenchmarkGroup)`, return a `BenchmarkGroup` where `time` has been applied to the values of `x`. +Returns the real-time value (in nanoseconds) associated with `x`. If `isa(x, TrialJudgement)`, the value will not be a number, but a `Symbol` (`:regression`, `:invariant`, or `:improvement`). If `isa(x, BenchmarkGroup)`, return a `BenchmarkGroup` where `time` has been applied to the values of `x`. + +##### `cputime(x::Union{Trial, TrialEstimate, TrialRatio, TrialJudgement, BenchmarkGroup})` + +Returns the cpu-time value (in nanoseconds) associated with `x`. If `isa(x, TrialJudgement)`, the value will not be a number, but a `Symbol` (`:regression`, `:invariant`, or `:improvement`). If `isa(x, BenchmarkGroup)`, return a `BenchmarkGroup` where `time` has been applied to the values of `x`. 
##### `memory(x::Union{Trial, TrialEstimate, TrialRatio, TrialJudgement, BenchmarkGroup})` From e4c3ce86551e383b0c2eb12f732c776a8f3f5113 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 19 Jan 2018 17:03:52 -0500 Subject: [PATCH 5/8] add license notice to files under src/ --- src/BenchmarkTools.jl | 2 ++ src/execution.jl | 2 ++ src/groups.jl | 2 ++ src/parameters.jl | 2 ++ src/serialization.jl | 2 ++ src/trials.jl | 2 ++ 6 files changed, 12 insertions(+) diff --git a/src/BenchmarkTools.jl b/src/BenchmarkTools.jl index bab416f2..a08c53cf 100644 --- a/src/BenchmarkTools.jl +++ b/src/BenchmarkTools.jl @@ -1,3 +1,5 @@ +# This file is a part of BenchmarkTools.jl. License is MIT + __precompile__() module BenchmarkTools diff --git a/src/execution.jl b/src/execution.jl index cf09b54c..cca1d31a 100644 --- a/src/execution.jl +++ b/src/execution.jl @@ -1,3 +1,5 @@ +# This file is a part of BenchmarkTools.jl. License is MIT + # Trigger several successive GC sweeps. This is more comprehensive than running just a # single sweep, since freeable objects may need more than one sweep to be appropriately # marked and freed. diff --git a/src/groups.jl b/src/groups.jl index 37fda3b9..7661278b 100644 --- a/src/groups.jl +++ b/src/groups.jl @@ -1,3 +1,5 @@ +# This file is a part of BenchmarkTools.jl. License is MIT + ################## # BenchmarkGroup # ################## diff --git a/src/parameters.jl b/src/parameters.jl index 015bb95c..f995640b 100644 --- a/src/parameters.jl +++ b/src/parameters.jl @@ -1,3 +1,5 @@ +# This file is a part of BenchmarkTools.jl. License is MIT + # most machines will be higher resolution than this, but we're playing it safe const RESOLUTION = 1000 # 1 μs = 1000 ns diff --git a/src/serialization.jl b/src/serialization.jl index f568017e..96b1c8f6 100644 --- a/src/serialization.jl +++ b/src/serialization.jl @@ -1,3 +1,5 @@ +# This file is a part of BenchmarkTools.jl. 
License is MIT + if VERSION >= v"0.7.0-DEV.2437" using Base.Meta: parse end diff --git a/src/trials.jl b/src/trials.jl index 2a571904..94acc282 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -1,3 +1,5 @@ +# This file is a part of BenchmarkTools.jl. License is MIT + ######### # Trial # ######### From b6ae23c5d93523e255475f73b0d7a5c5a56121f4 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 22 Jan 2018 19:09:18 -0500 Subject: [PATCH 6/8] Update docs and disable printing of cpu-time `:GetProcessTimes` has a resolution of `100ns` which is much worse than the resolution of the high-precision clock. --- doc/manual.md | 13 +++++++++---- src/timers/timers.jl | 2 ++ src/trials.jl | 22 +++++++++++++++++----- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/doc/manual.md b/doc/manual.md index 0b886248..74fb4136 100644 --- a/doc/manual.md +++ b/doc/manual.md @@ -46,8 +46,7 @@ recording the total time `t` it takes to record `n` evaluations, and estimating ## The difference between real-time and cpu-time BenchmarkTools measures both real-time and cpu-time. Real-time is the time that has passed while running your program and cpu-time is the time -that a processor has spend executing your program. As an example when the operating system deschedules your program, real-time is still passing, -but cpu-time is paused, this is also the case when your program spends time in the kernel or is blocked waiting. As an example take a program that +that a processor has spend executing your program. Real-time is often also referred to as wall-time. As an example take a program that is doing `sleep(1)`. ```julia @@ -55,8 +54,14 @@ julia> @btime sleep(1) 1.001 s [0.01% CPU, 0.00% GC] (5 allocations: 192 bytes) ``` -As we can see, when we call sleep our program is not using CPU cycles and thus cpu-time is `0.01%` of our actual runtime. Do to sligh differences -in accounting cpu-time can sometimes exceed `100%`. 
+As we can see, when we call sleep our program is not using CPU cycles and thus cpu-time is `0.01%` of our actual runtime. +Sometimes cpu-time can exceed `100%`, this is either due to slight differences in accounting or because your process is +using multiple cpus. + +### Windows +Cpu-time measurements on Windows have a low resolution (in comparison to real-time) and are as such less useful. To avoid confusion +printing of cpu-time is disabled by default. If you are benchmarking code that runs in the `ms` range you can set the environment variable +`BT_FORCE_CPUTIME` to enable printing of cpu-time results (Remember to evict your cached version of BenchmarkTools). # Benchmarking basics diff --git a/src/timers/timers.jl b/src/timers/timers.jl index d1643090..8b27e3d9 100644 --- a/src/timers/timers.jl +++ b/src/timers/timers.jl @@ -3,6 +3,8 @@ module Timers import Compat +const ACCURATE_CPUTIME = Compat.Sys.iswindows() ? haskey(ENV, "BT_FORCE_CPUTIME") : true + """ realtime() diff --git a/src/trials.jl b/src/trials.jl index 94acc282..adeebf77 100644 --- a/src/trials.jl +++ b/src/trials.jl @@ -188,7 +188,7 @@ function ratio(a::TrialEstimate, b::TrialEstimate) end gcratio(t::TrialEstimate) = ratio(gctime(t), realtime(t)) -cpuratio(t::TrialEstimate) = ratio(cputime(t), realtime(t)) +cpuratio(t::TrialEstimate) = Timers.ACCURATE_CPUTIME ? 
ratio(cputime(t), realtime(t)) : NaN ################## # TrialJudgement # @@ -322,8 +322,12 @@ end @compat function Base.show(io::IO, ::MIME"text/plain", t::TrialEstimate) println(io, "BenchmarkTools.TrialEstimate: ") println(io, " realtime: ", prettytime(realtime(t))) - println(io, " cputime: ", prettytime(cputime(t)), " (", prettypercent(cputime(t) / realtime(t)),")") - println(io, " gctime: ", prettytime(gctime(t)), " (", prettypercent(gctime(t) / realtime(t)),")") + if Timers.ACCURATE_CPUTIME + println(io, " cputime: ", prettytime(cputime(t)), " (", prettypercent(cpuratio(t)),")") + else + println(io, " cputime: ", "NA on Windows, see docs") + end + println(io, " gctime: ", prettytime(gctime(t)), " (", prettypercent(gcratio(t)),")") println(io, " memory: ", prettymemory(memory(t))) print(io, " allocs: ", allocs(t)) end @@ -331,7 +335,11 @@ end @compat function Base.show(io::IO, ::MIME"text/plain", t::TrialRatio) println(io, "BenchmarkTools.TrialRatio: ") println(io, " realtime: ", realtime(t)) - println(io, " cputime: ", cputime(t)) + if Timers.ACCURATE_CPUTIME + println(io, " cputime: ", cputime(t)) + else + println(io, " cputime: ", "NA on Windows, see docs") + end println(io, " gctime: ", gctime(t)) println(io, " memory: ", memory(t)) print(io, " allocs: ", allocs(t)) @@ -340,6 +348,10 @@ end @compat function Base.show(io::IO, ::MIME"text/plain", t::TrialJudgement) println(io, "BenchmarkTools.TrialJudgement: ") println(io, " realtime: ", prettydiff(realtime(ratio(t))), " => ", realtime(t), " (", prettypercent(params(t).time_tolerance), " tolerance)") - println(io, " cputime: ", prettydiff(cputime(ratio(t))), " => ", cputime(t), " (", prettypercent(params(t).time_tolerance), " tolerance)") + if Timers.ACCURATE_CPUTIME + println(io, " cputime: ", prettydiff(cputime(ratio(t))), " => ", cputime(t), " (", prettypercent(params(t).time_tolerance), " tolerance)") + else + println(io, " cputime: ", "NA on Windows, see docs") + end print(io, " memory: ", 
prettydiff(memory(ratio(t))), " => ", memory(t), " (", prettypercent(params(t).memory_tolerance), " tolerance)") end From d7db0bfb7bd1f29963c661516f9d8e7a7582ff74 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Mon, 22 Jan 2018 19:19:41 -0500 Subject: [PATCH 7/8] add simple sanity checks --- test/ExecutionTests.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/ExecutionTests.jl b/test/ExecutionTests.jl index 9f7f7924..7d70b51f 100644 --- a/test/ExecutionTests.jl +++ b/test/ExecutionTests.jl @@ -184,4 +184,16 @@ end end +############ +# cpu-time # +############ + +# some simple sanity checks +t = @benchmark sleep(1) +@test cputime(t) < 1e9 +t = @benchmark sin(1) +if BenchmarkTools.Timers.ACCURATE_CPUTIME +@test cputime(t) > 5e8 # 50% cputime +end + end # module From 8ea633a6daf894117c55781f72c2fa92714cd519 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Thu, 25 Jan 2018 11:58:09 -0500 Subject: [PATCH 8/8] introduces the notions of Measurement and MeasurementDelta a `Measurement` is a given timestamp (which can measure multiple things) a `MeasurementDelta` is the difference between two `Measurements` and currently tracks real-time and the ratio of cpu-time/real-time. All the conversion and calculation is part of MeasurementDelta so that it is 'cheap' to obtain a Measurement. As an example currently we convert every `time_ns` to Float64 which we can delay using this notion of a MeasurementDelta. 
--- src/timers/darwin.jl | 34 ++++++++++++++++++++++++++---- src/timers/timers.jl | 17 +++++++++++++++ src/timers/unix.jl | 24 +++++++++++++++++++++ src/timers/windows.jl | 49 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+), 4 deletions(-) diff --git a/src/timers/darwin.jl b/src/timers/darwin.jl index c126f2d0..86774e2c 100644 --- a/src/timers/darwin.jl +++ b/src/timers/darwin.jl @@ -35,9 +35,9 @@ struct RUsage ru_nivcsw::Clong end -@inline function maketime(ru::RUsage) - user = ru.ru_utime.tv_sec * 1e9 + ru.ru_utime.tv_usec *1e3 - kernel = ru.ru_stime.tv_sec * 1e9 + ru.ru_stime.tv_usec *1e3 +@inline function maketime(utime::TimeVal, stime::TimeVal) + user = utime.tv_sec * 1e9 + utime.tv_usec *1e3 + kernel = stime.tv_sec * 1e9 + stime.tv_usec *1e3 return user+kernel end @@ -48,5 +48,31 @@ end @inline function cputime() ru = Ref{RUsage}() ccall(:getRUsage, Cint, (Cint, Ref{RUsage}), RUSAGE_SELF, ru) - return maketime(ru[]) + return maketime(ru[].ru_utime, ru[].ru_stime) +end + +struct Measurement + realtime::UInt64 + utime::TimeVal + stime::TimeVal + function Measurement() + rtime = time_ns() + ru = Ref{RUsage}() + ccall(:getRUsage, Cint, (Cint, Ref{RUsage}), RUSAGE_SELF, ru) + return new(rtime, ru[].ru_utime, ru[].ru_stime) + end +end + +struct MeasurementDelta + realtime::Float64 + cpuratio::Float64 + function MeasurementDelta(t1::Measurement, t0::Measurement) + rt0 = Float64(t0.realtime) + ct0 = maketime(t0.utime, t0.stime) + rt1 = Float64(t1.realtime) + ct1 = maketime(t1.utime, t1.stime) + realtime = rt1 - rt0 + cputime = ct1 - ct0 + return new(realtime, cputime/realtime) + end end diff --git a/src/timers/timers.jl b/src/timers/timers.jl index 8b27e3d9..1d4c2e2c 100644 --- a/src/timers/timers.jl +++ b/src/timers/timers.jl @@ -3,6 +3,17 @@ module Timers import Compat +### +# For each supported operating system, we define a struct: +# struct Measurement end +# -(a::Measurement, b::Measurement) -> MeasurementDelta +# +# struct 
MeasurementDelta end +# isless +# time +# cpuratio +### + const ACCURATE_CPUTIME = Compat.Sys.iswindows() ? haskey(ENV, "BT_FORCE_CPUTIME") : true """ @@ -36,4 +47,10 @@ elseif Compat.Sys.iswindows() else error("$(Sys.KERNEL) is not supported please file an issue") end + +Base.isless(a::MeasurementDelta, b::MeasurementDelta) = isless(time(a), time(b)) +Base.:(-)(t1::Measurement, t0::Measurement) = MeasurementDelta(t1, t0) +time(a::MeasurementDelta) = a.realtime +cpuratio(a::MeasurementDelta) = a.cpuratio + end # module \ No newline at end of file diff --git a/src/timers/unix.jl b/src/timers/unix.jl index 5cb19e5f..ccd227ab 100644 --- a/src/timers/unix.jl +++ b/src/timers/unix.jl @@ -46,3 +46,27 @@ end @inline function cputime() maketime(clock_gettime(CLOCK_PROCESS_CPUTIME_ID)) end + +struct Measurement + realtime::TimeSpec + cputime::TimeSpec + function Measurement() + rtime = clock_gettime(CLOCK_MONOTONIC) + ctime = clock_gettime(CLOCK_PROCESS_CPUTIME_ID) + return new(rtime, ctime) + end +end + +struct MeasurementDelta + realtime::Float64 + cpuratio::Float64 + function MeasurementDelta(t1::Measurement, t0::Measurement) + rt0 = maketime(t0.realtime) + ct0 = maketime(t0.cputime) + rt1 = maketime(t1.realtime) + ct1 = maketime(t1.cputime) + realtime = rt1 - rt0 + cputime = ct1 - ct0 + return new(realtime, cputime/realtime) + end +end diff --git a/src/timers/windows.jl b/src/timers/windows.jl index 80dba472..4301a4d6 100644 --- a/src/timers/windows.jl +++ b/src/timers/windows.jl @@ -41,3 +41,52 @@ end proc, creation_time, exit_time, kernel_time, user_time) return maketime(kernel_time[], user_time[]) end + +@inline function frequency() + freq = Ref{UInt64}() + ccall(:QueryPerformanceFrequency, Cint, (Ref{UInt64},), freq) + return freq[] +end + +@inline function currentprocess() + return ccall(:GetCurrentProcess, HANDLE, ()) +end + +@inline function cpucycles(proc::HANDLE) + cycles = Ref{UInt64}() + ccall(:QueryProcessCycleTime, Cint, (HANDLE, Ref{UInt64}), proc, cycles) 
+ return cycles[] +end + +@inline function perfcounter() + counter = Ref{UInt64}() + ccall(:QueryPerformanceCounter, Cint, (Ref{UInt64},), counter) + return counter[] +end + +struct Measurement + time::UInt64 + cpu::UInt64 + function Measurement() + proc = currentprocess() + time = perfcounter() + cpu = cpucycles(proc) + return new(time, cpu) + end +end + +struct MeasurementDelta + realtime::Float64 + cpuratio::Float64 + function MeasurementDelta(t1::Measurement, t0::Measurement) + freq = frequency() + rt0 = t0.time + ct0 = t0.cpu + rt1 = t1.time + ct1 = t1.cpu + realcycles = rt1 - rt0 + realtime = realcycles / freq + cpucycles = ct1 - ct0 + new(realtime, cpucycles/realcycles) + end +end