/
better-benchmark.rb
104 lines (94 loc) · 3.91 KB
/
better-benchmark.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
require 'benchmark'
require 'rsruby'
require 'better-benchmark/comparison-partial'
require 'better-benchmark/bencher'
require 'better-benchmark/comparer'
module Benchmark
  BETTER_BENCHMARK_VERSION = '0.8.6'
  DEFAULT_REQUIRED_SIGNIFICANCE = 0.01

  # Runs the given block once, measuring elapsed wall-clock time, and writes
  # the elapsed seconds (as a Float) to the Bencher data file under data_dir.
  #
  # @param [String] data_dir
  #   Directory in which to write the timing data file
  def self.write_realtime( data_dir, &block )
    t = Benchmark.realtime( &block )
    File.open( "#{data_dir}/#{Bencher::DATA_FILE}", 'w' ) do |f|
      f.print t
    end
  end

  # Compares two sets of timings with a Wilcoxon test (via RSRuby) and
  # reports whether the observed difference is statistically significant.
  #
  # The number of elements in times1 and times2 should be the same.
  #
  # @param [Array] times1
  #   An Array of elapsed times in float form, measured in seconds
  # @param [Array] times2
  #   An Array of elapsed times in float form, measured in seconds
  # @param [Float] required_significance
  #   The maximum p value needed to declare statistical significance
  # @return [Hash]
  #   Per-set times, means and standard deviations, the p value, the W
  #   statistic, and a :significant boolean
  def self.compare_times( times1, times2, required_significance = DEFAULT_REQUIRED_SIGNIFICANCE )
    # Guard against an explicitly-passed nil, not just an omitted argument.
    required_significance ||= DEFAULT_REQUIRED_SIGNIFICANCE
    r = RSRuby.instance
    wilcox_result = r.wilcox_test( times1, times2 )
    {
      :results1 => {
        :times => times1,
        :mean => r.mean( times1 ),
        :stddev => r.sd( times1 ),
      },
      :results2 => {
        :times => times2,
        :mean => r.mean( times2 ),
        :stddev => r.sd( times2 ),
      },
      :p => wilcox_result[ 'p.value' ],
      :W => wilcox_result[ 'statistic' ][ 'W' ],
      :significant => wilcox_result[ 'p.value' ] < required_significance,
    }
  end

  # @param options [Hash]
  # @option options [Fixnum] :iterations
  #   The number of times to execute the pair of blocks.
  # @option options [Fixnum] :inner_iterations
  #   Used to increase the time taken per iteration.
  # @option options [Float] :required_significance
  #   Maximum allowed p value in order to declare the results statistically significant.
  # @option options [Boolean] :verbose
  #   Whether to print a dot for each iteration (as a sort of progress meter).
  #
  # To use better-benchmark properly, it is important to set :iterations and
  # :inner_iterations properly.  There are a few things to bear in mind:
  #
  # (1) Do not set :iterations too high.  It should normally be in the range
  # of 10-20, but can be lower.  Over 25 should be considered too high.
  # (2) Execution time for one run of the blocks under test should not be too
  # small (or else random variance will muddle the results).  Aim for at least
  # 1.0 seconds per iteration.
  # (3) Minimize the proportion of any warmup time (and cooldown time) of one
  # block run.
  #
  # In order to achieve these goals, you will need to tweak :inner_iterations
  # based on your situation.  The exact number you should use will depend on
  # the strength of the hardware (CPU, RAM, disk), and the amount of work done
  # by the blocks.  For code blocks that execute extremely rapidly, you may
  # need hundreds of thousands of :inner_iterations.
  def self.compare_realtime( options = {}, &block1 )
    # Work on a copy so the caller's Hash is not mutated by the defaults below.
    options = options.dup
    options[:iterations] ||= 20
    options[:inner_iterations] ||= 1
    # NOTE(review): the warning fires above 30 iterations, while the guidance
    # above calls "over 25" too high — confirm which threshold is intended.
    if options[:iterations] > 30
      warn "The number of iterations is set to #{options[:iterations]}. " +
        "Using too many iterations may make the test results less reliable. " +
        "It is recommended to increase the number of :inner_iterations instead."
    end
    ComparisonPartial.new( block1, options )
  end

  # Prints a human-readable summary of a result Hash as produced by
  # ::compare_times: each set's mean and standard deviation, the p value,
  # the W statistic, and whether the percentage difference is significant.
  #
  # @param [Hash] result
  #   A result Hash from ::compare_times
  def self.report_on( result )
    puts
    puts( "Set 1 mean: %.3f s" % [ result[ :results1 ][ :mean ] ] )
    puts( "Set 1 std dev: %.3f" % [ result[ :results1 ][ :stddev ] ] )
    puts( "Set 2 mean: %.3f s" % [ result[ :results2 ][ :mean ] ] )
    puts( "Set 2 std dev: %.3f" % [ result[ :results2 ][ :stddev ] ] )
    puts "p.value: #{result[ :p ]}"
    puts "W: #{result[ :W ]}"
    puts(
      "The difference (%+.1f%%) %s statistically significant." % [
        ( ( result[ :results2 ][ :mean ] - result[ :results1 ][ :mean ] ) / result[ :results1 ][ :mean ] ) * 100,
        result[ :significant ] ? 'IS' : 'IS NOT'
      ]
    )
  end
end