In [1]:
using BenchmarkTools
using ExtractRandom

In [2]:
# Benchmark suite for `two_universal!`, laid out as
#     suite[container type][data_size ÷ 2][matrix size]
types = [Vector{Bool}, BitVector, Vector{Int8}, Vector{Int64}]
# Spaces around `.^` avoid the ambiguous `2.^` token sequence, which newer
# parsers reject outright.
data_sizes = 2 .^ (6:4:30)
a_sizes = [(512, 1024)]
const suite = BenchmarkGroup(["two_universal"])

for itype in types
    elty = eltype(itype)
    suite[itype] = BenchmarkGroup()
    for data_size in data_sizes
        # Every data size gets a group, even if no matrix size below fits it.
        size_group = BenchmarkGroup()
        suite[itype][div(data_size, 2)] = size_group

        for a_size in a_sizes
            # The input length must be a non-zero multiple of the matrix's
            # column count.
            (data_size % a_size[2] == 0 && data_size ≥ a_size[2]) || continue

            a = rand(elty, a_size)
            v = rand(elty, data_size)

            # Number of column-count-sized chunks contained in the input.
            multiple = div(length(v), size(a, 2))
            if elty <: Bool
                # NOTE(review): both Vector{Bool} and BitVector take this branch
                # and are converted to BitArrays, so the two groups appear to
                # benchmark the same thing -- confirm this is intended.
                a = convert(BitMatrix, a)
                v = convert(BitArray, v)
                # `falses` gives a zero-initialised buffer instead of an
                # uninitialised one.
                x = falses(multiple * size(a, 1))
            else
                # Exact integer ceiling division; no Float64 round-trip.
                n = cld(multiple * size(a, 1), 8sizeof(elty))
                x = convert(itype, zeros(elty, n))
            end
            size_group[a_size] = @benchmarkable two_universal!($(a), $(v), $(x))
        end
    end
end


In [None]:
# Let BenchmarkTools tune the execution parameters of every benchmark in `suite`.
tune!(suite)

In [None]:
 # Run the whole suite with a 1 second budget per benchmark, printing progress.
 results = run(suite, verbose = true, seconds = 1)

In [3]:
# Vector{Int64} result for data size 1024 (groups are keyed by data_size ÷ 2 = 512).
# The innermost key must be an entry of `a_sizes`; (512, 1024) is the only
# matrix size registered, so the former (64,128) key could never be found.
results[Vector{Int64}][512][(512, 1024)]

LoadError: UndefVarError: results not defined

In [4]:
# Print the complete structure of the suite.
showall(suite)

4-element BenchmarkTools.BenchmarkGroup:
  tags: ["two_universal"]
  Array{Bool,1} => 7-element BenchmarkTools.BenchmarkGroup:
	  tags: []
	  512 => 1-element BenchmarkTools.BenchmarkGroup:
		  tags: []
		  (512,1024) => Benchmark(evals=1, seconds=5.0, samples=10000)
	  2097152 => 1-element BenchmarkTools.BenchmarkGroup:
		  tags: []
		  (512,1024) => Benchmark(evals=1, seconds=5.0, samples=10000)
	  32 => 0-element BenchmarkTools.BenchmarkGroup:
		  tags: []
	  33554432 => 1-element BenchmarkTools.BenchmarkGroup:
		  tags: []
		  (512,1024) => Benchmark(evals=1, seconds=5.0, samples=10000)
	  131072 => 1-element BenchmarkTools.BenchmarkGroup:
		  tags: []
		  (512,1024) => Benchmark(evals=1, seconds=5.0, samples=10000)
	  8192 => 1-element BenchmarkTools.BenchmarkGroup:
		  tags: []
		  (512,1024) => Benchmark(evals=1, seconds=5.0, samples=10000)
	  536870912 => 1-element BenchmarkTools.BenchmarkGroup:
		  tags: []
		  (512,1024) => Benchmark(evals=1, seconds=5.0, samples=10000)
  Arr

In [13]:
# Run only the Vector{Int64} benchmark stored under key 8192 (data size 16384)
# for the (512, 1024) matrix: at most 3 samples within a 60 s budget.
run(suite[Vector{Int64}][8192][(512, 1024)], samples=3, seconds=60)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     11.912 s (0.00% GC)
  median time:      12.205 s (0.00% GC)
  mean time:        12.269 s (0.00% GC)
  maximum time:     12.690 s (0.00% GC)
  --------------
  samples:          3
  evals/sample:     1

In [15]:
# Run only the Vector{Int8} benchmark stored under key 8192 (data size 16384)
# for the (512, 1024) matrix: at most 20 samples within a 60 s budget.
run(suite[Vector{Int8}][8192][(512, 1024)], samples=20, seconds=60)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     4.356 s (0.00% GC)
  median time:      4.425 s (0.00% GC)
  mean time:        4.432 s (0.00% GC)
  maximum time:     4.554 s (0.00% GC)
  --------------
  samples:          14
  evals/sample:     1

In [20]:
using ExtractRandom, BenchmarkTools
# Ad-hoc values; `benchit` takes arguments of the same names and does not read
# these globals.
T, n, m, mult = Int8, 512, 1024, 64

"""
    benchit(T, n, m, mult, algo)

Build (without running) a `@benchmarkable` for one two-universal variant.

# Arguments
- `T`: element type of the random inputs.
- `n`, `m`: the random matrix is `n × m`, except for `:full_width`, where it is
  `(m * 8sizeof(T)) × (n ÷ 8sizeof(T))`.
- `mult`: the input vector holds `m * mult` elements.
- `algo`: `:bits` converts the inputs to BitArrays, `:row_major` transposes the
  matrix and calls `ExtractRandom.row_major_two_universal!`, `:full_width` calls
  `ExtractRandom.full_width_two_universal!`; any other symbol is forwarded to
  `two_universal!` as `Val{algo}`.

# Throws
- `ArgumentError` if `algo == :full_width` and `n` is not a multiple of the bit
  width of `T`.
"""
function benchit(T::Type, n::Integer, m::Integer, mult::Integer, algo::Symbol)
    nbits = 8sizeof(T)

    # Handled first so that the large default matrix below is never allocated
    # only to be thrown away.
    if algo == :full_width
        n % nbits == 0 || throw(ArgumentError(
            "n = $n must be a multiple of the bit width of $T ($nbits)"))
        wide = rand(T, (m * nbits, div(n, nbits)))
        input = rand(T, (m * mult))
        shifts = rand(T(1):T(nbits - 1), size(wide))
        result = zeros(T, size(wide, 2) * div(length(input), size(wide, 1)))
        println(length(input), " ", length(result), " ", size(wide))
        return @benchmarkable ExtractRandom.full_width_two_universal!(
            $wide, $shifts, $input, $result)
    end

    a = rand(T, (n, m))
    v = rand(T, size(a, 2) * mult)
    # Output length in elements of T: exact ceiling of (n * mult) / nbits.
    nwords = cld(size(a, 1) * mult, nbits)

    if algo == :bits
        abits = convert(BitMatrix, a)
        vbits = convert(BitArray, v)
        # Zero-initialised bit buffer spanning the same number of bits.
        outbits = falses(nwords * nbits)
        return @benchmarkable two_universal!($(Val{algo}), $(abits), $(vbits), $(outbits))
    end

    out = zeros(T, nwords)
    if algo == :row_major
        at = transpose(a)
        println(length(v), " ", length(out), " ", size(at))
        return @benchmarkable ExtractRandom.row_major_two_universal!($(at), $(v), $(out))
    end

    return @benchmarkable two_universal!($(Val{algo}), $(a), $(v), $(out))
end



benchit (generic function with 1 method)

In [18]:
# Build, tune and run the :work variant for Int64 with n = 64*512, m = 1024, mult = 10.
b = benchit(Int64, 64*512, 1024, 10, :work)
tune!(b)
run(b)

BenchmarkTools.Trial: 
  memory estimate:  257.02 KiB
  allocs estimate:  22
  --------------
  minimum time:     415.015 ms (0.00% GC)
  median time:      436.494 ms (0.00% GC)
  mean time:        442.841 ms (0.00% GC)
  maximum time:     492.409 ms (0.00% GC)
  --------------
  samples:          12
  evals/sample:     1

In [20]:
# Build, tune and run the :work variant for Int8 with n = 64*512, m = 8*1024, mult = 10.
b = benchit(Int8, 64*512, 8 * 1024, 10, :work)
tune!(b)
run(b)

BenchmarkTools.Trial: 
  memory estimate:  33.08 KiB
  allocs estimate:  22
  --------------
  minimum time:     3.896 s (0.00% GC)
  median time:      3.912 s (0.00% GC)
  mean time:        3.912 s (0.00% GC)
  maximum time:     3.927 s (0.00% GC)
  --------------
  samples:          2
  evals/sample:     1

In [21]:
# Build, tune and run the :naive variant for Int8 with n = 64*512, m = 8*1024, mult = 10.
b = benchit(Int8, 64*512, 8*1024, 10, :naive)
tune!(b)
run(b)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     116.579 s (0.00% GC)
  median time:      116.579 s (0.00% GC)
  mean time:        116.579 s (0.00% GC)
  maximum time:     116.579 s (0.00% GC)
  --------------
  samples:          1
  evals/sample:     1

In [1]:
using ExtractRandom, BenchmarkTools
# Micro-benchmark a single call of `naive_xorbits` on the literal 1.
# NOTE(review): the argument is a compile-time constant, so the compiler may be
# able to fold the call; consider an interpolated value -- TODO confirm.
b = @benchmarkable(ExtractRandom.naive_xorbits(1))
tune!(b)
run(b)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     57.119 ns (0.00% GC)
  median time:      58.767 ns (0.00% GC)
  mean time:        62.460 ns (0.00% GC)
  maximum time:     472.839 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     983

In [3]:
# Micro-benchmark a single call of `weird_xorbits` on the literal 1.
# NOTE(review): the argument is a compile-time constant, so the compiler may be
# able to fold the call; consider an interpolated value -- TODO confirm.
b = @benchmarkable(ExtractRandom.weird_xorbits(1))
tune!(b)
run(b)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     1.695 ns (0.00% GC)
  median time:      1.797 ns (0.00% GC)
  mean time:        1.900 ns (0.00% GC)
  maximum time:     14.262 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1000

In [7]:
# Build, tune and run the :work variant for Int64 with n = 64*512, m = 1024, mult = 10.
b = benchit(Int64, 64*512, 1024, 10, :work)
tune!(b)
run(b)

BenchmarkTools.Trial: 
  memory estimate:  257.02 KiB
  allocs estimate:  22
  --------------
  minimum time:     394.871 ms (0.00% GC)
  median time:      406.699 ms (0.00% GC)
  mean time:        406.014 ms (0.00% GC)
  maximum time:     415.807 ms (0.00% GC)
  --------------
  samples:          13
  evals/sample:     1

In [21]:
# Only construct the :row_major benchmark (this prints the buffer sizes);
# tuning and running are left disabled below.
b = benchit(Int64, 64*512, 1024, 64, :row_major)
#tune!(b)
#run(b)

65536 32768 (1024,32768)


Benchmark(evals=1, seconds=5.0, samples=10000)

In [22]:
# Only construct the :full_width benchmark (this prints the buffer sizes);
# tuning and running are left disabled below.
b = benchit(Int64, 64*512, 1024, 64, :full_width)
#tune!(b)
#run(b)

65536 512(65536,512)


Benchmark(evals=1, seconds=5.0, samples=10000)

In [24]:
# Wrap a random 1024 x 32768 Int64 matrix in a RowMajorTwoUniversal object,
# call it on 1024 * 64 random Int64 values and show the length of the result.
a = ExtractRandom.RowMajorTwoUniversal(rand(Int64, (1024, 32768)))
length(a(rand(Int64, 1024 * 64)))

32768

In [25]:
# Wrap a random (64 * 1024) x 512 Int64 matrix in a FullWidthTwoUniversal object.
b = ExtractRandom.FullWidthTwoUniversal(rand(Int64, (64 * 1024, 512)))

32768

In [26]:
# Sanity check: a 65536 x 512 matrix and a 1024 x 32768 matrix hold the same
# number of elements.
65536 * 512 == 1024 * 32768

true