In [1]:
using Cxx, ExtractRandom, BenchmarkTools
rotr_j(x::Integer, k::Integer) = (x >>> k) | (x << (8sizeof(x) - k))
rotr_j!{T <: Integer}(x::Vector{T}, k::Vector{T}, out::Vector{T}) = begin
    const N = 8sizeof(T)
    @inbounds for i in eachindex(x)
        const xᵢ = x[i]
        const kᵢ = k[i]
        out[i] = (xᵢ >>> kᵢ) | (xᵢ << (N - kᵢ))
    end
end

rotr_j! (generic function with 1 method)

In [2]:
cxx"""
template<class T> T rotr(T x, T k) {
  auto const N = 8 * sizeof(decltype(x));
  return (x >> k) | (x << (N - k));
}

template<class T> void rotr(T n, T const *a, T const * k, T * out) {
    for(int64_t i(0); i < n; ++i)
       *(out + i) = rotr(*(a + i), *(k + i));
}
"""
rotr_c(x::AbstractVector, k::AbstractVector, out::AbstractVector) = begin
    @cxx rotr(length(x), pointer(x), pointer(k), pointer(out))
end
rotr_c(x::Integer, k::Integer) = @cxx rotr(x, k)

rotr_c (generic function with 2 methods)

In [3]:
a, b, c = ones(Int64, 1000), ones(Int64, 1000), zeros(Int64, 1000)
@benchmark $c .= rotr_j.($a, $b)

BenchmarkTools.Trial: 
  memory estimate:  32 bytes
  allocs estimate:  1
  --------------
  minimum time:     4.782 μs (0.00% GC)
  median time:      4.921 μs (0.00% GC)
  mean time:        5.013 μs (0.00% GC)
  maximum time:     17.294 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     7

In [4]:
@benchmark rotr_j!($a, $b, $c)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     4.247 μs (0.00% GC)
  median time:      4.254 μs (0.00% GC)
  mean time:        4.345 μs (0.00% GC)
  maximum time:     12.985 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     7

In [5]:
@benchmark $a .= rotr_c.($b, $c)

BenchmarkTools.Trial: 
  memory estimate:  32 bytes
  allocs estimate:  1
  --------------
  minimum time:     3.383 μs (0.00% GC)
  median time:      3.385 μs (0.00% GC)
  mean time:        3.483 μs (0.00% GC)
  maximum time:     10.842 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     8

In [6]:
@benchmark rotr_c($a, $b, $c)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     1.962 μs (0.00% GC)
  median time:      1.990 μs (0.00% GC)
  mean time:        2.112 μs (0.00% GC)
  maximum time:     13.743 μs (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     10

In [7]:
@benchmark rotr_j(1, 2)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     1.696 ns (0.00% GC)
  median time:      1.797 ns (0.00% GC)
  mean time:        1.879 ns (0.00% GC)
  maximum time:     15.830 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1000

In [8]:
@benchmark rotr_c(1, 2)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     3.923 ns (0.00% GC)
  median time:      3.929 ns (0.00% GC)
  mean time:        4.016 ns (0.00% GC)
  maximum time:     34.771 ns (0.00% GC)
  --------------
  samples:          10000
  evals/sample:     1000