In [1]:
import Base: getindex, size, println
function println(x)
    show(STDOUT, "text/plain", x)
    print("\n")
end
using BenchmarkTools

########## Bit-Wise Operations ##############
BInt = Int64
DInt = UInt64

Ints = Union{Vector{Int}, Int}
Intsu = Union{Ints, UnitRange{Int}}
DInts = Union{Vector{DInt}, DInt}
bmask(ibit::Int) = one(DInt) << (ibit-1)
bmask(ibit::Vector{Int}) = reduce(+, zero(DInt), [one(DInt) << b for b in (ibit.-1)])
bmask(bits::UnitRange{Int}) = ((one(DInt) << (bits.stop - bits.start + 1)) - one(DInt)) << (bits.start-1)

# bit size
bsizeof(x) = sizeof(x) << 3
function bit_length(x::DInt)
    local n = 0
    while x!=0
        n += 1
        x >>= 1
    end
    return n
end

# take a bit/bits
takebit(indices::DInts, ibit::Int) = @. (indices >> (ibit-1)) & 1
takebit(indices::DInt, ibit::Vector{Int}) = @. (indices .>> (ibit-1)) & 1
# a position is 1?
testbit(indices::DInts, ibit::Int) = @. (indices & bmask(ibit)) != 0
testbit(indices::DInt, ibit::Vector{Int}) = @. (indices & bmask(ibit)) != 0

# set a bit
setbit(indices::DInts, ibit::Intsu) = indices .| bmask(ibit)
setbit!(indices::DInts, ibit::Intsu) = indices[:] |= bmask(ibit)

# flip a bit/bits
flip(indices::DInts, ibit::Intsu) = xor.(indices, bmask(ibit))
flip!(indices::DInts, ibit::Intsu) = indices[:] = xor.(indices, bmask(ibit))
# flip all bits
flip(indices::DInts) = ~indices
flip!(indices::DInts) = indices[:] = ~indices

flip! (generic function with 2 methods)

In [2]:
"""move 1st dim to last"""
function lrollaxis2!(vec::AbstractArray)
    n = length(vec)
    halfn = n >> 1
    temp = vec[2:2:n]
    vec[1:halfn] = vec[1:2:n]
    vec[halfn+1:end] = temp
    vec
end

"""move last dim to 1st"""
function rrollaxis2!(vec::AbstractArray)
    n = length(vec)
    halfn = n >> 1
    temp = vec[halfn+1:end]
    vec[1:2:n] = vec[1:halfn]
    vec[2:2:n] = temp
    vec
end

function rollaxis2!(v::AbstractArray, k::Int)
    if k > 0
        for i=1:k
            lrollaxis2!(v)
        end
    else
        for i=1:-k
            rrollaxis2!(v)
        end
    end
    v
end

rollaxis2! (generic function with 1 method)

In [55]:
v = randn([2 for i=1:10]...)
v2 = copy(v)
@assert rollaxis2!(v, 5) == permutedims(v2, [(i+4)%10+1 for i=1:10])
@assert rollaxis2!(v, -5) == v2
@assert rollaxis2!(v, 0) == v2

In [5]:
@benchmark permutedims(v2, [(i)%10+1 for i=1:10])

BenchmarkTools.Trial: 
  memory estimate:  48.94 KiB
  allocs estimate:  2573
  --------------
  minimum time:     81.436 μs (0.00% GC)
  median time:      84.490 μs (0.00% GC)
  mean time:        93.673 μs (2.43% GC)
  maximum time:     1.943 ms (89.97% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [221]:
@benchmark rollaxis2!(v2, 1)

BenchmarkTools.Trial: 
  memory estimate:  8.45 KiB
  allocs estimate:  11
  --------------
  minimum time:     2.043 μs (0.00% GC)
  median time:      2.190 μs (0.00% GC)
  mean time:        3.016 μs (20.68% GC)
  maximum time:     254.649 μs (93.09% GC)
  --------------
  samples:          10000
  evals/sample:     9

In [184]:
function roll_prod(s, n, ops)
    st = reshape(s, 2, 1<<(n-1))

    for i = 1:n
        st .= ops[i] * st
        rollaxis2!(s)
    end
    s
end

roll_prod (generic function with 1 method)

In [156]:
function focus_prod(s, n, ops)
    st = reshape(s, 2, 1<<(n-1))
    st .= first(ops) * st

    for i = 2:n
        s = reshape(s, ntuple(x->2, Val{n}))
        perm = collect(1:n)
        perm[1] = i
        perm[i] = 1
        permutedims!(s, s, perm)
        st = reshape(s, 2, 1<<(n-1))
        st .= ops[i] * st
        permutedims!(s, s, perm)
    end
    s
end

focus_prod (generic function with 1 method)

In [157]:
ops = [rand(2, 2) for i=1:10];
s = rand(1<<10)
n = 10
@benchmark focus_prod($s, n, $ops)

BenchmarkTools.Trial: 
  memory estimate:  855.78 KiB
  allocs estimate:  49321
  --------------
  minimum time:     2.263 ms (0.00% GC)
  median time:      2.285 ms (0.00% GC)
  mean time:        2.378 ms (2.28% GC)
  maximum time:     5.386 ms (41.44% GC)
  --------------
  samples:          2101
  evals/sample:     1

In [183]:
ops = [rand(2, 2) for i=1:10];
s = rand(1<<10)
n = 10
@benchmark roll_prod($s, n, $ops)

BenchmarkTools.Trial: 
  memory estimate:  165.09 KiB
  allocs estimate:  72
  --------------
  minimum time:     52.231 μs (0.00% GC)
  median time:      54.464 μs (0.00% GC)
  mean time:        64.145 μs (13.49% GC)
  maximum time:     1.503 ms (90.43% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [116]:
pos_mask(ibit::Int) = 1 << (ibit-1)
pos_mask(ibit::Vector{Int}) = reduce(+, zero(DInt), [1 << b for b in (ibit.-1)])
# take a bit/bits
takebit(indices::DInts, ibit::Int) = (indices .>> (ibit-1)) .& 1
takebit(indices::DInt, ibit::Vector{Int}) = (indices .>> (ibit.-1)) .& 1

setto!(indices::DInts, mask::Ints) = indices[:] .⊻= mask

setto! (generic function with 1 method)

In [267]:
function swap_bits!(basis::Vector{DInt}, i::Int, j::Int)
    temp = takebit(basis, i)
    setto!(basis, takebit(basis, j).<<(i-1))
    setto!(basis, temp.<<(j-1))
end

swap_bits! (generic function with 1 method)

In [264]:
basis = [1,2,3,6,7]
println([bin(b,4) for b in basis])

5-element Array{String,1}:
 "0001"
 "0010"
 "0011"
 "0110"
 "0111"


In [268]:
swap_bits!(basis, 1,2)
println([bin(b,4) for b in basis])

5-element Array{String,1}:
 "0000"
 "0011"
 "0011"
 "0111"
 "0111"


In [246]:
rolling([X, Y, (), chain(K2,G2), L])

LoadError: [91mUndefVarError: rolling not defined[39m

In [62]:
function swapbit(num::Ints, i::Int, j::Int)
    i = i-1
    j = j-1
    k = @. (num >> j) & 1 - (num >> i) & 1
    @. num + k*(1<<i) - k*(1<<j)
end
swapbit(5, 1,2)

6

In [63]:
i,j=0,1
b=collect(1:1000)
@benchmark swapbit(b, i,j)

BenchmarkTools.Trial: 
  memory estimate:  15.88 KiB
  allocs estimate:  2
  --------------
  minimum time:     7.942 μs (0.00% GC)
  median time:      8.186 μs (0.00% GC)
  mean time:        9.024 μs (7.05% GC)
  maximum time:     294.192 μs (89.22% GC)
  --------------
  samples:          10000
  evals/sample:     5

In [49]:
swapbit(b, i,j)

1000-element Array{Int64,1}:
   0
   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
   ⋮
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999

In [56]:
bmask(bits::UnitRange{Int}) = ((one(DInt) << (bits.stop - bits.start + 1)) - one(DInt)) << (bits.start-1)

bmask (generic function with 3 methods)

In [57]:
bin(bmask(1:5))

"11111"

In [3]:
# utils used in controled bits
function indices_with(num_bit::Int, poss::Vector{Int}, vals::Vector{BInt}, indices::Vector{DInt})
    sls = Vector{Union{Colon, Int}}([Colon() for i=1:num_bit])
    sls[poss] = vals.+1
    reshape(getindex(reshape(indices, fill(2, num_bit)...), sls...), 1<<(num_bit-length(poss)))
end

indices_with (generic function with 1 method)

In [27]:
num_bit = 16
basis = collect(UInt, 1:1<<num_bit)-1
@benchmark indices_with(num_bit, [3, 7], [1, 1], basis)

BenchmarkTools.Trial: 
  memory estimate:  144.59 KiB
  allocs estimate:  600
  --------------
  minimum time:     214.452 μs (0.00% GC)
  median time:      218.172 μs (0.00% GC)
  mean time:        247.412 μs (3.21% GC)
  maximum time:     2.100 ms (81.98% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [5]:
function _subspace(num_bit::Int, poss::Vector{Int}, base::DInt, output::Vector{DInt})
    if length(poss) == 0
        return output[1] = base
    else
        rest, pos = poss[1:end-1], poss[end]
        # efficiency of vcat?
        _subspace(num_bit, rest, base, output[1:1<<length(rest)])
        _subspace(num_bit, rest, flip(base, pos), output[1<<length(rest)+1:end])
    end
end

_subspace (generic function with 1 method)

In [6]:
function indices_with(num_bit::Int, poss::Vector{Int}, vals::Vector{BInt})
    remain_poss = setdiff(1:num_bit, poss)
    onepos = poss[vals.!=0]
    mask = length(onepos) == 0?zero(DInt):bmask(onepos)
    output = Array{DInt}(1<<length(remain_poss))
    _subspace(num_bit, remain_poss, mask, output)
    output
end

indices_with (generic function with 2 methods)

In [71]:
num_bit = 16
indices_with(num_bit, [1,2], [0,0])

16384-element Array{UInt64,1}:
 0x000014ba1de12440
 0x0000000005a0be80
 0x0000000005a0be80
 0x0000000005a0be80
 0x00000000058fbe38
 0x00000000058fbe58
 0x00000000058fbe78
 0x00000000058fec26
 0x0000000100000000
 0x00000000058fbf20
 0x0000000100000000
 0x000000000000005a
 0x0000000000000070
                  ⋮
 0x000000000000ff70
 0x000000000000ff72
 0x000000000000ff78
 0x000000000000ff7a
 0x000000000000ff90
 0x000000000000ff92
 0x000000000000ff98
 0x000000000000ff9a
 0x000000000000ffb0
 0x000000000000ffb2
 0x000000000000ffb8
 0x000000000000ffba

In [7]:
neg(num_bit::Int, mask::DInt) = bmask(1:num_bit) ⊻ mask
@show bmask([1,2,3])
bin(neg(16, bmask([1,2,3])))

bmask([1, 2, 3]) = 0x0000000000000007


"1111111111111000"

In [47]:
function indices_with2(num_bit::Int, poss::Vector{Int}, vals::Vector{BInt}, basis::Vector{DInt})
    mask = bmask(poss)
    valmask = bmask(poss[vals.!=0])
    basis[(basis .& mask) .== valmask]
end

indices_with2 (generic function with 1 method)

In [48]:
num_bit = 16
basis = collect(UInt, 0:1<<num_bit-1)
@benchmark indices_with2(num_bit, [1,2,3,4,5,6], [0,0,1,1,0,1], basis)

BenchmarkTools.Trial: 
  memory estimate:  27.03 KiB
  allocs estimate:  70
  --------------
  minimum time:     216.503 μs (0.00% GC)
  median time:      219.320 μs (0.00% GC)
  mean time:        229.426 μs (0.54% GC)
  maximum time:     2.027 ms (80.18% GC)
  --------------
  samples:          10000
  evals/sample:     1

In [24]:
indices_with(num_bit, [1,2], [0,0], basis) == indices_with2(num_bit, [1,2], [0,0], basis)

true

In [28]:
indices_with2(num_bit, [1], [1], basis)

32768-element Array{UInt64,1}:
 0x0000000000000001
 0x0000000000000003
 0x0000000000000005
 0x0000000000000007
 0x0000000000000009
 0x000000000000000b
 0x000000000000000d
 0x000000000000000f
 0x0000000000000011
 0x0000000000000013
 0x0000000000000015
 0x0000000000000017
 0x0000000000000019
                  ⋮
 0x000000000000ffe9
 0x000000000000ffeb
 0x000000000000ffed
 0x000000000000ffef
 0x000000000000fff1
 0x000000000000fff3
 0x000000000000fff5
 0x000000000000fff7
 0x000000000000fff9
 0x000000000000fffb
 0x000000000000fffd
 0x000000000000ffff

In [111]:
function indices_with(num_bit::Int, pos::Int, val::BInt)
    res = Array{DInt}(1<<(num_bit-1))
    step = 1<<pos
    halfstep = step>>1
    halfstep_1 = halfstep-1
    offset = val==1?halfstep:0
    maxval = 1<<num_bit-1
    
    @simd for i in 1:1<<(num_bit-pos)
        start = (i-1)*step+offset
        ihalfstep = i*halfstep
        @inbounds res[ihalfstep-halfstep_1:ihalfstep] = collect(start:start+halfstep_1)
    end
    res
end

indices_with (generic function with 3 methods)

In [112]:
indices_with(4, 1,0)

8-element Array{UInt64,1}:
 0x0000000000000000
 0x0000000000000002
 0x0000000000000004
 0x0000000000000006
 0x0000000000000008
 0x000000000000000a
 0x000000000000000c
 0x000000000000000e

In [96]:
indices_with(num_bit, 1, 0) == indices_with2(num_bit, [1], [0], basis)

true

In [113]:
@benchmark indices_with(num_bit, 1, 0)

BenchmarkTools.Trial: 
  memory estimate:  5.25 MiB
  allocs estimate:  98314
  --------------
  minimum time:     2.018 ms (0.00% GC)
  median time:      2.167 ms (0.00% GC)
  mean time:        3.217 ms (27.34% GC)
  maximum time:     11.664 ms (66.03% GC)
  --------------
  samples:          1550
  evals/sample:     1