Skip to content

Commit

Permalink
Merge pull request #618 from sbromberger/sbromberger/dibitvectors
Browse files Browse the repository at this point in the history
dibit_vector
  • Loading branch information
eulerkochy committed May 23, 2020
2 parents 082b283 + e53ac6a commit 0d9cb40
Show file tree
Hide file tree
Showing 9 changed files with 199 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,6 +1,6 @@
name = "DataStructures"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.17.16"
version = "0.17.17"

[deps]
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
Expand Down
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -28,6 +28,7 @@ This package implements a variety of data structures, including
- Sorted Dict, Sorted Multi-Dict and Sorted Set
- DataStructures.IntSet
- SparseIntSet
- DiBitVector (in which each element can store two bits)

Resources
---------
Expand Down
1 change: 1 addition & 0 deletions docs/make.jl
Expand Up @@ -24,6 +24,7 @@ makedocs(
"mutable_linked_list.md",
"intset.md",
"sorted_containers.md",
"dibit_vector.md",
],
modules = [DataStructures],
format = Documenter.HTML()
Expand Down
47 changes: 47 additions & 0 deletions docs/src/dibit_vector.md
@@ -0,0 +1,47 @@
```@meta
DocTestSetup = :(using DataStructures)
```

# DiBitVector

`DiBitVector` provides a memory-efficient vector whose elements are two bits wide, so each element holds one of the four values `0` to `3`. Its performance and memory characteristics are comparable to those of a `BitVector`.

Examples:

```jldoctest
julia> v = DiBitVector(4, 0)
4-element DiBitVector:
0x00
0x00
0x00
0x00
julia> w = DiBitVector(4, 2)
4-element DiBitVector:
0x02
0x02
0x02
0x02
julia> v[1] = 2
2
julia> v[2:4] .= 2
3-element view(::DiBitVector, 2:4) with eltype UInt8:
0x02
0x02
0x02
julia> v == w
true
julia> pop!(v)
0x02
julia> length(v)
3
```

```@meta
DocTestSetup = nothing
```
4 changes: 3 additions & 1 deletion docs/src/index.md
Expand Up @@ -21,6 +21,7 @@ This package implements a variety of data structures, including
- Sorted Dict, Sorted Multi-Dict and Sorted Set
- DataStructures.IntSet
- SparseIntSet
- DiBitVector

## Contents

Expand All @@ -44,6 +45,7 @@ Pages = [
"mutable_linked_list.md",
"intset.md",
"sorted_containers.md",
"sparse_int_set.md"
"sparse_int_set.md",
"dibit_vector.md"
]
```
7 changes: 6 additions & 1 deletion src/DataStructures.jl
Expand Up @@ -11,7 +11,9 @@ module DataStructures
ReverseOrdering, Reverse, Lt,
isless, union, intersect, symdiff, setdiff, issubset,
searchsortedfirst, searchsortedlast, in,
eachindex, keytype, valtype, minimum, maximum, size
eachindex, keytype, valtype, minimum, maximum, size,
zero, checkbounds


using OrderedCollections
import OrderedCollections: filter, filter!, isordered
Expand Down Expand Up @@ -57,6 +59,8 @@ module DataStructures
export MultiDict, enumerateall
export RobinDict

export DiBitVector

export findkey

include("delegate.jl")
Expand Down Expand Up @@ -107,5 +111,6 @@ module DataStructures
include("sparse_int_set.jl")
export SparseIntSet

include("dibit_vector.jl")
include("deprecations.jl")
end
81 changes: 81 additions & 0 deletions src/dibit_vector.jl
@@ -0,0 +1,81 @@
"""
DiBitVector(n::Integer, v::Integer)
Create a `DiBitVector` with `n` elements preinitialized to a value `v`
from `0` to `3`, inclusive.
A `DiBitVector` is a vector whose elements are two bits wide, allowing
storage of integer values between 0 and 3. This structure is optimized for
performance and memory savings for large numbers of elements.
"""
mutable struct DiBitVector <: AbstractVector{UInt8}
data::Vector{UInt64}
len::UInt

function DiBitVector(n::Integer, v::Integer)
if Int(n) < 0
throw(ArgumentError("n ($n) must be greater than or equal to zero"))
end
if !(Int(v) in 0:3)
throw(ArgumentError("v ($v) must be in 0:3"))
end
fv = (0x0000000000000000, 0x5555555555555555,
0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff)[v + 1]
vec = Vector{UInt64}(undef, cld(n, 32))
fill!(vec, fv)
return new(vec, n % UInt64)
end
end

# Bounds check for a single integer index: `n` is valid only when
# `1 <= n <= D.len`. The comparison is made against the logical length rather
# than the allocated capacity (`length(D.data) << 5`), so the unused padding
# slots in the last word are never reported as in-bounds.
# Throws `BoundsError(D, n)` for an invalid index.
@inline checkbounds(D::DiBitVector, n::Integer) =
    0 < n <= D.len || throw(BoundsError(D, n))

"""
DiBitVector(n::Integer)
Create a [`DiBitVector`](@ref) with `n` elements set to `0`.
"""
DiBitVector(n::Integer) = DiBitVector(n, 0)
# Zero-argument constructor: builds a `DiBitVector` with no elements.
function DiBitVector()
    return DiBitVector(0, 0)
end

# Number of stored elements, i.e. the `len` field reinterpreted as an `Int`.
@inline function Base.length(x::DiBitVector)
    return x.len % Int
end
# A `DiBitVector` is one-dimensional, so its size is the 1-tuple of its length.
@inline function Base.size(x::DiBitVector)
    return tuple(length(x))
end

# Map the 1-based element position `n` to the 1-based index of the word that
# holds it; the shift by 5 groups positions 32 to a word.
@inline function index(n::Integer)
    zero_based_word = (n - 1) >>> 5
    return zero_based_word + 1
end
# Bit offset (0, 2, …, 62) of element position `n` within its word: the
# zero-based slot number modulo 32, multiplied by the two bits per element.
@inline function offset(n::Integer)
    slot = (UInt64(n) - 0x01) & 0x1f
    return slot << 1
end

# Read element `i`: fetch the word that holds it, shift its two-bit slot down
# to the low bits and mask everything else away. The result is a `UInt8`.
@inline function Base.getindex(x::DiBitVector, i::Int)
    @boundscheck checkbounds(x, i)
    word = @inbounds x.data[index(i)]
    shifted = word >>> offset(i)
    return UInt8(shifted & 3)
end

# Overwrite the two-bit slot of element `i` with `v`, performing neither a
# bounds check nor a range check. `v` is OR-ed in unmasked, so callers must
# pass a value in 0:3; anything larger would spill into neighbouring slots.
# Evaluates to the updated word.
@inline function unsafe_setindex!(x::DiBitVector, v::UInt64, i::Int)
    slot = index(i)
    shift = offset(i)
    cleared = (@inbounds x.data[slot]) & ~(UInt64(3) << shift)
    @inbounds x.data[slot] = cleared | (v << shift)
end

# Store `v` at position `i`.
#
# `v` must be representable in two bits (0:3); any other value, including a
# negative one, raises a `DomainError` that carries the offending value.
# An out-of-range `i` raises a `BoundsError` unless bounds checking has been
# elided by the caller with `@inbounds`.
@inline function Base.setindex!(x::DiBitVector, v::Integer, i::Int)
    # `DomainError(val, msg)` keeps the rejected value in `e.val` instead of
    # passing the whole message string off as the value.
    v & 3 == v || throw(DomainError(v, "Can only contain 0:3"))
    @boundscheck checkbounds(x, i)
    return unsafe_setindex!(x, convert(UInt64, v), i)
end

# Append `v` to the end of `x` and return `x`.
#
# `v` must be in 0:3. A negative `v` raises `InexactError` (from the conversion
# to `UInt64`) and a value above 3 raises `DomainError`. Validation happens
# before anything is mutated, so a rejected value leaves `x` unchanged.
@inline function Base.push!(x::DiBitVector, v::Integer)
    val = convert(UInt64, v)
    val & 3 == val || throw(DomainError(v, "Can only contain 0:3"))
    len = length(x)
    # Every word holds 32 elements; add a fresh word once all are in use.
    len == UInt64(length(x.data)) << 5 && push!(x.data, zero(UInt64))
    x.len = (len + 1) % UInt64
    # The value was validated above and the slot is known to exist.
    unsafe_setindex!(x, val, len + 1)
    return x
end

# Remove the last element of `x` and return it.
# Throws an `ArgumentError` when `x` has no elements.
@inline function Base.pop!(x::DiBitVector)
    if x.len == 0
        throw(ArgumentError("array must be non-empty"))
    end
    last_value = x[end]
    x.len = (x.len - 1) % UInt64
    # Drop the trailing word once the remaining elements fit exactly in the
    # words before it.
    if x.len == UInt64((length(x.data) - 1)) << 5
        pop!(x.data)
    end
    return last_value
end

# `zero` of a `DiBitVector` is a new vector of the same length whose elements
# are all `0`.
@inline function zero(x::DiBitVector)
    return DiBitVector(x.len, 0)
end

1 change: 1 addition & 0 deletions test/runtests.jl
Expand Up @@ -30,6 +30,7 @@ tests = ["int_set",
"priority_queue",
"fenwick",
"robin_dict",
"dibit_vector",
]

if length(ARGS) > 0
Expand Down
58 changes: 58 additions & 0 deletions test/test_dibit_vector.jl
@@ -0,0 +1,58 @@
# Tests for `DiBitVector`: construction and argument validation, push!/pop!
# with growth and shrinkage of the backing `data` words, `zero`, and bounds
# checking. The push!/pop! assertions are order-dependent.
@testset "DiBitVectors" begin
# The three constructor forms: empty, length only, length plus fill value.
d0 = DiBitVector()
d1 = DiBitVector(10)
d2 = DiBitVector(10, 0)

# The fill value must be in 0:3.
@test_throws ArgumentError DiBitVector(5, 4)
@test_throws ArgumentError DiBitVector(5, -1)

# The length must be non-negative.
@test_throws ArgumentError DiBitVector(-5)
@test_throws ArgumentError DiBitVector(-5, 1)

# Empty vector: pop! fails, and a push!/pop! round trip returns to empty.
@test length(d0) == 0
@test isempty(d0)
@test_throws ArgumentError pop!(d0)
push!(d0, 1)
@test length(d0) == 1
@test pop!(d0) == 1
@test length(d0) == 0
@test_throws ArgumentError pop!(d0)

# The one-argument constructor is equivalent to filling with 0.
@test length(d1) == length(d2) == 10
@test d1 == d2
@test all(d1 .== 0)
@test all(d2 .== 0)

@test size(d1) == size(d2) == (10,)

d3 = DiBitVector(30, 3)
@test all(d3 .== 3)
@test d3[1] == d3[end] == 3

# Elements 31 and 32 still fit in the first word; the 33rd push adds a second.
push!(d3, 0)
@test length(d3) == 31 && length(d3.data) == 1
push!(d3, 1)
@test length(d3) == 32 && length(d3.data) == 1
push!(d3, 2)
@test length(d3) == 33 && length(d3.data) == 2
push!(d3, 3)
@test length(d3) == 34 && length(d3.data) == 2

# Popping returns values in reverse push order; the second word is released
# once the length drops back to 32.
@test pop!(d3) == 3
@test length(d3) == 33 && length(d3.data) == 2
@test pop!(d3) == 2
@test length(d3) == 32 && length(d3.data) == 1
@test pop!(d3) == 1
@test length(d3) == 31 && length(d3.data) == 1
@test pop!(d3) == 0
@test length(d3) == 30 && length(d3.data) == 1
# This pop removes one of the original fill values (3).
@test pop!(d3) == 3
@test length(d3) == 29 && length(d3.data) == 1

# `zero` preserves the length and sets every element to 0.
@test zero(d3) == DiBitVector(length(d3))

# Indices at or below zero, or far past the end, are rejected.
@test_throws BoundsError d3[0]
@test_throws BoundsError d3[-1]
@test_throws BoundsError d3[99991]
end

2 comments on commit 0d9cb40

@eulerkochy
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/15208

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.17.17 -m "<description of version>" 0d9cb40acc0e2898fb57022c54bf4432f495ce96
git push origin v0.17.17

Please sign in to comment.