Skip to content

Commit

Permalink
Merge pull request #249 from kevinconaway/kevinconaway/add-iterate
Browse files Browse the repository at this point in the history
Add Iterate() method for iterating the bitmap without an iterator
  • Loading branch information
lemire committed Mar 30, 2020
2 parents 239247b + 7c87cea commit c82f60c
Show file tree
Hide file tree
Showing 7 changed files with 223 additions and 16 deletions.
12 changes: 12 additions & 0 deletions arraycontainer.go
Expand Up @@ -24,6 +24,18 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
}
}

// iterate feeds every value produced by a shortIterator over ac.content
// (starting at position 0) to cb. It returns false as soon as cb returns
// false, without visiting any further value, and true once the iterator
// reports that nothing is left.
func (ac *arrayContainer) iterate(cb func(x uint16) bool) bool {
	// The iterator is built as a local value here, unlike getShortIterator,
	// which hands out a pointer.
	it := shortIterator{ac.content, 0}

	for {
		if !it.hasNext() {
			return true
		}
		if keepGoing := cb(it.next()); !keepGoing {
			return false
		}
	}
}

// getShortIterator returns, as a shortPeekable, a pointer to a fresh
// shortIterator built from ac.content and the starting value 0.
func (ac *arrayContainer) getShortIterator() shortPeekable {
	it := shortIterator{ac.content, 0}
	return &it
}
Expand Down
91 changes: 76 additions & 15 deletions benchmark_test.go
Expand Up @@ -363,23 +363,84 @@ func BenchmarkCountBitset(b *testing.B) {

// go test -bench BenchmarkIterate -run -
func BenchmarkIterateRoaring(b *testing.B) {
b.StopTimer()
r := rand.New(rand.NewSource(0))
s := NewBitmap()
sz := 150000
initsize := 65000
for i := 0; i < initsize; i++ {
s.Add(uint32(r.Int31n(int32(sz))))
}
b.StartTimer()
for j := 0; j < b.N; j++ {
c9 = uint(0)
i := s.Iterator()
for i.HasNext() {
i.Next()
c9++
newBitmap := func() *Bitmap {
r := rand.New(rand.NewSource(0))
s := NewBitmap()
sz := 150000
initsize := 65000
for i := 0; i < initsize; i++ {
s.Add(uint32(r.Int31n(int32(sz))))
}
return s
}

b.Run("iterator-compressed", func(b *testing.B) {
b.ReportAllocs()

s := newBitmap()
s.RunOptimize()

b.ResetTimer()

for j := 0; j < b.N; j++ {
c9 = uint(0)
i := s.Iterator()
for i.HasNext() {
i.Next()
c9++
}
}
})

b.Run("iterator", func(b *testing.B) {
b.ReportAllocs()

s := newBitmap()

b.ResetTimer()

for j := 0; j < b.N; j++ {
c9 = uint(0)
i := s.Iterator()
for i.HasNext() {
i.Next()
c9++
}
}
})

b.Run("iterate-compressed", func(b *testing.B) {
b.ReportAllocs()

s := newBitmap()
s.RunOptimize()

b.ResetTimer()

for j := 0; j < b.N; j++ {
c9 = uint(0)
s.Iterate(func(x uint32) bool {
c9++
return true
})
}
})

b.Run("iterate", func(b *testing.B) {
b.ReportAllocs()

s := newBitmap()

b.ResetTimer()

for j := 0; j < b.N; j++ {
c9 = uint(0)
s.Iterate(func(x uint32) bool {
c9++
return true
})
}
})
}

// go test -bench BenchmarkSparseIterate -run -
Expand Down
12 changes: 12 additions & 0 deletions bitmapcontainer.go
Expand Up @@ -96,6 +96,18 @@ func (bc *bitmapContainer) maximum() uint16 {
return uint16(0)
}

// iterate hands each value yielded by a bitmapContainerShortIterator to cb.
// The iterator starts at the position returned by bc.NextSetBit(0). The
// method returns false immediately when cb returns false, and true when the
// iterator has no more values.
func (bc *bitmapContainer) iterate(cb func(x uint16) bool) bool {
	it := bitmapContainerShortIterator{ptr: bc, i: bc.NextSetBit(0)}

	for {
		if !it.hasNext() {
			return true
		}
		if keepGoing := cb(it.next()); !keepGoing {
			return false
		}
	}
}

type bitmapContainerShortIterator struct {
ptr *bitmapContainer
i int
Expand Down
32 changes: 32 additions & 0 deletions roaring.go
Expand Up @@ -416,6 +416,38 @@ func (rb *Bitmap) String() string {
return buffer.String()
}

// Iterate calls cb once for each value stored in the bitmap and stops as soon
// as cb returns false.
// The results are undefined if the bitmap is modified during the iteration
// (e.g., with Add or Remove).
// No guarantee is made about the order in which values are passed to cb.
func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
	for i := 0; i < rb.highlowcontainer.size(); i++ {
		// The container key supplies the upper 16 bits of every value in it.
		hs := uint32(rb.highlowcontainer.getKeyAtIndex(i)) << 16
		emit := func(low uint16) bool {
			return cb(hs | uint32(low))
		}

		// Dispatching on the concrete type is hacky, but it avoids the
		// allocations caused by passing a closure through an interface method.
		// A container of any other type leaves keepGoing false, which ends the
		// iteration.
		keepGoing := false
		switch t := rb.highlowcontainer.getContainerAtIndex(i).(type) {
		case *arrayContainer:
			keepGoing = t.iterate(emit)
		case *runContainer16:
			keepGoing = t.iterate(emit)
		case *bitmapContainer:
			keepGoing = t.iterate(emit)
		}

		if !keepGoing {
			return
		}
	}
}

// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
func (rb *Bitmap) Iterator() IntPeekable {
Expand Down
76 changes: 76 additions & 0 deletions roaring_test.go
Expand Up @@ -2308,3 +2308,79 @@ func TestBitmapFlipMaxRangeEnd(t *testing.T) {

assert.EqualValues(t, MaxRange, bm.GetCardinality())
}

// TestIterate adds the values 0..299 to a fresh bitmap and checks that the
// sequence collected through Iterate equals the slice returned by ToArray.
func TestIterate(t *testing.T) {
	rb := NewBitmap()
	for v := uint32(0); v < 300; v++ {
		rb.Add(v)
	}

	var got []uint32
	collect := func(x uint32) bool {
		got = append(got, x)
		return true // never halt: every value must be visited
	}
	rb.Iterate(collect)

	assert.Equal(t, rb.ToArray(), got)
}

// TestIterateCompressed is the same check as TestIterate (values 0..299,
// Iterate output compared against ToArray), performed after RunOptimize has
// been called on the bitmap.
func TestIterateCompressed(t *testing.T) {
	rb := NewBitmap()
	for v := uint32(0); v < 300; v++ {
		rb.Add(v)
	}
	rb.RunOptimize()

	var got []uint32
	collect := func(x uint32) bool {
		got = append(got, x)
		return true // never halt: every value must be visited
	}
	rb.Iterate(collect)

	assert.Equal(t, rb.ToArray(), got)
}

// TestIterateLargeValues fills a bitmap with the contiguous range
// 150000..449999 and checks that Iterate yields exactly what ToArray returns.
func TestIterateLargeValues(t *testing.T) {
	rb := NewBitmap()

	// The range spans several distinct upper-16-bit keys, so more than one
	// container is traversed.
	// NOTE(review): this range was chosen with the intent of exercising every
	// container type; no RunOptimize call is made here, so confirm that run
	// containers are actually produced.
	for v := uint32(150000); v < 450000; v++ {
		rb.Add(v)
	}

	var got []uint32
	collect := func(x uint32) bool {
		got = append(got, x)
		return true // never halt: every value must be visited
	}
	rb.Iterate(collect)

	assert.Equal(t, rb.ToArray(), got)
}

// TestIterateHalt checks that Iterate stops once the callback returns false.
// The callback halts after receiving all but one of the bitmap's values, so
// the collected slice must equal ToArray with its final element dropped.
func TestIterateHalt(t *testing.T) {
	rb := NewBitmap()

	// The range spans several distinct upper-16-bit keys, so more than one
	// container is traversed.
	// NOTE(review): this range was chosen with the intent of exercising every
	// container type; no RunOptimize call is made here, so confirm that run
	// containers are actually produced.
	for v := uint32(150000); v < 450000; v++ {
		rb.Add(v)
	}

	stopAt := rb.GetCardinality() - 1

	var got []uint32
	rb.Iterate(func(x uint32) bool {
		got = append(got, x)
		// The number of values seen so far is len(got); keep going until it
		// reaches stopAt.
		return uint64(len(got)) != stopAt
	})

	want := rb.ToArray()
	want = want[:len(want)-1]
	assert.Equal(t, want, got)
}
4 changes: 3 additions & 1 deletion roaringarray.go
Expand Up @@ -4,9 +4,10 @@ import (
"bytes"
"encoding/binary"
"fmt"
"io"

snappy "github.com/glycerine/go-unsnap-stream"
"github.com/tinylib/msgp/msgp"
"io"
)

//go:generate msgp -unexported
Expand Down Expand Up @@ -38,6 +39,7 @@ type container interface {
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
xor(r container) container
getShortIterator() shortPeekable
iterate(cb func(x uint16) bool) bool
getReverseIterator() shortIterable
getManyIterator() manyIterable
contains(i uint16) bool
Expand Down
12 changes: 12 additions & 0 deletions runcontainer.go
Expand Up @@ -1162,6 +1162,18 @@ func (rc *runContainer16) newRunIterator16() *runIterator16 {
return &runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0}
}

// iterate passes each value produced by a runIterator16 over rc to cb. The
// iterator is created with curIndex and curPosInIndex both 0, the same
// starting state newRunIterator16 uses. The method returns false as soon as
// cb returns false and true after the iterator is exhausted.
func (rc *runContainer16) iterate(cb func(x uint16) bool) bool {
	it := runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0}

	for {
		if !it.hasNext() {
			return true
		}
		if keepGoing := cb(it.next()); !keepGoing {
			return false
		}
	}
}

// hasNext returns false if calling next will panic. It
// returns true when there is at least one more value
// available in the iteration sequence.
Expand Down

0 comments on commit c82f60c

Please sign in to comment.