Skip to content

Commit

Permalink
Merge pull request #290 from davidchen-cn/readfrom_roaring32
Browse files Browse the repository at this point in the history
Modified the implementation of ReadFrom so that roaring64 can directly read roaring32 data.
  • Loading branch information
lemire committed Jan 25, 2021
2 parents 62f3e85 + a53842b commit 4f9df8a
Show file tree
Hide file tree
Showing 4 changed files with 147 additions and 10 deletions.
7 changes: 5 additions & 2 deletions roaring.go
Expand Up @@ -57,11 +57,14 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
// The format is compatible with other RoaringBitmap
// implementations (Java, C) and is documented here:
// https://github.com/RoaringBitmap/RoaringFormatSpec
func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) {
// Because an io.Reader is a stream and cannot be read twice, cookieHeader
// accepts the 4 bytes of data that roaring64.ReadFrom has already read from it.
// It is not necessary to pass cookieHeader when calling roaring.ReadFrom to read roaring32 data directly.
func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) {
stream := internal.ByteInputAdapterPool.Get().(*internal.ByteInputAdapter)
stream.Reset(reader)

p, err = rb.highlowcontainer.readFrom(stream)
p, err = rb.highlowcontainer.readFrom(stream, cookieHeader...)
internal.ByteInputAdapterPool.Put(stream)

return
Expand Down
38 changes: 36 additions & 2 deletions roaring64/roaring64.go
Expand Up @@ -11,6 +11,9 @@ import (
"github.com/RoaringBitmap/roaring"
)

// Cookie values compared against the first two bytes of a serialized
// stream to recognise data written in the 32-bit roaring format.
const (
	// serialCookieNoRunContainer: only arrays and bitmaps.
	serialCookieNoRunContainer = 12346
	// serialCookie: runs, arrays, and bitmaps.
	serialCookie = 12347
)

// Bitmap represents a compressed bitmap where you can add integers.
type Bitmap struct {
highlowcontainer roaringArray64
Expand Down Expand Up @@ -79,16 +82,23 @@ func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
// implementations (Java, C) and is documented here:
// https://github.com/RoaringBitmap/RoaringFormatSpec
func (rb *Bitmap) ReadFrom(stream io.Reader) (p int64, err error) {

cookie, r32, p, err := tryReadFromRoaring32(rb, stream)
if err != nil {
return p, err
} else if r32 {
return p, nil
}
// TODO: Add buffer interning as in base roaring package.

sizeBuf := make([]byte, 8)
sizeBuf := make([]byte, 4)
var n int
n, err = stream.Read(sizeBuf)
if n == 0 || err != nil {
return int64(n), fmt.Errorf("error in bitmap.readFrom: could not read number of containers: %s", err)
}
p += int64(n)
sizeBuf = append(cookie, sizeBuf...)

size := binary.LittleEndian.Uint64(sizeBuf)
rb.highlowcontainer = roaringArray64{}
rb.highlowcontainer.keys = make([]uint32, size)
Expand All @@ -113,6 +123,30 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (p int64, err error) {
return p, nil
}

// tryReadFromRoaring32 consumes the first 4 bytes of stream and checks
// whether their first two bytes (little-endian) equal one of the 32-bit
// roaring cookies, serialCookieNoRunContainer or serialCookie. If so, the
// remainder of the stream is deserialized as a 32-bit roaring.Bitmap, which
// is installed in rb as the single container under key 0.
//
// Returns:
//   - cookie: the 4 bytes consumed from stream, handed back because the
//     stream cannot be re-read by the caller.
//   - r32: true only when the stream was successfully loaded as 32-bit data.
//   - p: the number of header bytes read when reading the header fails; the
//     count reported by the 32-bit ReadFrom on the 32-bit path; 0 otherwise.
//   - err: the error from reading the header or from the 32-bit ReadFrom.
//
// NOTE(review): when the data is not in the 32-bit format, p is 0 even
// though 4 bytes were consumed; confirm that callers account for them.
func tryReadFromRoaring32(rb *Bitmap, stream io.Reader) (cookie []byte, r32 bool, p int64, err error) {
	cookie = make([]byte, 4)
	// A bare Read may legally return fewer than 4 bytes with a nil error,
	// which would leave the tail of cookie zero-filled and the header
	// misinterpreted. ReadFull either fills the buffer or reports an error.
	size, err := io.ReadFull(stream, cookie)
	if err != nil {
		return cookie, false, int64(size), err
	}

	// Only the first two bytes carry the magic number.
	fileMagic := int(binary.LittleEndian.Uint16(cookie[0:2]))
	if fileMagic != serialCookieNoRunContainer && fileMagic != serialCookie {
		return cookie, false, 0, nil
	}

	bm32 := roaring.NewBitmap()
	// The already-consumed header is passed along so the 32-bit reader
	// does not try to read it from the stream again.
	p, err = bm32.ReadFrom(stream, cookie...)
	if err != nil {
		return cookie, false, p, err
	}
	rb.highlowcontainer = roaringArray64{
		keys:            []uint32{0},
		containers:      []*roaring.Bitmap{bm32},
		needCopyOnWrite: []bool{false},
	}
	return cookie, true, p, nil
}

// FromBuffer creates a bitmap from its serialized version stored in buffer
// func (rb *Bitmap) FromBuffer(data []byte) (p int64, err error) {
//
Expand Down
93 changes: 93 additions & 0 deletions roaring64/roaring64_test.go
Expand Up @@ -2,9 +2,12 @@ package roaring64

import (
"fmt"
"io/ioutil"
"log"
"math"
"math/rand"
"os"
"path/filepath"
"strconv"
"testing"
"unsafe"
Expand Down Expand Up @@ -2004,3 +2007,93 @@ func IntsEquals(a, b []uint64) bool {
}
return true
}

// Test_tryReadFromRoaring32 serializes a 32-bit bitmap and checks that a
// 64-bit bitmap can unmarshal those bytes and reports every original value.
func Test_tryReadFromRoaring32(t *testing.T) {
	source := roaring.BitmapOf(1, 2, 65535, math.MaxUint32-1)
	payload, err := source.ToBytes()
	if err != nil {
		t.Fatal(err)
	}

	r64 := NewBitmap()
	unmarshalErr := r64.UnmarshalBinary(payload)
	assert.True(t, unmarshalErr == nil)

	for _, want := range []uint64{1, 2, 65535, math.MaxUint32 - 1} {
		assert.True(t, r64.Contains(want))
	}
}

// Test_tryReadFromRoaring32_File writes a serialized 32-bit bitmap to a
// temporary file and checks that a 64-bit bitmap can load it through
// ReadFrom and reports every original value.
func Test_tryReadFromRoaring32_File(t *testing.T) {
	tempDir, err := ioutil.TempDir("./", "testdata")
	if err != nil {
		// Without a directory nothing below can work, so stop here rather
		// than marking the test failed and carrying on with an empty path.
		t.Fatal(err)
	}
	defer os.RemoveAll(tempDir)

	r32 := roaring.BitmapOf(1, 2, 65535, math.MaxUint32-1)
	bs, err := r32.ToBytes()
	if err != nil {
		t.Fatal(err)
	}
	name := filepath.Join(tempDir, "r32")
	if err := ioutil.WriteFile(name, bs, 0600); err != nil {
		t.Fatal(err)
	}
	file, err := os.Open(name)
	if err != nil {
		t.Fatal(err)
	}
	defer file.Close()

	r64 := NewBitmap()
	// Report a read failure directly instead of letting it show up only as
	// failed Contains assertions.
	if _, err := r64.ReadFrom(file); err != nil {
		t.Fatal(err)
	}
	assert.True(t, r64.Contains(1))
	assert.True(t, r64.Contains(2))
	assert.True(t, r64.Contains(65535))
	assert.True(t, r64.Contains(math.MaxUint32-1))
}

// Test_tryReadFromRoaring32WithRoaring64 serializes a 64-bit bitmap and
// checks that unmarshalling those bytes into a fresh 64-bit bitmap still
// works and reports every original value.
func Test_tryReadFromRoaring32WithRoaring64(t *testing.T) {
	source := BitmapOf(1, 65535, math.MaxUint32, math.MaxUint64)
	payload, err := source.ToBytes()
	if err != nil {
		t.Fatal(err)
	}

	restored := NewBitmap()
	unmarshalErr := restored.UnmarshalBinary(payload)
	assert.True(t, unmarshalErr == nil)

	for _, want := range []uint64{1, 65535, math.MaxUint32, math.MaxUint64} {
		assert.True(t, restored.Contains(want))
	}
}

// Test_tryReadFromRoaring32WithRoaring64_File writes a serialized 64-bit
// bitmap to a temporary file and checks that ReadFrom still loads it into
// a fresh 64-bit bitmap that reports every original value.
func Test_tryReadFromRoaring32WithRoaring64_File(t *testing.T) {
	tempDir, err := ioutil.TempDir("./", "testdata")
	if err != nil {
		// Without a directory nothing below can work, so stop here rather
		// than marking the test failed and carrying on with an empty path.
		t.Fatal(err)
	}
	defer os.RemoveAll(tempDir)

	r64 := BitmapOf(1, 65535, math.MaxUint32, math.MaxUint64)
	bs, err := r64.ToBytes()
	if err != nil {
		t.Fatal(err)
	}

	name := filepath.Join(tempDir, "r32")
	if err := ioutil.WriteFile(name, bs, 0600); err != nil {
		t.Fatal(err)
	}
	file, err := os.Open(name)
	if err != nil {
		t.Fatal(err)
	}
	defer file.Close()

	nr64 := NewBitmap()
	// Report a read failure directly instead of letting it show up only as
	// failed Contains assertions.
	if _, err := nr64.ReadFrom(file); err != nil {
		t.Fatal(err)
	}
	assert.True(t, nr64.Contains(1))
	assert.True(t, nr64.Contains(65535))
	assert.True(t, nr64.Contains(math.MaxUint32))
	assert.True(t, nr64.Contains(math.MaxUint64))
}
19 changes: 13 additions & 6 deletions roaringarray.go
Expand Up @@ -549,11 +549,19 @@ func (ra *roaringArray) toBytes() ([]byte, error) {
return buf.Bytes(), err
}

func (ra *roaringArray) readFrom(stream internal.ByteInput) (int64, error) {
cookie, err := stream.ReadUInt32()

if err != nil {
return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte) (int64, error) {
var cookie uint32
var err error
if len(cookieHeader) > 0 && len(cookieHeader) != 4 {
return int64(len(cookieHeader)), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: incorrect size of cookie header")
}
if len(cookieHeader) == 4 {
cookie = binary.LittleEndian.Uint32(cookieHeader)
} else {
cookie, err = stream.ReadUInt32()
if err != nil {
return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
}
}

var size uint32
Expand All @@ -570,7 +578,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput) (int64, error) {
}
} else if cookie == serialCookieNoRunContainer {
size, err = stream.ReadUInt32()

if err != nil {
return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
}
Expand Down

0 comments on commit 4f9df8a

Please sign in to comment.