diff --git a/roaring.go b/roaring.go
index 131b1247..b78f83cc 100644
--- a/roaring.go
+++ b/roaring.go
@@ -57,11 +57,14 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
 // The format is compatible with other RoaringBitmap
 // implementations (Java, C) and is documented here:
 // https://github.com/RoaringBitmap/RoaringFormatSpec
-func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) {
+// Because an io.Reader is a stream that cannot be read twice, the optional
+// cookieHeader carries the 4 cookie bytes roaring64.ReadFrom has already consumed.
+// Callers reading 32-bit roaring data directly should not pass cookieHeader.
+func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) {
 	stream := internal.ByteInputAdapterPool.Get().(*internal.ByteInputAdapter)
 	stream.Reset(reader)
 
-	p, err = rb.highlowcontainer.readFrom(stream)
+	p, err = rb.highlowcontainer.readFrom(stream, cookieHeader...)
 	internal.ByteInputAdapterPool.Put(stream)
 
 	return
diff --git a/roaring64/roaring64.go b/roaring64/roaring64.go
index 15651b27..21a1535e 100644
--- a/roaring64/roaring64.go
+++ b/roaring64/roaring64.go
@@ -11,6 +11,9 @@ import (
 	"github.com/RoaringBitmap/roaring"
 )
 
+const serialCookieNoRunContainer = 12346 // only arrays and bitmaps
+const serialCookie = 12347               // runs, arrays, and bitmaps
+
 // Bitmap represents a compressed bitmap where you can add integers.
 type Bitmap struct {
 	highlowcontainer roaringArray64
@@ -79,16 +82,29 @@ func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {
 // implementations (Java, C) and is documented here:
 // https://github.com/RoaringBitmap/RoaringFormatSpec
+//
+// A stream serialized by the 32-bit roaring package is accepted as well: it is
+// recognized by its leading cookie and loaded as the single container at key 0.
 func (rb *Bitmap) ReadFrom(stream io.Reader) (p int64, err error) {
-
+	cookie, r32, p, err := tryReadFromRoaring32(rb, stream)
+	if err != nil {
+		return p, err
+	}
+	if r32 {
+		return p, nil
+	}
 	// TODO: Add buffer interning as in base roaring package.
 
-	sizeBuf := make([]byte, 8)
+	// The container count is a little-endian uint64 whose low 4 bytes were
+	// already consumed as the candidate cookie; read the remaining 4 bytes.
+	sizeBuf := make([]byte, 4)
 	var n int
-	n, err = stream.Read(sizeBuf)
-	if n == 0 || err != nil {
-		return int64(n), fmt.Errorf("error in bitmap.readFrom: could not read number of containers: %s", err)
+	n, err = io.ReadFull(stream, sizeBuf)
+	if err != nil {
+		return p + int64(n), fmt.Errorf("error in bitmap.readFrom: could not read number of containers: %s", err)
 	}
 	p += int64(n)
+	sizeBuf = append(cookie, sizeBuf...)
+
 	size := binary.LittleEndian.Uint64(sizeBuf)
 	rb.highlowcontainer = roaringArray64{}
 	rb.highlowcontainer.keys = make([]uint32, size)
@@ -113,6 +129,44 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (p int64, err error) {
 	return p, nil
 }
 
+// tryReadFromRoaring32 consumes the first 4 bytes of stream and checks them for
+// a 32-bit roaring cookie. When one is found, the rest of the stream is decoded
+// as a 32-bit bitmap that becomes the only container of rb, at high key 0, and
+// r32 is true. Otherwise rb is left untouched and r32 is false.
+//
+// cookie holds the bytes that were consumed so the caller can reuse them as the
+// low half of the 64-bit container count. p is the number of cookie bytes read
+// plus, on the 32-bit path, the count reported by the 32-bit reader.
+func tryReadFromRoaring32(rb *Bitmap, stream io.Reader) (cookie []byte, r32 bool, p int64, err error) {
+	cookie = make([]byte, 4)
+	// io.ReadFull, unlike a bare Read, reports an error on a short read.
+	n, err := io.ReadFull(stream, cookie)
+	p = int64(n)
+	if err != nil {
+		return cookie, false, p, err
+	}
+	// serialCookie is matched on the low 16 bits only, whereas the 32-bit reader
+	// compares serialCookieNoRunContainer against the whole 4-byte value.
+	header := binary.LittleEndian.Uint32(cookie)
+	if header != serialCookieNoRunContainer && header&0xFFFF != serialCookie {
+		return cookie, false, p, nil
+	}
+
+	bm32 := roaring.NewBitmap()
+	n32, err := bm32.ReadFrom(stream, cookie...)
+	// The cookie was passed in rather than re-read, so add n32 on top of it.
+	p += n32
+	if err != nil {
+		return cookie, false, p, err
+	}
+	rb.highlowcontainer = roaringArray64{
+		keys:            []uint32{0},
+		containers:      []*roaring.Bitmap{bm32},
+		needCopyOnWrite: []bool{false},
+	}
+	return cookie, true, p, nil
+}
+
 // FromBuffer creates a bitmap from its serialized version stored in buffer
 // func (rb *Bitmap) FromBuffer(data []byte) (p int64, err error) {
 //
diff --git a/roaring64/roaring64_test.go b/roaring64/roaring64_test.go
index de8ce556..b5d92ff1 100644
--- a/roaring64/roaring64_test.go
+++ b/roaring64/roaring64_test.go
@@ -2,9 +2,12 @@ package roaring64
 
 import (
 	"fmt"
+	"io/ioutil"
 	"log"
 	"math"
 	"math/rand"
+	"os"
+	"path/filepath"
 	"strconv"
 	"testing"
 	"unsafe"
@@ -2004,3 +2007,100 @@ func IntsEquals(a, b []uint64) bool {
 	}
 	return true
 }
+
+// Test_tryReadFromRoaring32 loads a serialized 32-bit bitmap through UnmarshalBinary.
+func Test_tryReadFromRoaring32(t *testing.T) {
+	r32 := roaring.BitmapOf(1, 2, 65535, math.MaxUint32-1)
+	bs, err := r32.ToBytes()
+	if err != nil {
+		t.Fatal(err)
+	}
+	r64 := NewBitmap()
+	assert.True(t, r64.UnmarshalBinary(bs) == nil)
+	assert.True(t, r64.Contains(1))
+	assert.True(t, r64.Contains(2))
+	assert.True(t, r64.Contains(65535))
+	assert.True(t, r64.Contains(math.MaxUint32-1))
+}
+
+// Test_tryReadFromRoaring32_File loads a serialized 32-bit bitmap from a file with ReadFrom.
+func Test_tryReadFromRoaring32_File(t *testing.T) {
+	tempDir, err := ioutil.TempDir("./", "testdata")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(tempDir)
+
+	r32 := roaring.BitmapOf(1, 2, 65535, math.MaxUint32-1)
+	bs, err := r32.ToBytes()
+	if err != nil {
+		t.Fatal(err)
+	}
+	name := filepath.Join(tempDir, "r32")
+	if err := ioutil.WriteFile(name, bs, 0600); err != nil {
+		t.Fatal(err)
+	}
+	file, err := os.Open(name)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer file.Close()
+
+	r64 := NewBitmap()
+	if _, err := r64.ReadFrom(file); err != nil {
+		t.Fatal(err)
+	}
+	assert.True(t, r64.Contains(1))
+	assert.True(t, r64.Contains(2))
+	assert.True(t, r64.Contains(65535))
+	assert.True(t, r64.Contains(math.MaxUint32-1))
+}
+
+// Test_tryReadFromRoaring32WithRoaring64 checks that 64-bit data still round-trips through UnmarshalBinary.
+func Test_tryReadFromRoaring32WithRoaring64(t *testing.T) {
+	r64 := BitmapOf(1, 65535, math.MaxUint32, math.MaxUint64)
+	bs, err := r64.ToBytes()
+	if err != nil {
+		t.Fatal(err)
+	}
+	nr64 := NewBitmap()
+	assert.True(t, nr64.UnmarshalBinary(bs) == nil)
+	assert.True(t, nr64.Contains(1))
+	assert.True(t, nr64.Contains(65535))
+	assert.True(t, nr64.Contains(math.MaxUint32))
+	assert.True(t, nr64.Contains(math.MaxUint64))
+}
+
+// Test_tryReadFromRoaring32WithRoaring64_File checks that 64-bit data still loads from a file with ReadFrom.
+func Test_tryReadFromRoaring32WithRoaring64_File(t *testing.T) {
+	tempDir, err := ioutil.TempDir("./", "testdata")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(tempDir)
+
+	r64 := BitmapOf(1, 65535, math.MaxUint32, math.MaxUint64)
+	bs, err := r64.ToBytes()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	name := filepath.Join(tempDir, "r32")
+	if err := ioutil.WriteFile(name, bs, 0600); err != nil {
+		t.Fatal(err)
+	}
+	file, err := os.Open(name)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer file.Close()
+
+	nr64 := NewBitmap()
+	if _, err := nr64.ReadFrom(file); err != nil {
+		t.Fatal(err)
+	}
+	assert.True(t, nr64.Contains(1))
+	assert.True(t, nr64.Contains(65535))
+	assert.True(t, nr64.Contains(math.MaxUint32))
+	assert.True(t, nr64.Contains(math.MaxUint64))
+}
diff --git a/roaringarray.go b/roaringarray.go
index 077695bc..4aefc6a3 100644
--- a/roaringarray.go
+++ b/roaringarray.go
@@ -549,11 +549,24 @@ func (ra *roaringArray) toBytes() ([]byte, error) {
 	return buf.Bytes(), err
 }
 
-func (ra *roaringArray) readFrom(stream internal.ByteInput) (int64, error) {
-	cookie, err := stream.ReadUInt32()
-
-	if err != nil {
-		return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
+// readFrom decodes a serialized roaring array from stream.
+//
+// cookieHeader is optional. When present it must hold exactly the 4 cookie
+// bytes that the caller has already consumed from the underlying reader; the
+// cookie is then decoded from it instead of being read from stream.
+func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte) (int64, error) {
+	var cookie uint32
+	var err error
+	switch len(cookieHeader) {
+	case 0:
+		cookie, err = stream.ReadUInt32()
+		if err != nil {
+			return stream.GetReadBytes(), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: %s", err)
+		}
+	case 4:
+		cookie = binary.LittleEndian.Uint32(cookieHeader)
+	default:
+		return int64(len(cookieHeader)), fmt.Errorf("error in roaringArray.readFrom: could not read initial cookie: incorrect size of cookie header")
 	}
 
 	var size uint32
@@ -570,7 +583,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput) (int64, error) {
 		}
 	} else if cookie == serialCookieNoRunContainer {
 		size, err = stream.ReadUInt32()
-
 		if err != nil {
 			return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read a bitmap size: %s", err)
 		}