This repository was archived by the owner on Feb 21, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 232
/
Copy pathunmarshal_binary.go
259 lines (241 loc) · 6.85 KB
/
unmarshal_binary.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
// Copyright 2022 Molecula Corp. (DBA FeatureBase).
// SPDX-License-Identifier: Apache-2.0
package roaring
import (
"bytes"
"errors"
"io"
"unsafe"
)
// UnmarshalBinary decodes data into b, replacing the bitmap's existing
// contents. The input may be in Pilosa's roaring format or in the upstream
// roaring format (upstream support is best-effort; some edge cases may not
// be handled). Any ops log that follows the container data is replayed
// onto the bitmap, and b.ops / b.opN are reset and recounted from it.
//
// It returns an error if data is nil, if the roaring iterator cannot be
// created, if iteration stops with anything other than io.EOF, or if an
// ops-log entry fails to decode (in which case the error is built by
// newFileShouldBeTruncatedError with the current valid offset).
func (b *Bitmap) UnmarshalBinary(data []byte) (err error) {
	if data == nil {
		return errors.New("no roaring bitmap provided")
	}

	var itr RoaringIterator
	if itr, err = NewRoaringIterator(data); err != nil {
		return err
	}
	if itr == nil {
		return errors.New("failed to create roaring iterator, but don't know why")
	}

	b.Containers.Reset()

	// Values handed back by each call to itr.Next.
	var (
		key     uint64
		ctype   byte
		n       int
		length  int
		ptr     *uint16
		nextErr error
	)
	for {
		key, ctype, n, length, ptr, nextErr = itr.Next()
		if nextErr != nil {
			break
		}

		// Build a container that views the iterator's memory directly.
		var c *Container
		switch ctype {
		case ContainerArray:
			c = NewContainerArray((*[4096]uint16)(unsafe.Pointer(ptr))[:length:length])
		case ContainerRun:
			c = NewContainerRunN((*[2048]Interval16)(unsafe.Pointer(ptr))[:length:length], int32(n))
		case ContainerBitmap:
			c = NewContainerBitmapN((*[1024]uint64)(unsafe.Pointer(ptr))[:1024:length], int32(n))
		default:
			panic("invalid container type")
		}

		// The container only counts as "mapped" when it still points at
		// the iterator's storage; a constructor may have chosen its own
		// backing data instead (small arrays, for instance), which is fine.
		c.setMapped(c.pointer == ptr)
		if !b.preferMapping {
			c = c.unmapOrClone()
		}
		b.Containers.Put(key, c)
	}

	// note: on a non-EOF error some containers may already have been
	// stored without that being logged. There is no good solution to this.
	if nextErr != io.EOF {
		return nextErr
	}

	// Replay the ops log, which runs to the end of the data.
	b.ops, b.opN = 0, 0
	rest, validOffset := itr.Remaining()
	for len(rest) > 0 {
		// Decode the next op and apply it to the bitmap.
		var entry op
		if opErr := entry.UnmarshalBinary(rest); opErr != nil {
			return newFileShouldBeTruncatedError(opErr, int64(validOffset))
		}
		entry.apply(b)

		// Account for the op.
		b.ops++
		b.opN += entry.count()

		// Step past the bytes this op occupied.
		consumed := entry.size()
		rest = rest[consumed:]
		validOffset += int64(consumed)
	}
	return nil
}
// MarshalBinary serializes the bitmap by writing it, via WriteTo, into an
// in-memory buffer and returning the buffered bytes. If WriteTo fails, the
// error is returned and the byte slice is nil.
func (b *Bitmap) MarshalBinary() ([]byte, error) {
	out := new(bytes.Buffer)
	if _, err := b.WriteTo(out); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// InspectBinary reads a roaring bitmap, plus a possible ops log, from data
// and reports back on the contents through info, distinguishing between
// the containers as originally serialized and the containers changed by
// replaying the ops log.
//
// Parameters:
//   - data:   the serialized bitmap; nil is rejected with an error.
//   - mapped: passed to PreferMapping on the new bitmap; when false every
//     container is run through unmapOrClone before being stored.
//   - info:   optional report destination. When nil, no report is produced
//     and only the bitmap and the mappedAny flag are computed.
//
// Returns the bitmap built so far (returned even when err is non-nil),
// whether any container in it is still flagged as mapped, and the first
// error encountered. An ops-log decode error stops the replay but the
// bitmap and report are still finalized and returned along with the error.
func InspectBinary(data []byte, mapped bool, info *BitmapInfo) (b *Bitmap, mappedAny bool, err error) {
	b = NewFileBitmap()
	b.PreferMapping(mapped)
	if data == nil {
		return b, mappedAny, errors.New("no roaring bitmap provided")
	}
	var itr RoaringIterator
	var itrKey uint64
	var itrCType byte
	var itrN int
	var itrLen int
	var itrPointer *uint16
	var itrErr error

	itr, err = NewRoaringIterator(data)
	if err != nil {
		return b, mappedAny, err
	}
	if itr == nil {
		return b, mappedAny, errors.New("failed to create roaring iterator, but don't know why")
	}

	keys := itr.Len()
	// info is optional (the post-ops code below already treats it so);
	// every access must be guarded or a nil info panics here.
	if info != nil {
		info.Containers = make([]ContainerInfo, 0, keys)
	}

	itrKey, itrCType, itrN, itrLen, itrPointer, itrErr = itr.Next()
	for itrErr == nil {
		var newC *Container
		switch itrCType {
		case ContainerArray:
			newC = NewContainerArray((*[4096]uint16)(unsafe.Pointer(itrPointer))[:itrLen:itrLen])
		case ContainerRun:
			newC = NewContainerRunN((*[2048]Interval16)(unsafe.Pointer(itrPointer))[:itrLen:itrLen], int32(itrN))
		case ContainerBitmap:
			newC = NewContainerBitmapN((*[1024]uint64)(unsafe.Pointer(itrPointer))[:1024:itrLen], int32(itrN))
		default:
			panic("invalid container type")
		}
		// If our pointer isn't itrPointer, we aren't actually mapped.
		newC.setMapped(newC.pointer == itrPointer)
		if !mapped {
			newC = newC.unmapOrClone()
		}
		// Pristine means this is the original object read in from
		// roaring data, even if it's not mapped, which this is for
		// now.
		newC.flags |= flagPristine
		if newC.flags&flagMapped != 0 {
			mappedAny = true
		}

		if info != nil {
			// Serialized size of the container's payload, by type.
			var size int
			switch itrCType {
			case ContainerArray:
				size = itrN * 2
			case ContainerBitmap:
				size = 8192
			case ContainerRun:
				size = itrLen*interval16Size + runCountHeaderSize
			}
			info.Containers = append(info.Containers, ContainerInfo{
				N:       newC.n,
				Mapped:  newC.flags&flagMapped != 0,
				Type:    containerTypeNames[itrCType],
				Alloc:   size,
				Pointer: uintptr(unsafe.Pointer(newC.pointer)),
				Key:     itrKey,
				Flags:   newC.flags.String(),
			})
			info.ContainerCount++
			info.BitCount += uint64(newC.n)
		}

		b.Containers.Put(itrKey, newC)
		itrKey, itrCType, itrN, itrLen, itrPointer, itrErr = itr.Next()
	}
	// note: if we get a non-EOF err, it's possible that we made SOME
	// changes but didn't log them. I don't have a good solution to this.
	if itrErr != io.EOF {
		return b, mappedAny, itrErr
	}

	// Stash the address range covered by data. The length check guards
	// &data[0] against a non-nil but empty slice.
	if info != nil && len(data) > 0 {
		info.From = uintptr(unsafe.Pointer(&data[0]))
		info.To = info.From + uintptr(len(data))
	}

	// Read ops log until the end of the file.
	b.ops = 0
	b.opN = 0
	buf, _ := itr.Remaining()
	// if there's no ops log, we're done and can just return the
	// info so far.
	if len(buf) == 0 {
		return b, mappedAny, err
	}
	for len(buf) > 0 {
		// Unmarshal the op and apply it.
		var opr op
		if err = opr.UnmarshalBinary(buf); err != nil {
			// we break out here, but we continue on to
			// return the bitmap as-is, along with data about
			// it, and the error. this lets us share the
			// "is anything mapped" check with that code.
			break
		}
		opr.apply(b)

		// Record the op in the report.
		if info != nil {
			info.Ops++
			info.OpN += opr.count()
			info.OpDetails = append(info.OpDetails, opr.info())
		}

		// Move the buffer forward.
		buf = buf[opr.size():]
	}

	// The iterator's second return value is deliberately ignored, as in
	// the original code. NOTE(review): confirm it cannot signal a failure
	// that matters here.
	citer, _ := b.Containers.Iterator(0)
	// it's possible the ops log unmapped every mapped container, so we recheck.
	mappedAny = false
	if info == nil {
		for citer.Next() {
			_, c := citer.Value()
			if c.Mapped() {
				mappedAny = true
				break
			}
		}
		return b, mappedAny, err
	}

	// now we want to compute the actual container and bit counts after
	// ops, and create a report of just the containers which got changed.
	info.ContainerCount = 0
	info.BitCount = 0
	for citer.Next() {
		k, c := citer.Value()
		if c.Mapped() {
			mappedAny = true
		}
		info.ContainerCount++
		info.BitCount += uint64(c.N())
		// Pristine containers were untouched by the ops log; skip them.
		if c.flags&flagPristine != 0 {
			continue
		}
		ci := c.info()
		ci.Key = k
		info.OpContainers = append(info.OpContainers, ci)
	}
	return b, mappedAny, err
}