Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start validation #423

Merged
merged 11 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
47 changes: 35 additions & 12 deletions arraycontainer.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
package roaring

import (
"errors"
"fmt"
)

type arrayContainer struct {
content []uint16
}

var (
ErrArrayIncorrectSort = errors.New("incorrectly sorted array")
ErrArrayInvalidSize = errors.New("invalid array size")
)

func (ac *arrayContainer) String() string {
s := "{"
for it := ac.getShortIterator(); it.hasNext(); {
Expand All @@ -26,8 +32,7 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
_ = x[len(ac.content)-1+i]
_ = ac.content[len(ac.content)-1]
for k := 0; k < len(ac.content); k++ {
x[k+i] =
uint32(ac.content[k]) | mask
x[k+i] = uint32(ac.content[k]) | mask
}
return i + len(ac.content)
}
Expand Down Expand Up @@ -168,7 +173,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
}
answer := newArrayContainer()
answer.content = make([]uint16, newCardinality, newCardinality) //a hack for sure
answer.content = make([]uint16, newCardinality, newCardinality) // a hack for sure

copy(answer.content, ac.content[:startIndex])
outPos := startIndex
Expand All @@ -194,11 +199,9 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
}
answer.content = answer.content[:newCardinality]
return answer

}

func (ac *arrayContainer) equals(o container) bool {

srb, ok := o.(*arrayContainer)
if ok {
// Check if the containers are the same object.
Expand Down Expand Up @@ -239,8 +242,8 @@ func (ac *arrayContainer) toBitmapContainer() *bitmapContainer {
bc := newBitmapContainer()
bc.loadData(ac)
return bc

}

func (ac *arrayContainer) iadd(x uint16) (wasNew bool) {
// Special case adding to the end of the container.
l := len(ac.content)
Expand Down Expand Up @@ -352,7 +355,7 @@ func (ac *arrayContainer) ior(a container) container {
return ac.iorArray(x)
case *bitmapContainer:
return a.(*bitmapContainer).orArray(ac)
//return ac.iorBitmap(x) // note: this does not make sense
// return ac.iorBitmap(x) // note: this does not make sense
case *runContainer16:
if x.isFull() {
return x.clone()
Expand Down Expand Up @@ -589,7 +592,6 @@ func (ac *arrayContainer) iandBitmap(bc *bitmapContainer) container {
}
ac.content = ac.content[:pos]
return ac

}

func (ac *arrayContainer) xor(a container) container {
Expand Down Expand Up @@ -630,7 +632,6 @@ func (ac *arrayContainer) xorArray(value2 *arrayContainer) container {
length := exclusiveUnion2by2(value1.content, value2.content, answer.content)
answer.content = answer.content[:length]
return answer

}

func (ac *arrayContainer) andNot(a container) container {
Expand Down Expand Up @@ -822,7 +823,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
} else { // no expansion needed
ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1)
if cardinalityChange < 0 {

for i := startIndex + newValuesInRange; i < newCardinality; i++ {
ac.content[i] = ac.content[i-cardinalityChange]
}
Expand Down Expand Up @@ -915,7 +915,6 @@ func (ac *arrayContainer) rank(x uint16) int {
return answer + 1
}
return -answer - 1

}

func (ac *arrayContainer) selectInt(x uint16) int {
Expand Down Expand Up @@ -1039,7 +1038,6 @@ func (ac *arrayContainer) numberOfRuns() (nr int) {

// convert to run or array *if needed*
func (ac *arrayContainer) toEfficientContainer() container {

numRuns := ac.numberOfRuns()

sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
Expand Down Expand Up @@ -1099,3 +1097,28 @@ func (ac *arrayContainer) addOffset(x uint16) (container, container) {

return low, high
}

// validate checks cardinality and sort order of the array container
func (ac *arrayContainer) validate() error {
cardinality := ac.getCardinality()

if cardinality <= 0 {
return ErrArrayInvalidSize
}

if cardinality > arrayDefaultMaxSize {
return ErrArrayInvalidSize
}

previous := ac.content[0]
for i := 1; i < len(ac.content); i++ {
next := ac.content[i]
if previous > next {
return ErrArrayIncorrectSort
}
previous = next

}

return nil
}
50 changes: 46 additions & 4 deletions arraycontainer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,11 @@ func TestArrayContainerNumberOfRuns025(t *testing.T) {
}

func TestArrayContainerIaddRangeNearMax068(t *testing.T) {
iv := []interval16{newInterval16Range(65525, 65527),
iv := []interval16{
newInterval16Range(65525, 65527),
newInterval16Range(65530, 65530),
newInterval16Range(65534, 65535)}
newInterval16Range(65534, 65535),
}
rc := newRunContainer16TakeOwnership(iv)

ac2 := rc.toArrayContainer()
Expand All @@ -262,9 +264,11 @@ func TestArrayContainerIaddRangeNearMax068(t *testing.T) {
}

func TestArrayContainerEtc070(t *testing.T) {
iv := []interval16{newInterval16Range(65525, 65527),
iv := []interval16{
newInterval16Range(65525, 65527),
newInterval16Range(65530, 65530),
newInterval16Range(65534, 65535)}
newInterval16Range(65534, 65535),
}
rc := newRunContainer16TakeOwnership(iv)
ac := rc.toArrayContainer()

Expand Down Expand Up @@ -431,6 +435,44 @@ func TestArrayContainerResetTo(t *testing.T) {
})
}

func TestArrayContainerValidation(t *testing.T) {
array := newArrayContainer()
upperBound := arrayDefaultMaxSize

err := array.validate()
assert.Error(t, err)

for i := 0; i < upperBound; i++ {
array.iadd(uint16(i))
}
err = array.validate()
assert.NoError(t, err)

// Introduce a sort error
// We know that upperbound is unsorted because we populated up to upperbound
array.content[500] = uint16(upperBound + upperBound)

err = array.validate()
assert.Error(t, err)

array = newArrayContainer()

// Technically a run, but make sure the incorrect sort detection handles equal elements
for i := 0; i < upperBound; i++ {
array.iadd(uint16(1))
}
err = array.validate()
assert.NoError(t, err)

array = newArrayContainer()

for i := 0; i < 2*upperBound; i++ {
array.iadd(uint16(i))
}
err = array.validate()
assert.Error(t, err)
}

// go test -bench BenchmarkShortIteratorAdvance -run -
func BenchmarkShortIteratorAdvanceArray(b *testing.B) {
benchmarkContainerIteratorAdvance(b, newArrayContainer())
Expand Down
38 changes: 28 additions & 10 deletions bitmapcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ func (bcsi *bitmapContainerShortIterator) next() uint16 {
bcsi.i = bcsi.ptr.NextSetBit(uint(bcsi.i) + 1)
return uint16(j)
}

func (bcsi *bitmapContainerShortIterator) hasNext() bool {
return bcsi.i >= 0
}
Expand Down Expand Up @@ -241,7 +242,7 @@ func (bc *bitmapContainer) getSizeInBytes() int {
}

func (bc *bitmapContainer) serializedSizeInBytes() int {
//return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
// return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
return len(bc.bitmap) * 8
}

Expand Down Expand Up @@ -441,7 +442,7 @@ func (bc *bitmapContainer) ior(a container) container {
if bc.isFull() {
return newRunContainer16Range(0, MaxUint16)
}
//bc.computeCardinality()
// bc.computeCardinality()
return bc
}
panic(fmt.Errorf("unsupported container type %T", a))
Expand Down Expand Up @@ -819,9 +820,8 @@ func (bc *bitmapContainer) andBitmap(value2 *bitmapContainer) container {
}
ac := newArrayContainerSize(newcardinality)
fillArrayAND(ac.content, bc.bitmap, value2.bitmap)
ac.content = ac.content[:newcardinality] //not sure why i need this
ac.content = ac.content[:newcardinality] // not sure why i need this
return ac

}

func (bc *bitmapContainer) intersectsArray(value2 *arrayContainer) bool {
Expand All @@ -842,7 +842,6 @@ func (bc *bitmapContainer) intersectsBitmap(value2 *bitmapContainer) bool {
}
}
return false

}

func (bc *bitmapContainer) iandBitmap(value2 *bitmapContainer) container {
Expand Down Expand Up @@ -995,7 +994,7 @@ func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) containe
return bc
}

func (bc *bitmapContainer) contains(i uint16) bool { //testbit
func (bc *bitmapContainer) contains(i uint16) bool { // testbit
x := uint(i)
w := bc.bitmap[x>>6]
mask := uint64(1) << (x & 63)
Expand Down Expand Up @@ -1051,7 +1050,7 @@ func (bc *bitmapContainer) toArrayContainer() *arrayContainer {
}

func (bc *bitmapContainer) fillArray(container []uint16) {
//TODO: rewrite in assembly
// TODO: rewrite in assembly
pos := 0
base := 0
for k := 0; k < len(bc.bitmap); k++ {
Expand Down Expand Up @@ -1141,7 +1140,6 @@ func (bc *bitmapContainer) numberOfRuns() int {

// convert to run or array *if needed*
func (bc *bitmapContainer) toEfficientContainer() container {

numRuns := bc.numberOfRuns()

sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
Expand All @@ -1159,7 +1157,6 @@ func (bc *bitmapContainer) toEfficientContainer() container {
}

func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {

if len(rc.iv) == 1 {
return newBitmapContainerwithRange(int(rc.iv[0].start), int(rc.iv[0].last()))
}
Expand All @@ -1169,7 +1166,7 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
setBitmapRange(bc.bitmap, int(rc.iv[i].start), int(rc.iv[i].last())+1)
bc.cardinality += int(rc.iv[i].last()) + 1 - int(rc.iv[i].start)
}
//bc.computeCardinality()
// bc.computeCardinality()
return bc
}

Expand Down Expand Up @@ -1234,3 +1231,24 @@ func (bc *bitmapContainer) addOffset(x uint16) (container, container) {

return low, high
}

// validate checks that the container size is non-negative
func (bc *bitmapContainer) validate() error {
if bc.cardinality < arrayDefaultMaxSize {
return fmt.Errorf("bitmap container size was less than: %d", arrayDefaultMaxSize)
}

if maxCapacity < len(bc.bitmap)*64 {
return fmt.Errorf("bitmap slize size %d exceeded max capacity %d", maxCapacity, len(bc.bitmap)*64)
}

if bc.cardinality > maxCapacity {
return fmt.Errorf("bitmap container size was greater than: %d", maxCapacity)
}
bearrito marked this conversation as resolved.
Show resolved Hide resolved

if bc.cardinality != int(popcntSlice(bc.bitmap)) {
return fmt.Errorf("bitmap container size %d did not match underlying slice length: %d", bc.cardinality, int(popcntSlice(bc.bitmap)))
}

return nil
}
43 changes: 43 additions & 0 deletions bitmapcontainer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,46 @@ func TestBitmapContainerIAndNot(t *testing.T) {
require.ElementsMatch(t, []uint16{12279, 12282, 12285}, bc.(*arrayContainer).content)
require.Equal(t, 3, bc.getCardinality())
}

func TestBitMapContainerValidate(t *testing.T) {
bc := newBitmapContainer()

for i := 0; i < arrayDefaultMaxSize-1; i++ {
bc.iadd(uint16(i * 3))
}
// bitmap containers should have size arrayDefaultMaxSize or larger
assert.Error(t, bc.validate())
bc.iadd(math.MaxUint16)

assert.NoError(t, bc.validate())

// Break the max cardinality invariant
bc.cardinality = maxCapacity + 1

assert.Error(t, bc.validate())

// violate cardinality underlying container size invariant
bc = newBitmapContainer()
for i := 0; i < arrayDefaultMaxSize+1; i++ {
bc.iadd(uint16(i * 3))
}
assert.NoError(t, bc.validate())
bc.cardinality += 1
assert.Error(t, bc.validate())

// check that underlying packed slice doesn't exceed maxCapacity
bc = newBitmapContainer()
orginalSize := (1 << 16) / 64
for i := 0; i < orginalSize; i++ {
bc.cardinality += 1
bc.bitmap[i] = uint64(1)
}

appendSize := ((1 << 16) - orginalSize) + 1
for i := 0; i < appendSize; i++ {
bc.cardinality += 1
bc.bitmap = append(bc.bitmap, uint64(1))
}

assert.Error(t, bc.validate())
}