From 8a564087f6247389e587abd84858274fb901b726 Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Tue, 30 Apr 2024 20:30:21 -0400
Subject: [PATCH 01/11] Start validation

---
 arraycontainer.go      |  42 +++++++++----
 arraycontainer_test.go |  52 ++++++++++++++--
 bitmapcontainer.go     |  27 +++++---
 roaring.go             |  28 +++++----
 roaring_test.go        |  64 +++++++++++++++----
 roaringarray.go        |  47 +++++++++-----
 runcontainer.go        | 102 +++++++++++++++++++-----------
 runcontainer_test.go   | 138 ++++++++++++++++++++++++++---------------
 8 files changed, 349 insertions(+), 151 deletions(-)

diff --git a/arraycontainer.go b/arraycontainer.go
index 80fa676e..9a0d4a01 100644
--- a/arraycontainer.go
+++ b/arraycontainer.go
@@ -1,6 +1,7 @@
 package roaring
 
 import (
+	"errors"
 	"fmt"
 )
 
@@ -26,8 +27,7 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
 	_ = x[len(ac.content)-1+i]
 	_ = ac.content[len(ac.content)-1]
 	for k := 0; k < len(ac.content); k++ {
-		x[k+i] =
-			uint32(ac.content[k]) | mask
+		x[k+i] = uint32(ac.content[k]) | mask
 	}
 	return i + len(ac.content)
 }
@@ -168,7 +168,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
 		return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
 	}
 	answer := newArrayContainer()
-	answer.content = make([]uint16, newCardinality, newCardinality) //a hack for sure
+	answer.content = make([]uint16, newCardinality, newCardinality) // a hack for sure
 
 	copy(answer.content, ac.content[:startIndex])
 	outPos := startIndex
@@ -194,11 +194,9 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
 	}
 	answer.content = answer.content[:newCardinality]
 	return answer
-
 }
 
 func (ac *arrayContainer) equals(o container) bool {
-
 	srb, ok := o.(*arrayContainer)
 	if ok {
 		// Check if the containers are the same object.
@@ -239,8 +237,8 @@ func (ac *arrayContainer) toBitmapContainer() *bitmapContainer {
 	bc := newBitmapContainer()
 	bc.loadData(ac)
 	return bc
-
 }
+
 func (ac *arrayContainer) iadd(x uint16) (wasNew bool) {
 	// Special case adding to the end of the container.
 	l := len(ac.content)
@@ -352,7 +350,7 @@ func (ac *arrayContainer) ior(a container) container {
 		return ac.iorArray(x)
 	case *bitmapContainer:
 		return a.(*bitmapContainer).orArray(ac)
-		//return ac.iorBitmap(x) // note: this does not make sense
+		// return ac.iorBitmap(x) // note: this does not make sense
 	case *runContainer16:
 		if x.isFull() {
 			return x.clone()
@@ -589,7 +587,6 @@ func (ac *arrayContainer) iandBitmap(bc *bitmapContainer) container {
 	}
 	ac.content = ac.content[:pos]
 	return ac
-
 }
 
 func (ac *arrayContainer) xor(a container) container {
@@ -630,7 +627,6 @@ func (ac *arrayContainer) xorArray(value2 *arrayContainer) container {
 	length := exclusiveUnion2by2(value1.content, value2.content, answer.content)
 	answer.content = answer.content[:length]
 	return answer
-
 }
 
 func (ac *arrayContainer) andNot(a container) container {
@@ -822,7 +818,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
 	} else { // no expansion needed
 		ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1)
 		if cardinalityChange < 0 {
-
 			for i := startIndex + newValuesInRange; i < newCardinality; i++ {
 				ac.content[i] = ac.content[i-cardinalityChange]
 			}
@@ -915,7 +910,6 @@ func (ac *arrayContainer) rank(x uint16) int {
 		return answer + 1
 	}
 	return -answer - 1
-
 }
 
 func (ac *arrayContainer) selectInt(x uint16) int {
@@ -1039,7 +1033,6 @@ func (ac *arrayContainer) numberOfRuns() (nr int) {
 
 // convert to run or array *if needed*
 func (ac *arrayContainer) toEfficientContainer() container {
-
 	numRuns := ac.numberOfRuns()
 
 	sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
@@ -1099,3 +1092,28 @@ func (ac *arrayContainer) addOffset(x uint16) (container, container) {
 
 	return low, high
 }
+
+// validate checks cardinality and sort order of the array container
+func (ac *arrayContainer) validate() error {
+	cardinality := ac.getCardinality()
+
+	if cardinality <= 0 {
+		return errors.New("zero or negative size")
+	}
+
+	if cardinality > arrayDefaultMaxSize {
+		return errors.New("exceeds default max size")
+	}
+
+	previous := ac.content[0]
+	for i := 1; i < len(ac.content); i++ {
+		next := ac.content[i]
+		if previous > next {
+			return errors.New("incorrectly sorted array")
+		}
+		previous = next
+
+	}
+
+	return nil
+}
diff --git a/arraycontainer_test.go b/arraycontainer_test.go
index fd360bec..bc79c6fd 100644
--- a/arraycontainer_test.go
+++ b/arraycontainer_test.go
@@ -241,9 +241,11 @@ func TestArrayContainerNumberOfRuns025(t *testing.T) {
 }
 
 func TestArrayContainerIaddRangeNearMax068(t *testing.T) {
-	iv := []interval16{newInterval16Range(65525, 65527),
+	iv := []interval16{
+		newInterval16Range(65525, 65527),
 		newInterval16Range(65530, 65530),
-		newInterval16Range(65534, 65535)}
+		newInterval16Range(65534, 65535),
+	}
 	rc := newRunContainer16TakeOwnership(iv)
 
 	ac2 := rc.toArrayContainer()
@@ -262,9 +264,11 @@ func TestArrayContainerIaddRangeNearMax068(t *testing.T) {
 }
 
 func TestArrayContainerEtc070(t *testing.T) {
-	iv := []interval16{newInterval16Range(65525, 65527),
+	iv := []interval16{
+		newInterval16Range(65525, 65527),
 		newInterval16Range(65530, 65530),
-		newInterval16Range(65534, 65535)}
+		newInterval16Range(65534, 65535),
+	}
 	rc := newRunContainer16TakeOwnership(iv)
 	ac := rc.toArrayContainer()
 
@@ -431,6 +435,46 @@ func TestArrayContainerResetTo(t *testing.T) {
 	})
 }
 
+func TestArrayContainerValidation(t *testing.T) {
+	// TODO Use table based testing
+
+	array := newArrayContainer()
+	upperBound := arrayDefaultMaxSize
+
+	err := array.validate()
+	assert.Error(t, err)
+
+	for i := 0; i < upperBound; i++ {
+		array.iadd(uint16(i))
+	}
+	err = array.validate()
+	assert.NoError(t, err)
+
+	// Introduce a sort error
+	// We know that upperbound is unsorted because we populated up to upperbound
+	array.content[500] = uint16(upperBound + upperBound)
+
+	err = array.validate()
+	assert.Error(t, err)
+
+	array = newArrayContainer()
+
+	// Technically a run, but make sure the incorrect sort detection handles equal elements
+	for i := 0; i < upperBound; i++ {
+		array.iadd(uint16(1))
+	}
+	err = array.validate()
+	assert.NoError(t, err)
+
+	array = newArrayContainer()
+
+	for i := 0; i < 2*upperBound; i++ {
+		array.iadd(uint16(i))
+	}
+	err = array.validate()
+	assert.NoError(t, err)
+}
+
 // go test -bench BenchmarkShortIteratorAdvance -run -
 func BenchmarkShortIteratorAdvanceArray(b *testing.B) {
 	benchmarkContainerIteratorAdvance(b, newArrayContainer())
diff --git a/bitmapcontainer.go b/bitmapcontainer.go
index bf08bfca..2c7197d4 100644
--- a/bitmapcontainer.go
+++ b/bitmapcontainer.go
@@ -1,6 +1,7 @@
 package roaring
 
 import (
+	"errors"
 	"fmt"
 	"unsafe"
 )
@@ -116,6 +117,7 @@ func (bcsi *bitmapContainerShortIterator) next() uint16 {
 	bcsi.i = bcsi.ptr.NextSetBit(uint(bcsi.i) + 1)
 	return uint16(j)
 }
+
 func (bcsi *bitmapContainerShortIterator) hasNext() bool {
 	return bcsi.i >= 0
 }
@@ -241,7 +243,7 @@ func (bc *bitmapContainer) getSizeInBytes() int {
 }
 
 func (bc *bitmapContainer) serializedSizeInBytes() int {
-	//return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
+	// return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
 	return len(bc.bitmap) * 8
 }
 
@@ -441,7 +443,7 @@ func (bc *bitmapContainer) ior(a container) container {
 		if bc.isFull() {
 			return newRunContainer16Range(0, MaxUint16)
 		}
-		//bc.computeCardinality()
+		// bc.computeCardinality()
 		return bc
 	}
 	panic(fmt.Errorf("unsupported container type %T", a))
@@ -819,9 +821,8 @@ func (bc *bitmapContainer) andBitmap(value2 *bitmapContainer) container {
 	}
 	ac := newArrayContainerSize(newcardinality)
 	fillArrayAND(ac.content, bc.bitmap, value2.bitmap)
-	ac.content = ac.content[:newcardinality] //not sure why i need this
+	ac.content = ac.content[:newcardinality] // not sure why i need this
 	return ac
-
 }
 
 func (bc *bitmapContainer) intersectsArray(value2 *arrayContainer) bool {
@@ -842,7 +843,6 @@ func (bc *bitmapContainer) intersectsBitmap(value2 *bitmapContainer) bool {
 		}
 	}
 	return false
-
 }
 
 func (bc *bitmapContainer) iandBitmap(value2 *bitmapContainer) container {
@@ -995,7 +995,7 @@ func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) containe
 	return bc
 }
 
-func (bc *bitmapContainer) contains(i uint16) bool { //testbit
+func (bc *bitmapContainer) contains(i uint16) bool { // testbit
 	x := uint(i)
 	w := bc.bitmap[x>>6]
 	mask := uint64(1) << (x & 63)
@@ -1051,7 +1051,7 @@ func (bc *bitmapContainer) toArrayContainer() *arrayContainer {
 }
 
 func (bc *bitmapContainer) fillArray(container []uint16) {
-	//TODO: rewrite in assembly
+	// TODO: rewrite in assembly
 	pos := 0
 	base := 0
 	for k := 0; k < len(bc.bitmap); k++ {
@@ -1141,7 +1141,6 @@ func (bc *bitmapContainer) numberOfRuns() int {
 
 // convert to run or array *if needed*
 func (bc *bitmapContainer) toEfficientContainer() container {
-
 	numRuns := bc.numberOfRuns()
 
 	sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
@@ -1159,7 +1158,6 @@ func (bc *bitmapContainer) toEfficientContainer() container {
 }
 
 func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
-
 	if len(rc.iv) == 1 {
 		return newBitmapContainerwithRange(int(rc.iv[0].start), int(rc.iv[0].last()))
 	}
@@ -1169,7 +1167,7 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
 		setBitmapRange(bc.bitmap, int(rc.iv[i].start), int(rc.iv[i].last())+1)
 		bc.cardinality += int(rc.iv[i].last()) + 1 - int(rc.iv[i].start)
 	}
-	//bc.computeCardinality()
+	// bc.computeCardinality()
 	return bc
 }
 
@@ -1234,3 +1232,12 @@ func (bc *bitmapContainer) addOffset(x uint16) (container, container) {
 
 	return low, high
 }
+
+// validate checks that the container size is non-negative
+func (bc *bitmapContainer) validate() error {
+	if bc.getCardinality() < 0 {
+		return errors.New("negative size")
+	}
+
+	return nil
+}
diff --git a/roaring.go b/roaring.go
index a31cdbd9..19efbca0 100644
--- a/roaring.go
+++ b/roaring.go
@@ -26,7 +26,6 @@ func (rb *Bitmap) ToBase64() (string, error) {
 	buf := new(bytes.Buffer)
 	_, err := rb.WriteTo(buf)
 	return base64.StdEncoding.EncodeToString(buf.Bytes()), err
-
 }
 
 // FromBase64 deserializes a bitmap from Base64
@@ -54,10 +53,12 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
 	return rb.highlowcontainer.toBytes()
 }
 
-const wordSize = uint64(64)
-const log2WordSize = uint64(6)
-const capacity = ^uint64(0)
-const bitmapContainerSize = (1 << 16) / 64 // bitmap size in words
+const (
+	wordSize            = uint64(64)
+	log2WordSize        = uint64(6)
+	capacity            = ^uint64(0)
+	bitmapContainerSize = (1 << 16) / 64 // bitmap size in words
+)
 
 // DenseSize returns the size of the bitmap when stored as a dense bitmap.
 func (rb *Bitmap) DenseSize() uint64 {
@@ -960,7 +961,6 @@ func (rb *Bitmap) CheckedAdd(x uint32) bool {
 	newac := newArrayContainer()
 	rb.highlowcontainer.insertNewKeyValueAt(-i-1, hb, newac.iaddReturnMinimized(lowbits(x)))
 	return true
-
 }
 
 // AddInt adds the integer x to the bitmap (convenience method: the parameter is casted to uint32 and we call Add)
@@ -998,7 +998,6 @@ func (rb *Bitmap) CheckedRemove(x uint32) bool {
 		return C.getCardinality() < oldcard
 	}
 	return false
-
 }
 
 // IsEmpty returns true if the Bitmap is empty (it is faster than doing (GetCardinality() == 0))
@@ -1088,7 +1087,7 @@ main:
 						break main
 					}
 					s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
-				} else { //s1 > s2
+				} else { // s1 > s2
 					pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
 					if pos2 == length2 {
 						break main
@@ -1187,7 +1186,7 @@ main:
 						break main
 					}
 					s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
-				} else { //s1 > s2
+				} else { // s1 > s2
 					pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
 					if pos2 == length2 {
 						break main
@@ -1256,7 +1255,7 @@ main:
 						break main
 					}
 					s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
-				} else { //s1 > s2
+				} else { // s1 > s2
 					pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
 					if pos2 == length2 {
 						break main
@@ -1396,7 +1395,7 @@ main:
 						break main
 					}
 					s1 = rb.highlowcontainer.getKeyAtIndex(pos1)
-				} else { //s1 > s2
+				} else { // s1 > s2
 					pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
 					if pos2 == length2 {
 						break main
@@ -1584,7 +1583,7 @@ main:
 					}
 					s1 = x1.highlowcontainer.getKeyAtIndex(pos1)
 					s2 = x2.highlowcontainer.getKeyAtIndex(pos2)
-				} else { //s1 > s2
+				} else { // s1 > s2
 					pos2 = x2.highlowcontainer.advanceUntil(s1, pos2)
 					if pos2 == length2 {
 						break main
@@ -1632,7 +1631,6 @@ func BitmapOf(dat ...uint32) *Bitmap {
 // The function uses 64-bit parameters even though a Bitmap stores 32-bit values because it is allowed and meaningful to use [0,uint64(0x100000000)) as a range
 // while uint64(0x100000000) cannot be represented as a 32-bit value.
 func (rb *Bitmap) Flip(rangeStart, rangeEnd uint64) {
-
 	if rangeEnd > MaxUint32+1 {
 		panic("rangeEnd > MaxUint32+1")
 	}
@@ -1916,3 +1914,7 @@ func (rb *Bitmap) Stats() Statistics {
 	}
 	return stats
 }
+
+func (rb *Bitmap) Validate() error {
+	return rb.highlowcontainer.validate()
+}
diff --git a/roaring_test.go b/roaring_test.go
index 080b9f02..d1071606 100644
--- a/roaring_test.go
+++ b/roaring_test.go
@@ -17,7 +17,6 @@ func checkValidity(t *testing.T, rb *Bitmap) {
 	t.Helper()
 
 	for _, c := range rb.highlowcontainer.containers {
-
 		switch c.(type) {
 		case *arrayContainer:
 			if c.getCardinality() > arrayDefaultMaxSize {
@@ -74,6 +73,25 @@ func hashTest(t *testing.T, N uint64) {
 	assert.Equal(t, count, len(hashes))
 }
 
+func buildRuns(includeBroken bool) *runContainer16 {
+	rc := &runContainer16{}
+	if includeBroken {
+		for i := 0; i < 100; i++ {
+			start := i * 100
+			end := start + 100
+			rc.iv = append(rc.iv, newInterval16Range(uint16(start), uint16(end)))
+		}
+	}
+
+	for i := 0; i < 100; i++ {
+		start := i*100 + i*2
+		end := start + 100
+		rc.iv = append(rc.iv, newInterval16Range(uint16(start), uint16(end)))
+	}
+
+	return rc
+}
+
 func TestReverseIteratorCount(t *testing.T) {
 	array := []int{2, 63, 64, 65, 4095, 4096, 4097, 4159, 4160, 4161, 5000, 20000, 66666}
 	for _, testSize := range array {
@@ -477,7 +495,7 @@ func TestRangeRemovalFromContent(t *testing.T) {
 	bm.RemoveRange(0, 30000)
 	c := bm.GetCardinality()
 
-	assert.EqualValues(t, 00, c)
+	assert.EqualValues(t, 0o0, c)
 }
 
 func TestFlipOnEmpty(t *testing.T) {
@@ -596,7 +614,7 @@ func TestBitmapExtra(t *testing.T) {
 					clonebs1.InPlaceSymmetricDifference(bs2)
 					assert.True(t, equalsBitSet(clonebs1, Xor(rb1, rb2)))
 
-					//testing NOTAND
+					// testing NOTAND
 					clonebs1 = bs1.Clone()
 					clonebs1.InPlaceDifference(bs2)
 					assert.True(t, equalsBitSet(clonebs1, AndNot(rb1, rb2)))
@@ -779,7 +797,7 @@ func TestBitmap(t *testing.T) {
 
 	t.Run("Test AND 3", func(t *testing.T) {
 		var arrayand [11256]uint32
-		//393,216
+		// 393,216
 		pos := 0
 		rr := NewBitmap()
 		for k := 4000; k < 4256; k++ {
@@ -856,7 +874,6 @@ func TestBitmap(t *testing.T) {
 
 		assert.Equal(t, len(arrayres), len(arrayand))
 		assert.True(t, ok)
-
 	})
 
 	t.Run("Test AND 4", func(t *testing.T) {
@@ -869,7 +886,7 @@ func TestBitmap(t *testing.T) {
 		for i := 200000; i < 400000; i += 14 {
 			rb2.AddInt(i)
 		}
-		//TODO: Bitmap.And(bm,bm2)
+		// TODO: Bitmap.And(bm,bm2)
 		andresult := And(rb, rb2)
 		off := And(rb2, rb)
 
@@ -1247,7 +1264,7 @@ func TestBitmap(t *testing.T) {
 		rb := NewBitmap()
 		rb1 := Flip(rb, 100000, 132000)
 		rb2 := Flip(rb1, 65536, 120000)
-		//rbcard := rb2.GetCardinality()
+		// rbcard := rb2.GetCardinality()
 
 		bs := bitset.New(0)
 		for i := uint(65536); i < 100000; i++ {
@@ -1303,7 +1320,7 @@ func TestBitmap(t *testing.T) {
 		numCases := 1000
 		rb := NewBitmap()
 		bs := bitset.New(0)
-		//Random r = new Random(3333);
+		// Random r = new Random(3333);
 		checkTime := 2.0
 
 		for i := 0; i < numCases; i++ {
@@ -1717,6 +1734,7 @@ func TestBitmap(t *testing.T) {
 		assert.True(t, valide)
 	})
 }
+
 func TestXORtest4(t *testing.T) {
 	t.Run("XORtest 4", func(t *testing.T) {
 		rb := NewBitmap()
@@ -1764,7 +1782,7 @@ func TestXORtest4(t *testing.T) {
 		rb.Xor(rb2)
 		assert.True(t, xorresult2.Equals(rb))
 	})
-	//need to add the massives
+	// need to add the massives
 }
 
 func TestNextMany(t *testing.T) {
@@ -1874,7 +1892,7 @@ func rTest(t *testing.T, N int) {
 
 			assert.True(t, equalsBitSet(clonebs1, Xor(rb1, rb2)))
 
-			//testing NOTAND
+			// testing NOTAND
 			clonebs1 = bs1.Clone()
 			clonebs1.InPlaceDifference(bs2)
 
@@ -2618,6 +2636,30 @@ func TestFromBitSet(t *testing.T) {
 	})
 }
 
+func TestValidation(t *testing.T) {
+	bm := NewBitmap()
+	bm.AddRange(306, 406)
+	bm.AddRange(0, 100)
+	bm.AddRange(102, 202)
+	bm.AddRange(204, 304)
+	assert.NoError(t, bm.Validate())
+
+	randomEntries := make([]uint32, 0, 1000)
+	for i := 0; i < 1000; i++ {
+		randomEntries = append(randomEntries, rand.Uint32())
+	}
+
+	bm.AddMany(randomEntries)
+	assert.NoError(t, bm.Validate())
+
+	randomEntries = make([]uint32, 0, 1000)
+	for i := 0; i < 1000; i++ {
+		randomEntries = append(randomEntries, uint32(i))
+	}
+	bm.AddMany(randomEntries)
+	assert.NoError(t, bm.Validate())
+}
+
 func BenchmarkFromDense(b *testing.B) {
 	testDense(func(name string, rb *Bitmap) {
 		dense := make([]uint64, rb.DenseSize())
@@ -2679,7 +2721,7 @@ func BenchmarkInPlaceArrayUnions(b *testing.B) {
 	for i := 0; i < 100; i++ {
 		bitmap := NewBitmap()
 		for j := 0; j < 100; j++ {
-			//keep all entries in [0,4096), so they stay arrays.
+			// keep all entries in [0,4096), so they stay arrays.
 			bitmap.Add(uint32(rand.Intn(arrayDefaultMaxSize)))
 		}
 		componentBitmaps[i] = bitmap
diff --git a/roaringarray.go b/roaringarray.go
index 079195dd..8e8bf5db 100644
--- a/roaringarray.go
+++ b/roaringarray.go
@@ -3,6 +3,7 @@ package roaring
 import (
 	"bytes"
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"io"
 
@@ -30,7 +31,7 @@ type container interface {
 	iadd(x uint16) bool                   // inplace, returns true if x was new.
 	iaddReturnMinimized(uint16) container // may change return type to minimize storage.
 
-	//addRange(start, final int) container  // range is [firstOfRange,lastOfRange) (unused)
+	// addRange(start, final int) container  // range is [firstOfRange,lastOfRange) (unused)
 	iaddRange(start, endx int) container // i stands for inplace, range is [firstOfRange,endx)
 
 	iremove(x uint16) bool                   // inplace, returns true if x was present.
@@ -61,7 +62,7 @@ type container interface {
 	lazyOR(r container) container
 	lazyIOR(r container) container
 	getSizeInBytes() int
-	//removeRange(start, final int) container  // range is [firstOfRange,lastOfRange) (unused)
+	// removeRange(start, final int) container  // range is [firstOfRange,lastOfRange) (unused)
 	iremoveRange(start, final int) container // i stands for inplace, range is [firstOfRange,lastOfRange)
 	selectInt(x uint16) int                  // selectInt returns the xth integer in the container
 	serializedSizeInBytes() int
@@ -71,6 +72,8 @@ type container interface {
 	toEfficientContainer() container
 	String() string
 	containerType() contype
+
+	validate() error
 }
 
 type contype uint8
@@ -178,7 +181,6 @@ func (ra *roaringArray) appendCopiesUntil(sa roaringArray, stoppingKey uint16) {
 		} else {
 			// since there is no copy-on-write, we need to clone the container (this is important)
 			ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
-
 		}
 	}
 }
@@ -204,7 +206,6 @@ func (ra *roaringArray) appendCopiesAfter(sa roaringArray, beforeStart uint16) {
 		} else {
 			// since there is no copy-on-write, we need to clone the container (this is important)
 			ra.appendContainer(sa.keys[i], sa.containers[i].clone(), thiscopyonewrite)
-
 		}
 	}
 }
@@ -239,7 +240,6 @@ func (ra *roaringArray) clear() {
 }
 
 func (ra *roaringArray) clone() *roaringArray {
-
 	sa := roaringArray{}
 	sa.copyOnWrite = ra.copyOnWrite
 
@@ -325,7 +325,6 @@ func (ra *roaringArray) getUnionedWritableContainer(pos int, other container) co
 		return ra.getContainerAtIndex(pos).or(other)
 	}
 	return ra.getContainerAtIndex(pos).ior(other)
-
 }
 
 func (ra *roaringArray) getWritableContainerAtIndex(i int) container {
@@ -455,7 +454,6 @@ func (ra *roaringArray) headerSize() uint64 {
 		return 4 + (size+7)/8 + 8*size // - 4 because we pack the size with the cookie
 	}
 	return 4 + 4 + 8*size
-
 }
 
 // should be dirt cheap
@@ -489,7 +487,7 @@ func (ra *roaringArray) writeTo(w io.Writer) (n int64, err error) {
 		binary.LittleEndian.PutUint16(buf[2:], uint16(len(ra.keys)-1))
 		nw += 2
 		// compute isRun bitmap without temporary allocation
-		var runbitmapslice = buf[nw : nw+isRunSizeInBytes]
+		runbitmapslice := buf[nw : nw+isRunSizeInBytes]
 		for i, c := range ra.containers {
 			switch c.(type) {
 			case *runContainer16:
@@ -577,7 +575,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
 		// create is-run-container bitmap
 		isRunBitmapSize := (int(size) + 7) / 8
 		isRunBitmap, err = stream.Next(isRunBitmapSize)
-
 		if err != nil {
 			return stream.GetReadBytes(), fmt.Errorf("malformed bitmap, failed to read is-run bitmap, got: %s", err)
 		}
@@ -596,7 +593,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
 
 	// descriptive header
 	buf, err := stream.Next(2 * 2 * int(size))
-
 	if err != nil {
 		return stream.GetReadBytes(), fmt.Errorf("failed to read descriptive header: %s", err)
 	}
@@ -637,13 +633,11 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
 		if isRunBitmap != nil && isRunBitmap[i/8]&(1<<(i%8)) != 0 {
 			// run container
 			nr, err := stream.ReadUInt16()
-
 			if err != nil {
 				return 0, fmt.Errorf("failed to read runtime container size: %s", err)
 			}
 
 			buf, err := stream.Next(int(nr) * 4)
-
 			if err != nil {
 				return stream.GetReadBytes(), fmt.Errorf("failed to read runtime container content: %s", err)
 			}
@@ -656,7 +650,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
 		} else if card > arrayDefaultMaxSize {
 			// bitmap container
 			buf, err := stream.Next(arrayDefaultMaxSize * 2)
-
 			if err != nil {
 				return stream.GetReadBytes(), fmt.Errorf("failed to read bitmap container: %s", err)
 			}
@@ -670,7 +663,6 @@ func (ra *roaringArray) readFrom(stream internal.ByteInput, cookieHeader ...byte
 		} else {
 			// array container
 			buf, err := stream.Next(card * 2)
-
 			if err != nil {
 				return stream.GetReadBytes(), fmt.Errorf("failed to read array container: %s", err)
 			}
@@ -759,3 +751,30 @@ func (ra *roaringArray) needsCopyOnWrite(i int) bool {
 func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
 	ra.needCopyOnWrite[i] = true
 }
+
+// validate checks the referential integrity
+// ensures len(keys) == len(containers), recurses and checks each container type
+func (ra *roaringArray) validate() error {
+	if len(ra.keys) == 0 {
+		return errors.New("keys were empty")
+	}
+
+	if len(ra.keys) != len(ra.containers) {
+		return errors.New("keys and containers length did not match")
+	}
+
+	if len(ra.keys) != len(ra.needCopyOnWrite) {
+		return errors.New("keys and copy-on-write length did not match")
+	}
+
+	// TODO: We could parallelize this, not sure if that's warranted
+	for _, container := range ra.containers {
+
+		err := container.validate()
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
diff --git a/runcontainer.go b/runcontainer.go
index 7098ba28..1328bff9 100644
--- a/runcontainer.go
+++ b/runcontainer.go
@@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
 import (
+	"errors"
 	"fmt"
 	"sort"
 	"unsafe"
@@ -201,7 +202,6 @@ func newRunContainer16FromVals(alreadySorted bool, vals ...uint16) *runContainer
 // somewhat efficiently. For reference, see the Java
 // https://github.com/RoaringBitmap/RoaringBitmap/blob/master/src/main/java/org/roaringbitmap/RunContainer.java#L145-L192
 func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 {
-
 	rc := &runContainer16{}
 	nbrRuns := bc.numberOfRuns()
 	if nbrRuns == 0 {
@@ -251,7 +251,6 @@ func newRunContainer16FromBitmapContainer(bc *bitmapContainer) *runContainer16 {
 		curWord = curWordWith1s & (curWordWith1s + 1)
 		// We've lathered and rinsed, so repeat...
 	}
-
 }
 
 // newRunContainer16FromArray populates a new
@@ -293,7 +292,6 @@ func newRunContainer16FromArray(arr *arrayContainer) *runContainer16 {
 // If you have a small number of additions to an already
 // big runContainer16, calling Add() may be faster.
 func (rc *runContainer16) set(alreadySorted bool, vals ...uint16) {
-
 	rc2 := newRunContainer16FromVals(alreadySorted, vals...)
 	un := rc.union(rc2)
 	rc.iv = un.iv
@@ -374,7 +372,6 @@ func intersectInterval16s(a, b interval16) (res interval16, isEmpty bool) {
 // union merges two runContainer16s, producing
 // a new runContainer16 with the union of rc and b.
 func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
-
 	// rc is also known as 'a' here, but golint insisted we
 	// call it rc for consistency with the rest of the methods.
 
@@ -457,7 +454,6 @@ func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
 					break aAdds
 				}
 			}
-
 		}
 
 		if !bDone {
@@ -471,7 +467,6 @@ func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
 					break bAdds
 				}
 			}
-
 		}
 
 		m = append(m, merged)
@@ -489,7 +484,6 @@ func (rc *runContainer16) union(b *runContainer16) *runContainer16 {
 
 // unionCardinality returns the cardinality of the merger of two runContainer16s,  the union of rc and b.
 func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
-
 	// rc is also known as 'a' here, but golint insisted we
 	// call it rc for consistency with the rest of the methods.
 	answer := uint(0)
@@ -528,7 +522,7 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
 			}
 			if !mergedUpdated {
 				// we know that merged is disjoint from cura and curb
-				//m = append(m, merged)
+				// m = append(m, merged)
 				answer += uint(merged.last()) - uint(merged.start) + 1
 				mergedUsed = false
 			}
@@ -539,11 +533,11 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
 			if !canMerge16(cura, curb) {
 				if cura.start < curb.start {
 					answer += uint(cura.last()) - uint(cura.start) + 1
-					//m = append(m, cura)
+					// m = append(m, cura)
 					na++
 				} else {
 					answer += uint(curb.last()) - uint(curb.start) + 1
-					//m = append(m, curb)
+					// m = append(m, curb)
 					nb++
 				}
 			} else {
@@ -574,7 +568,6 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
 					break aAdds
 				}
 			}
-
 		}
 
 		if !bDone {
@@ -588,10 +581,9 @@ func (rc *runContainer16) unionCardinality(b *runContainer16) uint {
 					break bAdds
 				}
 			}
-
 		}
 
-		//m = append(m, merged)
+		// m = append(m, merged)
 		answer += uint(merged.last()) - uint(merged.start) + 1
 	}
 	for _, r := range rc.iv[na:] {
@@ -615,7 +607,6 @@ func (rc *runContainer16) indexOfIntervalAtOrAfter(key int, startIndex int) int
 // intersect returns a new runContainer16 holding the
 // intersection of rc (also known as 'a')  and b.
 func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 {
-
 	a := rc
 	numa := int(len(a.iv))
 	numb := int(len(b.iv))
@@ -645,8 +636,7 @@ func (rc *runContainer16) intersect(b *runContainer16) *runContainer16 {
 toploop:
 	for acuri < numa && bcuri < numb {
 
-		isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
-			intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
+		isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
 
 		if !isOverlap {
 			switch {
@@ -664,7 +654,6 @@ toploop:
 				}
 				bstart = int(b.iv[bcuri].start)
 			}
-
 		} else {
 			// isOverlap
 			output = append(output, intersection)
@@ -748,8 +737,7 @@ toploop:
 	for acuri < numa && bcuri < numb {
 		pass++
 
-		isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection =
-			intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
+		isOverlap, isLeftoverA, isLeftoverB, leftoverstart, intersection = intersectWithLeftover16(astart, int(a.iv[acuri].last()), bstart, int(b.iv[bcuri].last()))
 
 		if !isOverlap {
 			switch {
@@ -767,7 +755,6 @@ toploop:
 				}
 				bstart = int(b.iv[bcuri].start)
 			}
-
 		} else {
 			// isOverlap
 			answer += int(intersection.last()) - int(intersection.start) + 1
@@ -1014,8 +1001,10 @@ func newRunContainer16TakeOwnership(iv []interval16) *runContainer16 {
 	return rc
 }
 
-const baseRc16Size = int(unsafe.Sizeof(runContainer16{}))
-const perIntervalRc16Size = int(unsafe.Sizeof(interval16{}))
+const (
+	baseRc16Size        = int(unsafe.Sizeof(runContainer16{}))
+	perIntervalRc16Size = int(unsafe.Sizeof(interval16{}))
+)
 
 const baseDiskRc16Size = int(unsafe.Sizeof(uint16(0)))
 
@@ -1274,7 +1263,7 @@ func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
 				break
 			}
 		} else {
-			ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
+			ri.curPosInIndex += uint16(moreVals) // moreVals always fits in uint16
 		}
 	}
 
@@ -1315,7 +1304,7 @@ func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
 				break
 			}
 		} else {
-			ri.curPosInIndex += uint16(moreVals) //moreVals always fits in uint16
+			ri.curPosInIndex += uint16(moreVals) // moreVals always fits in uint16
 		}
 	}
 
@@ -1324,7 +1313,6 @@ func (ri *runIterator16) nextMany64(hs uint64, buf []uint64) int {
 
 // remove removes key from the container.
 func (rc *runContainer16) removeKey(key uint16) (wasPresent bool) {
-
 	var index int
 	index, wasPresent, _ = rc.search(int(key))
 	if !wasPresent {
@@ -1361,7 +1349,7 @@ func (rc *runContainer16) deleteAt(curIndex *int, curPosInIndex *uint16) {
 		*curPosInIndex--
 		// if we leave *curIndex alone, then Next() will work properly even after the delete.
 	default:
-		//middle
+		// middle
 		// split into two, adding an interval16
 		new0 := newInterval16Range(rc.iv[ci].start, rc.iv[ci].start+*curPosInIndex-1)
 
@@ -1376,7 +1364,6 @@ func (rc *runContainer16) deleteAt(curIndex *int, curPosInIndex *uint16) {
 		*curIndex++
 		*curPosInIndex = 0
 	}
-
 }
 
 func have4Overlap16(astart, alast, bstart, blast int) bool {
@@ -1503,6 +1490,13 @@ func (iv interval16) isSuperSetOf(b interval16) bool {
 	return iv.start <= b.start && b.last() <= iv.last()
 }
 
+func (iv interval16) hasEmptyIntersection(b interval16) bool {
+	c1 := iv.start <= b.start && b.start <= iv.last()
+	c2 := b.start <= iv.start && iv.start <= b.last()
+
+	return !c1 && !c2
+}
+
 func (iv interval16) subtractInterval(del interval16) (left []interval16, delcount int) {
 	isect, isEmpty := intersectInterval16s(iv, del)
 
@@ -1678,7 +1672,6 @@ func (rc *runContainer16) isubtract(del interval16) {
 // port of run_container_andnot from CRoaring...
 // https://github.com/RoaringBitmap/CRoaring/blob/master/src/containers/run.c#L435-L496
 func (rc *runContainer16) AndNotRunContainer16(b *runContainer16) *runContainer16 {
-
 	if len(b.iv) == 0 || len(rc.iv) == 0 {
 		return rc
 	}
@@ -1949,7 +1942,6 @@ func (rc *runContainer16) getManyIterator() manyIterable {
 // add the values in the range [firstOfRange, endx). endx
 // is still abe to express 2^16 because it is an int not an uint16.
 func (rc *runContainer16) iaddRange(firstOfRange, endx int) container {
-
 	if firstOfRange > endx {
 		panic(fmt.Sprintf("invalid %v = endx > firstOfRange", endx))
 	}
@@ -2002,7 +1994,6 @@ func (rc *runContainer16) not(firstOfRange, endx int) container {
 // makes 2 more passes through the arrays than should be
 // strictly necessary. Measure both ways though--this may not matter.
 func (rc *runContainer16) Not(firstOfRange, endx int) *runContainer16 {
-
 	if firstOfRange > endx {
 		panic(fmt.Sprintf("invalid %v = endx > firstOfRange == %v", endx, firstOfRange))
 	}
@@ -2066,12 +2057,12 @@ func (rc *runContainer16) equals(o container) bool {
 	rit := rc.getShortIterator()
 	bit := o.getShortIterator()
 
-	//k := 0
+	// k := 0
 	for rit.hasNext() {
 		if bit.next() != rit.next() {
 			return false
 		}
-		//k++
+		// k++
 	}
 	return true
 }
@@ -2132,7 +2123,7 @@ func (rc *runContainer16) andBitmapContainerCardinality(bc *bitmapContainer) int
 	for i := range rc.iv {
 		answer += bc.getCardinalityInRange(uint(rc.iv[i].start), uint(rc.iv[i].last())+1)
 	}
-	//bc.computeCardinality()
+	// bc.computeCardinality()
 	return answer
 }
 
@@ -2190,7 +2181,6 @@ func (rc *runContainer16) inplaceUnion(rc2 *runContainer16) container {
 }
 
 func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
-
 	it := bc.getShortIterator()
 	for it.hasNext() {
 		rc.Add(it.next())
@@ -2206,7 +2196,7 @@ func (rc *runContainer16) iorArray(ac *arrayContainer) container {
 		return rc
 	}
 	var cardMinusOne uint16
-	//TODO: perform the union algorithm in-place using rc.iv
+	// TODO: perform the union algorithm in-place using rc.iv
 	// this can be done with methods like the in-place array container union
 	// but maybe lazily moving the remaining elements back.
 	rc.iv, cardMinusOne = runArrayUnionToRuns(rc, ac)
@@ -2511,7 +2501,6 @@ func (rc *runContainer16) toArrayContainer() *arrayContainer {
 }
 
 func newRunContainer16FromContainer(c container) *runContainer16 {
-
 	switch x := c.(type) {
 	case *runContainer16:
 		return x.Clone()
@@ -2622,3 +2611,44 @@ func (rc *runContainer16) addOffset(x uint16) (container, container) {
 
 	return low, high
 }
+
+// intervalOverlaps returns an error if the intervals overlap e.g have non-empty intersection
+func intervalOverlaps(outer interval16, inner interval16) error {
+	if !outer.hasEmptyIntersection(inner) {
+		return errors.New("intervals overlap")
+	}
+
+	return nil
+}
+
+// validate checks the run container referential integrity
+// Ensures runs are not degenerate, non-contiguous and non-overlapping
+func (rc *runContainer16) validate() error {
+	if rc.getCardinality() == 0 {
+		return errors.New("zero intervals")
+	}
+
+	for outeridx := range rc.iv {
+
+		if rc.iv[outeridx].length == 0 {
+			return errors.New("zero run length")
+		}
+
+		for inneridx := outeridx + 1; inneridx < len(rc.iv); inneridx++ {
+			outer := rc.iv[outeridx]
+			inner := rc.iv[inneridx]
+
+			if outer.equal(inner) {
+				return errors.New("intervals were equal")
+			}
+
+			err := intervalOverlaps(outer, inner)
+			if err != nil {
+				return err
+			}
+		}
+
+	}
+
+	return nil
+}
diff --git a/runcontainer_test.go b/runcontainer_test.go
index 86549a2c..8f3835c4 100644
--- a/runcontainer_test.go
+++ b/runcontainer_test.go
@@ -190,7 +190,8 @@ func TestRleRunIterator16(t *testing.T) {
 			rc := newRunContainer16CopyIv([]interval16{
 				newInterval16Range(4, 7),
 				newInterval16Range(11, 13),
-				newInterval16Range(18, 21)})
+				newInterval16Range(18, 21),
+			})
 
 			assert.EqualValues(t, 11, rc.getCardinality())
 
@@ -210,7 +211,8 @@ func TestRleRunIterator16(t *testing.T) {
 			rc := newRunContainer16CopyIv([]interval16{
 				newInterval16Range(4, 7),
 				newInterval16Range(11, 13),
-				newInterval16Range(18, 21)})
+				newInterval16Range(18, 21),
+			})
 			expectedCard := 11
 			expectedVals := []uint32{4, 5, 6, 7, 11, 12, 13, 18, 19, 20, 21}
 			hs := uint32(1 << 16)
@@ -307,7 +309,6 @@ func TestRleRunIterator16(t *testing.T) {
 }
 
 func TestRleRunReverseIterator16(t *testing.T) {
-
 	t.Run("RunReverseIterator16 unit tests for next, hasNext, and peekNext should pass", func(t *testing.T) {
 		{
 			rc := newRunContainer16()
@@ -411,12 +412,14 @@ func TestRleIntersection16(t *testing.T) {
 					newInterval16Range(2, 4),
 					newInterval16Range(8, 9),
 					newInterval16Range(14, 16),
-					newInterval16Range(20, 22)},
+					newInterval16Range(20, 22),
+				},
 			)
 			f := newRunContainer16TakeOwnership(
 				[]interval16{
 					newInterval16Range(3, 18),
-					newInterval16Range(22, 23)},
+					newInterval16Range(22, 23),
+				},
 			)
 
 			{
@@ -453,7 +456,6 @@ func TestRleIntersection16(t *testing.T) {
 
 func TestRleRandomIntersection16(t *testing.T) {
 	t.Run("RunContainer.intersect of two RunContainers should return their intersection, and this should hold over randomized container content when compared to intersection done with hash maps", func(t *testing.T) {
-
 		seed := int64(42)
 		rand.Seed(seed)
 
@@ -510,7 +512,7 @@ func TestRleRandomIntersection16(t *testing.T) {
 				// RunContainer's Intersect
 				brle := newRunContainer16FromVals(false, b...)
 
-				//arle := newRunContainer16FromVals(false, a...)
+				// arle := newRunContainer16FromVals(false, a...)
 				// instead of the above line, create from array
 				// get better test coverage:
 				arr := newArrayContainerRange(int(first), int(second))
@@ -519,7 +521,7 @@ func TestRleRandomIntersection16(t *testing.T) {
 
 				isect := arle.intersect(brle)
 
-				//showHash("hashi", hashi)
+				// showHash("hashi", hashi)
 
 				for k := range hashi {
 					assert.True(t, isect.contains(uint16(k)))
@@ -532,14 +534,11 @@ func TestRleRandomIntersection16(t *testing.T) {
 		for i := range trials {
 			tester(trials[i])
 		}
-
 	})
 }
 
 func TestRleRandomUnion16(t *testing.T) {
-
 	t.Run("RunContainer.union of two RunContainers should return their union, and this should hold over randomized container content when compared to union done with hash maps", func(t *testing.T) {
-
 		seed := int64(42)
 		rand.Seed(seed)
 
@@ -580,7 +579,7 @@ func TestRleRandomUnion16(t *testing.T) {
 					hashu[k] = true
 				}
 
-				//showHash("hashu", hashu)
+				// showHash("hashu", hashu)
 
 				// RunContainer's Union
 				arle := newRunContainer16()
@@ -898,8 +897,8 @@ func TestRle16RandomIntersectAgainstOtherContainers010(t *testing.T) {
 					mb[r1] = true
 				}
 
-				//showArray16(a, "a")
-				//showArray16(b, "b")
+				// showArray16(a, "a")
+				// showArray16(b, "b")
 
 				// hash version of intersect:
 				hashi := make(map[int]bool)
@@ -948,12 +947,10 @@ func TestRle16RandomIntersectAgainstOtherContainers010(t *testing.T) {
 		for i := range trials {
 			tester(trials[i])
 		}
-
 	})
 }
 
 func TestRle16RandomUnionAgainstOtherContainers011(t *testing.T) {
-
 	t.Run("runContainer16 `or` operation against other container types should correctly do the intersection", func(t *testing.T) {
 		seed := int64(42)
 		rand.Seed(seed)
@@ -982,8 +979,8 @@ func TestRle16RandomUnionAgainstOtherContainers011(t *testing.T) {
 					mb[r1] = true
 				}
 
-				//showArray16(a, "a")
-				//showArray16(b, "b")
+				// showArray16(a, "a")
+				// showArray16(b, "b")
 
 				// hash version of union
 				hashi := make(map[int]bool)
@@ -1030,12 +1027,10 @@ func TestRle16RandomUnionAgainstOtherContainers011(t *testing.T) {
 		for i := range trials {
 			tester(trials[i])
 		}
-
 	})
 }
 
 func TestRle16RandomInplaceUnionAgainstOtherContainers012(t *testing.T) {
-
 	t.Run("runContainer16 `ior` inplace union operation against other container types should correctly do the intersection", func(t *testing.T) {
 		seed := int64(42)
 		rand.Seed(seed)
@@ -1064,8 +1059,8 @@ func TestRle16RandomInplaceUnionAgainstOtherContainers012(t *testing.T) {
 					mb[r1] = true
 				}
 
-				//showArray16(a, "a")
-				//showArray16(b, "b")
+				// showArray16(a, "a")
+				// showArray16(b, "b")
 
 				// hash version of union
 				hashi := make(map[int]bool)
@@ -1118,12 +1113,10 @@ func TestRle16RandomInplaceUnionAgainstOtherContainers012(t *testing.T) {
 		for i := range trials {
 			tester(trials[i])
 		}
-
 	})
 }
 
 func TestRle16RandomInplaceIntersectAgainstOtherContainers014(t *testing.T) {
-
 	t.Run("runContainer16 `iand` inplace-and operation against other container types should correctly do the intersection", func(t *testing.T) {
 		seed := int64(42)
 		rand.Seed(seed)
@@ -1152,8 +1145,8 @@ func TestRle16RandomInplaceIntersectAgainstOtherContainers014(t *testing.T) {
 					mb[r1] = true
 				}
 
-				//showArray16(a, "a")
-				//showArray16(b, "b")
+				// showArray16(a, "a")
+				// showArray16(b, "b")
 
 				// hash version of intersect:
 				hashi := make(map[int]bool)
@@ -1206,12 +1199,10 @@ func TestRle16RandomInplaceIntersectAgainstOtherContainers014(t *testing.T) {
 		for i := range trials {
 			tester(trials[i])
 		}
-
 	})
 }
 
 func TestRle16RemoveApi015(t *testing.T) {
-
 	t.Run("runContainer16 `remove` (a minus b) should work", func(t *testing.T) {
 		seed := int64(42)
 		rand.Seed(seed)
@@ -1240,8 +1231,8 @@ func TestRle16RemoveApi015(t *testing.T) {
 					mb[r1] = true
 				}
 
-				//showArray16(a, "a")
-				//showArray16(b, "b")
+				// showArray16(a, "a")
+				// showArray16(b, "b")
 
 				// hash version of remove:
 				hashrm := make(map[int]bool)
@@ -1270,7 +1261,6 @@ func TestRle16RemoveApi015(t *testing.T) {
 		for i := range trials {
 			tester(trials[i])
 		}
-
 	})
 }
 
@@ -1283,7 +1273,6 @@ func showArray16(a []uint16, name string) {
 }
 
 func TestRle16RandomAndNot016(t *testing.T) {
-
 	t.Run("runContainer16 `andNot` operation against other container types should correctly do the and-not operation", func(t *testing.T) {
 		seed := int64(42)
 		rand.Seed(seed)
@@ -1312,8 +1301,8 @@ func TestRle16RandomAndNot016(t *testing.T) {
 					mb[r1] = true
 				}
 
-				//showArray16(a, "a")
-				//showArray16(b, "b")
+				// showArray16(a, "a")
+				// showArray16(b, "b")
 
 				// hash version of and-not
 				hashi := make(map[int]bool)
@@ -1361,12 +1350,10 @@ func TestRle16RandomAndNot016(t *testing.T) {
 		for i := range trials {
 			tester(trials[i])
 		}
-
 	})
 }
 
 func TestRle16RandomInplaceAndNot017(t *testing.T) {
-
 	t.Run("runContainer16 `iandNot` operation against other container types should correctly do the inplace-and-not operation", func(t *testing.T) {
 		seed := int64(42)
 		rand.Seed(seed)
@@ -1395,8 +1382,8 @@ func TestRle16RandomInplaceAndNot017(t *testing.T) {
 					mb[r1] = true
 				}
 
-				//showArray16(a, "a")
-				//showArray16(b, "b")
+				// showArray16(a, "a")
+				// showArray16(b, "b")
 
 				// hash version of and-not
 				hashi := make(map[int]bool)
@@ -1447,12 +1434,10 @@ func TestRle16RandomInplaceAndNot017(t *testing.T) {
 		for i := range trials {
 			tester(trials[i])
 		}
-
 	})
 }
 
 func TestRle16InversionOfIntervals018(t *testing.T) {
-
 	t.Run("runContainer `invert` operation should do a NOT on the set of intervals, in-place", func(t *testing.T) {
 		seed := int64(42)
 		rand.Seed(seed)
@@ -1470,7 +1455,7 @@ func TestRle16InversionOfIntervals018(t *testing.T) {
 				a := []uint16{}
 
 				// hashNotA will be NOT ma
-				//for i := 0; i < n; i++ {
+				// for i := 0; i < n; i++ {
 				for i := 0; i < MaxUint16+1; i++ {
 					hashNotA[i] = true
 				}
@@ -1504,12 +1489,10 @@ func TestRle16InversionOfIntervals018(t *testing.T) {
 		for i := range trials {
 			tester(trials[i])
 		}
-
 	})
 }
 
 func TestRle16SubtractionOfIntervals019(t *testing.T) {
-
 	t.Run("runContainer `subtract` operation removes an interval in-place", func(t *testing.T) {
 		// basics
 
@@ -1639,7 +1622,6 @@ func TestRle16Rank020(t *testing.T) {
 }
 
 func TestRle16NotAlsoKnownAsFlipRange021(t *testing.T) {
-
 	t.Run("runContainer `Not` operation should flip the bits of a range on the new returned container", func(t *testing.T) {
 		seed := int64(42)
 		rand.Seed(seed)
@@ -1866,7 +1848,6 @@ func TestRleIntersects023(t *testing.T) {
 		for i := range trials {
 			tester(trials[i])
 		}
-
 	})
 }
 
@@ -1950,7 +1931,7 @@ func TestRle16RandomFillLeastSignificant16bits029(t *testing.T) {
 					ma[r0] = true
 				}
 
-				//showArray16(a, "a")
+				// showArray16(a, "a")
 
 				// RunContainer
 				rc := newRunContainer16FromVals(false, a...)
@@ -2013,7 +1994,7 @@ func TestRle16RandomGetShortIterator030(t *testing.T) {
 					ma[r0] = true
 				}
 
-				//showArray16(a, "a")
+				// showArray16(a, "a")
 
 				// RunContainer
 				rc := newRunContainer16FromVals(false, a...)
@@ -2074,7 +2055,7 @@ func TestRle16RandomIaddRangeIremoveRange031(t *testing.T) {
 					ma[r0] = true
 				}
 
-				//showArray16(a, "a")
+				// showArray16(a, "a")
 
 				// RunContainer
 				rc := newRunContainer16FromVals(false, a...)
@@ -2198,14 +2179,14 @@ type twofer struct {
 
 func TestAllContainerMethodsAllContainerTypesWithData067(t *testing.T) {
 	t.Run("each of the container methods that takes two containers should handle all 3x3==9 possible ways of being called -- and return results that agree with each other", func(t *testing.T) {
-
 		seed := int64(42)
 		rand.Seed(seed)
 
 		srang := newInterval16Range(MaxUint16-100, MaxUint16)
 		trials := []trial{
 			{n: 100, percentFill: .7, ntrial: 1, numRandomOpsPass: 100},
-			{n: 100, percentFill: .7, ntrial: 1, numRandomOpsPass: 100, srang: &srang}}
+			{n: 100, percentFill: .7, ntrial: 1, numRandomOpsPass: 100, srang: &srang},
+		}
 
 		tester := func(tr trial) {
 			for j := 0; j < tr.ntrial; j++ {
@@ -2333,6 +2314,62 @@ func TestRuntimeIteratorAdvance(t *testing.T) {
 	testContainerIteratorAdvance(t, newRunContainer16())
 }
 
+func TestIntervalOverlaps(t *testing.T) {
+	a := newInterval16Range(0, 9)
+	b := newInterval16Range(0, 9)
+
+	assert.Error(t, intervalOverlaps(a, b))
+
+	a = newInterval16Range(0, 9)
+	b = newInterval16Range(10, 20)
+
+	assert.NoError(t, intervalOverlaps(a, b))
+
+	a = newInterval16Range(0, 12)
+	b = newInterval16Range(10, 20)
+
+	assert.Error(t, intervalOverlaps(a, b))
+}
+
+func TestIntervalValidation(t *testing.T) {
+	// TODO table driven
+	rc := &runContainer16{}
+	assert.Error(t, rc.validate())
+
+	a := newInterval16Range(0, 9)
+	b := newInterval16Range(0, 9)
+	rc = &runContainer16{}
+	rc.iv = append(rc.iv, a)
+	rc.iv = append(rc.iv, b)
+	assert.Error(t, rc.validate(), "expected range overlap")
+
+	a = newInterval16Range(0, 9)
+	b = newInterval16Range(10, 20)
+
+	rc = &runContainer16{}
+	rc.iv = append(rc.iv, a)
+	rc.iv = append(rc.iv, b)
+	assert.NoError(t, rc.validate(), "expected valid")
+
+	a = newInterval16Range(0, 12)
+	b = newInterval16Range(10, 20)
+
+	rc = &runContainer16{}
+	rc.iv = append(rc.iv, a)
+	rc.iv = append(rc.iv, b)
+	assert.Error(t, rc.validate(), "expected range overlap")
+
+	c := newInterval16Range(100, 150)
+	d := newInterval16Range(1000, 10000)
+
+	rc = &runContainer16{}
+	rc.iv = append(rc.iv, a)
+	rc.iv = append(rc.iv, b)
+	rc.iv = append(rc.iv, c)
+	rc.iv = append(rc.iv, d)
+	assert.Error(t, rc.validate())
+}
+
 // go test -bench BenchmarkShortIteratorAdvance -run -
 func BenchmarkShortIteratorAdvanceRuntime(b *testing.B) {
 	benchmarkContainerIteratorAdvance(b, newRunContainer16())
@@ -2346,7 +2383,6 @@ func BenchmarkShortIteratorNextRuntime(b *testing.B) {
 // generate random contents, then return that same
 // logical content in three different container types
 func getRandomSameThreeContainers(tr trial) (*arrayContainer, *runContainer16, *bitmapContainer) {
-
 	ma := make(map[int]bool)
 
 	n := tr.n

From 32a987679146413afdbb2268249a81d7676df9da Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Wed, 1 May 2024 17:07:06 -0400
Subject: [PATCH 02/11] Fixes checks + Add Err*

---
 arraycontainer_test.go  |  2 +-
 bitmapcontainer.go      |  8 +++---
 bitmapcontainer_test.go | 18 +++++++++++++
 roaring_test.go         | 20 ++++++++++++++-
 roaringarray.go         | 32 ++++++++++++++++++++---
 runcontainer.go         | 36 +++++++++++++++++++-------
 runcontainer_test.go    | 56 +++++++++++++++++++++++++++++++++++------
 7 files changed, 147 insertions(+), 25 deletions(-)

diff --git a/arraycontainer_test.go b/arraycontainer_test.go
index bc79c6fd..09541157 100644
--- a/arraycontainer_test.go
+++ b/arraycontainer_test.go
@@ -472,7 +472,7 @@ func TestArrayContainerValidation(t *testing.T) {
 		array.iadd(uint16(i))
 	}
 	err = array.validate()
-	assert.NoError(t, err)
+	assert.Error(t, err)
 }
 
 // go test -bench BenchmarkShortIteratorAdvance -run -
diff --git a/bitmapcontainer.go b/bitmapcontainer.go
index 2c7197d4..1903e785 100644
--- a/bitmapcontainer.go
+++ b/bitmapcontainer.go
@@ -1,7 +1,6 @@
 package roaring
 
 import (
-	"errors"
 	"fmt"
 	"unsafe"
 )
@@ -1235,8 +1234,11 @@ func (bc *bitmapContainer) addOffset(x uint16) (container, container) {
 
 // validate checks that the container size is non-negative
 func (bc *bitmapContainer) validate() error {
-	if bc.getCardinality() < 0 {
-		return errors.New("negative size")
+	if bc.cardinality < arrayDefaultMaxSize {
+		return fmt.Errorf("bitmap container size was less than: %d", arrayDefaultMaxSize)
+	}
+	if bc.cardinality > maxCapacity {
+		return fmt.Errorf("bitmap container size was greater than: %d", maxCapacity)
 	}
 
 	return nil
diff --git a/bitmapcontainer_test.go b/bitmapcontainer_test.go
index af985d1e..c6121108 100644
--- a/bitmapcontainer_test.go
+++ b/bitmapcontainer_test.go
@@ -328,3 +328,21 @@ func TestBitmapContainerIAndNot(t *testing.T) {
 	require.ElementsMatch(t, []uint16{12279, 12282, 12285}, bc.(*arrayContainer).content)
 	require.Equal(t, 3, bc.getCardinality())
 }
+
+func TestBitMapContainerValidate(t *testing.T) {
+	bc := newBitmapContainer()
+
+	for i := 0; i < arrayDefaultMaxSize-1; i++ {
+		bc.iadd(uint16(i * 3))
+	}
+	// bitmap containers should have size arrayDefaultMaxSize or larger
+	assert.Error(t, bc.validate())
+	bc.iadd(math.MaxUint16)
+
+	assert.NoError(t, bc.validate())
+
+	// Break the max cardinality invariant
+	bc.cardinality = maxCapacity + 1
+
+	assert.Error(t, bc.validate())
+}
diff --git a/roaring_test.go b/roaring_test.go
index d1071606..7967ca91 100644
--- a/roaring_test.go
+++ b/roaring_test.go
@@ -2636,7 +2636,25 @@ func TestFromBitSet(t *testing.T) {
 	})
 }
 
-func TestValidation(t *testing.T) {
+func TestRoaringArrayValidation(t *testing.T) {
+	a := newRoaringArray()
+
+	assert.ErrorIs(t, a.validate(), ErrEmptyKeys)
+
+	a.keys = append(a.keys, uint16(3), uint16(1))
+	assert.ErrorIs(t, a.validate(), ErrKeySortOrder)
+	a.clear()
+
+	// build up cardinality coherent arrays
+	a.keys = append(a.keys, uint16(1), uint16(3), uint16(10))
+	assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
+	a.containers = append(a.containers, &runContainer16{}, &runContainer16{}, &runContainer16{})
+	assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
+	a.needCopyOnWrite = append(a.needCopyOnWrite, true, false, true)
+	assert.Errorf(t, a.validate(), "zero intervals")
+}
+
+func TestBitMapValidation(t *testing.T) {
 	bm := NewBitmap()
 	bm.AddRange(306, 406)
 	bm.AddRange(0, 100)
diff --git a/roaringarray.go b/roaringarray.go
index 8e8bf5db..2bd793fd 100644
--- a/roaringarray.go
+++ b/roaringarray.go
@@ -85,6 +85,12 @@ const (
 	run32Contype
 )
 
+var (
+	ErrEmptyKeys             = errors.New("keys were empty")
+	ErrKeySortOrder          = errors.New("keys were out of order")
+	ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
+)
+
 // careful: range is [firstOfRange,lastOfRange]
 func rangeOfOnes(start, last int) container {
 	if start > MaxUint16 {
@@ -752,19 +758,39 @@ func (ra *roaringArray) setNeedsCopyOnWrite(i int) {
 	ra.needCopyOnWrite[i] = true
 }
 
+func (ra *roaringArray) checkKeysSorted() bool {
+	if len(ra.keys) == 0 || len(ra.keys) == 1 {
+		return true
+	}
+	previous := ra.keys[0]
+	for nextIdx := 1; nextIdx < len(ra.keys); nextIdx++ {
+		next := ra.keys[nextIdx]
+		if previous >= next {
+			return false
+		}
+		previous = next
+
+	}
+	return true
+}
+
 // validate checks the referential integrity
 // ensures len(keys) == len(containers), recurses and checks each container type
 func (ra *roaringArray) validate() error {
 	if len(ra.keys) == 0 {
-		return errors.New("keys were empty")
+		return ErrEmptyKeys
+	}
+
+	if !ra.checkKeysSorted() {
+		return ErrKeySortOrder
 	}
 
 	if len(ra.keys) != len(ra.containers) {
-		return errors.New("keys and containers length did not match")
+		return ErrCardinalityConstraint
 	}
 
 	if len(ra.keys) != len(ra.needCopyOnWrite) {
-		return errors.New("keys and copy-on-write length did not match")
+		return ErrCardinalityConstraint
 	}
 
 	// TODO: We could parallelize this, not sure if that's warranted
diff --git a/runcontainer.go b/runcontainer.go
index 1328bff9..1a8d61b5 100644
--- a/runcontainer.go
+++ b/runcontainer.go
@@ -60,6 +60,13 @@ type interval16 struct {
 	length uint16 // length minus 1
 }
 
+var (
+	ErrRunIntervalsEmpty  = errors.New("run contained no interval")
+	ErrRunIntervalLength  = errors.New("interval had zero length")
+	ErrRunIntervalEqual   = errors.New("intervals were equal")
+	ErrRunIntervalOverlap = errors.New("intervals overlapped or were continguous")
+)
+
 func newInterval16Range(start, last uint16) interval16 {
 	if last < start {
 		panic(fmt.Sprintf("last (%d) cannot be smaller than start (%d)", last, start))
@@ -1490,7 +1497,18 @@ func (iv interval16) isSuperSetOf(b interval16) bool {
 	return iv.start <= b.start && b.last() <= iv.last()
 }
 
-func (iv interval16) hasEmptyIntersection(b interval16) bool {
+func (iv interval16) isNonContiguousDisjoint(b interval16) bool {
+	// cover the zero start case
+	if iv.start == b.start {
+		return false
+	}
+
+	nonContiguous1 := iv.start == b.last()+1 || iv.last() == b.start+1
+	nonContiguous2 := b.start == iv.last()+1 || b.last() == iv.start+1
+	if nonContiguous1 || nonContiguous2 {
+		return false
+	}
+
 	c1 := iv.start <= b.start && b.start <= iv.last()
 	c2 := b.start <= iv.start && iv.start <= b.last()
 
@@ -2612,10 +2630,10 @@ func (rc *runContainer16) addOffset(x uint16) (container, container) {
 	return low, high
 }
 
-// intervalOverlaps returns an error if the intervals overlap e.g have non-empty intersection
-func intervalOverlaps(outer interval16, inner interval16) error {
-	if !outer.hasEmptyIntersection(inner) {
-		return errors.New("intervals overlap")
+// isNonContiguousDisjoint returns an error if the intervals overlap e.g have non-empty intersection
+func isNonContiguousDisjoint(outer interval16, inner interval16) error {
+	if !outer.isNonContiguousDisjoint(inner) {
+		return ErrRunIntervalOverlap
 	}
 
 	return nil
@@ -2625,13 +2643,13 @@ func intervalOverlaps(outer interval16, inner interval16) error {
 // Ensures runs are not degenerate, non-contiguous and non-overlapping
 func (rc *runContainer16) validate() error {
 	if rc.getCardinality() == 0 {
-		return errors.New("zero intervals")
+		return ErrRunIntervalsEmpty
 	}
 
 	for outeridx := range rc.iv {
 
 		if rc.iv[outeridx].length == 0 {
-			return errors.New("zero run length")
+			return ErrRunIntervalLength
 		}
 
 		for inneridx := outeridx + 1; inneridx < len(rc.iv); inneridx++ {
@@ -2639,10 +2657,10 @@ func (rc *runContainer16) validate() error {
 			inner := rc.iv[inneridx]
 
 			if outer.equal(inner) {
-				return errors.New("intervals were equal")
+				return ErrRunIntervalEqual
 			}
 
-			err := intervalOverlaps(outer, inner)
+			err := isNonContiguousDisjoint(outer, inner)
 			if err != nil {
 				return err
 			}
diff --git a/runcontainer_test.go b/runcontainer_test.go
index 8f3835c4..d8e9fd97 100644
--- a/runcontainer_test.go
+++ b/runcontainer_test.go
@@ -2315,20 +2315,60 @@ func TestRuntimeIteratorAdvance(t *testing.T) {
 }
 
 func TestIntervalOverlaps(t *testing.T) {
+	// contiguous runs
 	a := newInterval16Range(0, 9)
-	b := newInterval16Range(0, 9)
+	b := newInterval16Range(10, 20)
 
-	assert.Error(t, intervalOverlaps(a, b))
+	// Ensure the function is reflexive
+	assert.False(t, a.isNonContiguousDisjoint(a))
+	assert.False(t, a.isNonContiguousDisjoint(b))
+	// Ensure the function is symmetric
+	assert.False(t, b.isNonContiguousDisjoint(a))
+	assert.Error(t, isNonContiguousDisjoint(a, b))
 
+	// identical runs
 	a = newInterval16Range(0, 9)
-	b = newInterval16Range(10, 20)
+	b = newInterval16Range(0, 9)
+
+	assert.False(t, a.isNonContiguousDisjoint(b))
+	assert.False(t, b.isNonContiguousDisjoint(a))
+	assert.Error(t, isNonContiguousDisjoint(a, b))
 
-	assert.NoError(t, intervalOverlaps(a, b))
+	// identical start runs
+	a = newInterval16Range(0, 9)
+	b = newInterval16Range(0, 20)
+
+	assert.False(t, a.isNonContiguousDisjoint(b))
+	assert.False(t, b.isNonContiguousDisjoint(a))
+	assert.Error(t, isNonContiguousDisjoint(a, b))
 
+	// overlapping runs
 	a = newInterval16Range(0, 12)
 	b = newInterval16Range(10, 20)
 
-	assert.Error(t, intervalOverlaps(a, b))
+	assert.False(t, a.isNonContiguousDisjoint(b))
+	assert.Error(t, isNonContiguousDisjoint(a, b))
+
+	// subset runs
+	a = newInterval16Range(0, 12)
+	b = newInterval16Range(5, 9)
+
+	assert.False(t, a.isNonContiguousDisjoint(b))
+	assert.Error(t, isNonContiguousDisjoint(a, b))
+
+	// degenerate runs
+	a = newInterval16Range(0, 0)
+	b = newInterval16Range(5, 5)
+
+	assert.True(t, a.isNonContiguousDisjoint(b))
+	assert.NoError(t, isNonContiguousDisjoint(a, b))
+
+	// disjoint non-contiguous runs
+	a = newInterval16Range(0, 100)
+	b = newInterval16Range(1000, 2000)
+
+	assert.True(t, a.isNonContiguousDisjoint(b))
+	assert.NoError(t, isNonContiguousDisjoint(a, b))
 }
 
 func TestIntervalValidation(t *testing.T) {
@@ -2341,7 +2381,7 @@ func TestIntervalValidation(t *testing.T) {
 	rc = &runContainer16{}
 	rc.iv = append(rc.iv, a)
 	rc.iv = append(rc.iv, b)
-	assert.Error(t, rc.validate(), "expected range overlap")
+	assert.ErrorIs(t, rc.validate(), ErrRunIntervalEqual)
 
 	a = newInterval16Range(0, 9)
 	b = newInterval16Range(10, 20)
@@ -2349,7 +2389,7 @@ func TestIntervalValidation(t *testing.T) {
 	rc = &runContainer16{}
 	rc.iv = append(rc.iv, a)
 	rc.iv = append(rc.iv, b)
-	assert.NoError(t, rc.validate(), "expected valid")
+	assert.ErrorIs(t, rc.validate(), ErrRunIntervalOverlap)
 
 	a = newInterval16Range(0, 12)
 	b = newInterval16Range(10, 20)
@@ -2357,7 +2397,7 @@ func TestIntervalValidation(t *testing.T) {
 	rc = &runContainer16{}
 	rc.iv = append(rc.iv, a)
 	rc.iv = append(rc.iv, b)
-	assert.Error(t, rc.validate(), "expected range overlap")
+	assert.Error(t, rc.validate(), ErrRunIntervalOverlap)
 
 	c := newInterval16Range(100, 150)
 	d := newInterval16Range(1000, 10000)

From 7ba3ee09d1ca2c98f1422f696d92e697487f4c7b Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Wed, 1 May 2024 17:16:08 -0400
Subject: [PATCH 03/11] Add sorted check on run container

---
 runcontainer.go      |  8 +++++++-
 runcontainer_test.go | 16 +++++++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/runcontainer.go b/runcontainer.go
index 1a8d61b5..bdd82e83 100644
--- a/runcontainer.go
+++ b/runcontainer.go
@@ -62,6 +62,7 @@ type interval16 struct {
 
 var (
 	ErrRunIntervalsEmpty  = errors.New("run contained no interval")
+	ErrRunNonSorted       = errors.New("runs were not sorted")
 	ErrRunIntervalLength  = errors.New("interval had zero length")
 	ErrRunIntervalEqual   = errors.New("intervals were equal")
 	ErrRunIntervalOverlap = errors.New("intervals overlapped or were continguous")
@@ -2652,14 +2653,19 @@ func (rc *runContainer16) validate() error {
 			return ErrRunIntervalLength
 		}
 
+		outer := rc.iv[outeridx]
 		for inneridx := outeridx + 1; inneridx < len(rc.iv); inneridx++ {
-			outer := rc.iv[outeridx]
+
 			inner := rc.iv[inneridx]
 
 			if outer.equal(inner) {
 				return ErrRunIntervalEqual
 			}
 
+			if outer.start >= inner.start {
+				return ErrRunNonSorted
+			}
+
 			err := isNonContiguousDisjoint(outer, inner)
 			if err != nil {
 				return err
diff --git a/runcontainer_test.go b/runcontainer_test.go
index d8e9fd97..7502f7d8 100644
--- a/runcontainer_test.go
+++ b/runcontainer_test.go
@@ -2407,7 +2407,21 @@ func TestIntervalValidation(t *testing.T) {
 	rc.iv = append(rc.iv, b)
 	rc.iv = append(rc.iv, c)
 	rc.iv = append(rc.iv, d)
-	assert.Error(t, rc.validate())
+	assert.ErrorIs(t, rc.validate(), ErrRunIntervalOverlap)
+
+	a = newInterval16Range(0, 10)
+	b = newInterval16Range(100, 200)
+
+	// missort
+	rc = &runContainer16{}
+	rc.iv = append(rc.iv, b)
+	rc.iv = append(rc.iv, a)
+	assert.ErrorIs(t, rc.validate(), ErrRunNonSorted)
+
+	rc = &runContainer16{}
+	rc.iv = append(rc.iv, a)
+	rc.iv = append(rc.iv, b)
+	assert.NoError(t, rc.validate())
 }
 
 // go test -bench BenchmarkShortIteratorAdvance -run -

From 261b8605ff18b0b3450fc8c896f1fc4cf7401d8c Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Wed, 1 May 2024 20:00:50 -0400
Subject: [PATCH 04/11] Add MustReadFrom with panic

---
 roaring.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/roaring.go b/roaring.go
index 19efbca0..3d83726f 100644
--- a/roaring.go
+++ b/roaring.go
@@ -314,6 +314,14 @@ func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err
 	return
 }
 
+func (rb *Bitmap) MustReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) {
+	rb.ReadFrom(reader, cookieHeader...)
+	if err := rb.Validate(); err != nil {
+		panic(err)
+	}
+	return
+}
+
 // FromBuffer creates a bitmap from its serialized version stored in buffer
 //
 // The format specification is available here:

From c2d3e294c11bacf1d52e92ba3a9ba45963e529d5 Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Wed, 1 May 2024 20:02:23 -0400
Subject: [PATCH 05/11] Add docstring to MustReadFrom

---
 roaring.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/roaring.go b/roaring.go
index 3d83726f..9508711a 100644
--- a/roaring.go
+++ b/roaring.go
@@ -314,6 +314,9 @@ func (rb *Bitmap) ReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err
 	return
 }
 
+// MustReadFrom calls ReadFrom internally.
+// After deserialization Validate will be called.
+// If the Bitmap fails to validate, a panic with the validation error will be thrown
 func (rb *Bitmap) MustReadFrom(reader io.Reader, cookieHeader ...byte) (p int64, err error) {
 	rb.ReadFrom(reader, cookieHeader...)
 	if err := rb.Validate(); err != nil {

From 331d5c0a1e160f5e1c4cfefdb0d45c96aa3fd8b2 Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Thu, 2 May 2024 17:43:37 -0400
Subject: [PATCH 06/11] Add serialization corruption

---
 arraycontainer.go |  5 +++-
 roaring_test.go   | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 runcontainer.go   |  8 ++++--
 3 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/arraycontainer.go b/arraycontainer.go
index 9a0d4a01..5eecd6dd 100644
--- a/arraycontainer.go
+++ b/arraycontainer.go
@@ -9,6 +9,8 @@ type arrayContainer struct {
 	content []uint16
 }
 
+var ErrArrayIncorrectSort = errors.New("incorrectly sorted array")
+
 func (ac *arrayContainer) String() string {
 	s := "{"
 	for it := ac.getShortIterator(); it.hasNext(); {
@@ -1097,6 +1099,7 @@ func (ac *arrayContainer) addOffset(x uint16) (container, container) {
 func (ac *arrayContainer) validate() error {
 	cardinality := ac.getCardinality()
 
+	// TODO use ERR consts
 	if cardinality <= 0 {
 		return errors.New("zero or negative size")
 	}
@@ -1109,7 +1112,7 @@ func (ac *arrayContainer) validate() error {
 	for i := 1; i < len(ac.content); i++ {
 		next := ac.content[i]
 		if previous > next {
-			return errors.New("incorrectly sorted array")
+			return ErrArrayIncorrectSort
 		}
 		previous = next
 
diff --git a/roaring_test.go b/roaring_test.go
index 7967ca91..db64fafb 100644
--- a/roaring_test.go
+++ b/roaring_test.go
@@ -2678,6 +2678,76 @@ func TestBitMapValidation(t *testing.T) {
 	assert.NoError(t, bm.Validate())
 }
 
+func TestBitMapValidationFromDeserialization(t *testing.T) {
+	// To understand what is going on here, read https://github.com/RoaringBitmap/RoaringFormatSpec
+
+	defer func() {
+		if err := recover(); err != nil {
+			// TODO assert on the error type.
+			fmt.Println("panicked")
+		}
+	}()
+
+	bm := NewBitmap()
+
+	// Maintainers: If you change this construction you must change the statements below.
+	// The tests expect a certain size, with values at certain location.
+	// TODO: A good extension would be to inspect the map and dynamically figure out what you can corrupt
+	randomEntries := make([]uint32, 0, 10)
+	for i := 0; i < 10; i++ {
+		randomEntries = append(randomEntries, uint32(i))
+	}
+	bm.AddMany(randomEntries)
+	assert.NoError(t, bm.Validate())
+	serialized, err := bm.ToBytes()
+	assert.NoError(t, err)
+
+	deserializedBitMap := NewBitmap()
+	deserializedBitMap.MustReadFrom(bytes.NewReader(serialized))
+	deserializedBitMap.Validate()
+
+	// corrupt the byte stream
+	// break sort order, serialized[34] equals 9 in a correct sort
+	serialized[34] = 0
+	corruptedDeserializedBitMap := NewBitmap()
+	corruptedDeserializedBitMap.MustReadFrom(bytes.NewReader(serialized))
+	// We will never hit this because of the recover
+	t.Errorf("did not panic")
+}
+
+func TestBitMapValidationFromDeserializationRuns(t *testing.T) {
+	// See above tests for more information
+
+	bm := NewBitmap()
+	bm.AddRange(100, 110)
+	assert.NoError(t, bm.Validate())
+	serialized, err := bm.ToBytes()
+	serialized[13] = 0
+	assert.NoError(t, err)
+	corruptedDeserializedBitMap := NewBitmap()
+	corruptedDeserializedBitMap.ReadFrom(bytes.NewReader(serialized))
+	assert.ErrorIs(t, corruptedDeserializedBitMap.Validate(), ErrRunIntervalLength)
+}
+
+func TestBitMapValidationFromDeserializationNumRuns(t *testing.T) {
+	// See above tests for more information
+
+	bm := NewBitmap()
+	bm.AddRange(100, 110)
+	bm.AddRange(115, 125)
+	// assert.NoError(t, bm.Validate())
+	serialized, err := bm.ToBytes()
+	assert.NoError(t, err)
+	// Force run overlap
+	serialized[15] = 108
+
+	corruptedDeserializedBitMap := NewBitmap()
+	corruptedDeserializedBitMap.ReadFrom(bytes.NewReader(serialized))
+	v := corruptedDeserializedBitMap.Validate()
+	fmt.Println(v)
+	assert.ErrorIs(t, corruptedDeserializedBitMap.Validate(), ErrRunIntervalOverlap)
+}
+
 func BenchmarkFromDense(b *testing.B) {
 	testDense(func(name string, rb *Bitmap) {
 		dense := make([]uint64, rb.DenseSize())
diff --git a/runcontainer.go b/runcontainer.go
index bdd82e83..53d52fe0 100644
--- a/runcontainer.go
+++ b/runcontainer.go
@@ -1509,9 +1509,11 @@ func (iv interval16) isNonContiguousDisjoint(b interval16) bool {
 	if nonContiguous1 || nonContiguous2 {
 		return false
 	}
+	ivl := iv.last()
+	bl := b.last()
 
-	c1 := iv.start <= b.start && b.start <= iv.last()
-	c2 := b.start <= iv.start && iv.start <= b.last()
+	c1 := iv.start <= b.start && b.start <= ivl
+	c2 := b.start <= iv.start && iv.start <= bl
 
 	return !c1 && !c2
 }
@@ -2662,6 +2664,8 @@ func (rc *runContainer16) validate() error {
 				return ErrRunIntervalEqual
 			}
 
+			// only check the start of runs
+			// if the run length overlap the next check will catch that.
 			if outer.start >= inner.start {
 				return ErrRunNonSorted
 			}

From c99a7cc6cc95c15df7bf6bdbc49f5c149298f86b Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Thu, 2 May 2024 20:59:13 -0400
Subject: [PATCH 07/11] Convert to table-driven tests

---
 roaring_test.go | 133 ++++++++++++++++++++++++------------------------
 1 file changed, 67 insertions(+), 66 deletions(-)

diff --git a/roaring_test.go b/roaring_test.go
index db64fafb..f7e92e84 100644
--- a/roaring_test.go
+++ b/roaring_test.go
@@ -2678,74 +2678,75 @@ func TestBitMapValidation(t *testing.T) {
 	assert.NoError(t, bm.Validate())
 }
 
-func TestBitMapValidationFromDeserialization(t *testing.T) {
-	// To understand what is going on here, read https://github.com/RoaringBitmap/RoaringFormatSpec
-
-	defer func() {
-		if err := recover(); err != nil {
-			// TODO assert on the error type.
-			fmt.Println("panicked")
-		}
-	}()
-
-	bm := NewBitmap()
-
-	// Maintainers: If you change this construction you must change the statements below.
-	// The tests expect a certain size, with values at certain location.
-	// TODO: A good extension would be to inspect the map and dynamically figure out what you can corrupt
-	randomEntries := make([]uint32, 0, 10)
-	for i := 0; i < 10; i++ {
-		randomEntries = append(randomEntries, uint32(i))
+func TestBitMapValidationFromDeserializationC(t *testing.T) {
+	deserializationTests := []struct {
+		name      string
+		loader    func(bm *Bitmap)
+		corruptor func(s []byte)
+		err       error
+	}{
+		{
+			name: "Corrupts Run Length",
+			loader: func(bm *Bitmap) {
+				bm.AddRange(100, 110)
+			},
+			corruptor: func(s []byte) {
+				// 13 is the length of the run
+				s[13] = 0
+			},
+			err: ErrRunIntervalLength,
+		},
+		{
+			name: "Creates Interval Overlap",
+			loader: func(bm *Bitmap) {
+				bm.AddRange(100, 110)
+				bm.AddRange(115, 125)
+			},
+			corruptor: func(s []byte) {
+				// sets the start of the second run
+				s[15] = 108
+			},
+			err: ErrRunIntervalOverlap,
+		},
+		{
+			name: "Break Array Sort Order",
+			loader: func(bm *Bitmap) {
+				randomEntries := make([]uint32, 0, 10)
+				for i := 0; i < 10; i++ {
+					randomEntries = append(randomEntries, uint32(i))
+				}
+				bm.AddMany(randomEntries)
+			},
+			corruptor: func(s []byte) {
+				s[34] = 0
+			},
+			err: ErrArrayIncorrectSort,
+		},
 	}
-	bm.AddMany(randomEntries)
-	assert.NoError(t, bm.Validate())
-	serialized, err := bm.ToBytes()
-	assert.NoError(t, err)
-
-	deserializedBitMap := NewBitmap()
-	deserializedBitMap.MustReadFrom(bytes.NewReader(serialized))
-	deserializedBitMap.Validate()
-
-	// corrupt the byte stream
-	// break sort order, serialized[34] equals 9 in a correct sort
-	serialized[34] = 0
-	corruptedDeserializedBitMap := NewBitmap()
-	corruptedDeserializedBitMap.MustReadFrom(bytes.NewReader(serialized))
-	// We will never hit this because of the recover
-	t.Errorf("did not panic")
-}
-
-func TestBitMapValidationFromDeserializationRuns(t *testing.T) {
-	// See above tests for more information
 
-	bm := NewBitmap()
-	bm.AddRange(100, 110)
-	assert.NoError(t, bm.Validate())
-	serialized, err := bm.ToBytes()
-	serialized[13] = 0
-	assert.NoError(t, err)
-	corruptedDeserializedBitMap := NewBitmap()
-	corruptedDeserializedBitMap.ReadFrom(bytes.NewReader(serialized))
-	assert.ErrorIs(t, corruptedDeserializedBitMap.Validate(), ErrRunIntervalLength)
-}
-
-func TestBitMapValidationFromDeserializationNumRuns(t *testing.T) {
-	// See above tests for more information
-
-	bm := NewBitmap()
-	bm.AddRange(100, 110)
-	bm.AddRange(115, 125)
-	// assert.NoError(t, bm.Validate())
-	serialized, err := bm.ToBytes()
-	assert.NoError(t, err)
-	// Force run overlap
-	serialized[15] = 108
-
-	corruptedDeserializedBitMap := NewBitmap()
-	corruptedDeserializedBitMap.ReadFrom(bytes.NewReader(serialized))
-	v := corruptedDeserializedBitMap.Validate()
-	fmt.Println(v)
-	assert.ErrorIs(t, corruptedDeserializedBitMap.Validate(), ErrRunIntervalOverlap)
+	for _, tt := range deserializationTests {
+		t.Run(tt.name, func(t *testing.T) {
+			defer func() {
+				if err := recover(); err != nil {
+				}
+			}()
+
+			bm := NewBitmap()
+			tt.loader(bm)
+			assert.NoError(t, bm.Validate())
+			serialized, err := bm.ToBytes()
+			assert.NoError(t, err)
+			tt.corruptor(serialized)
+			corruptedDeserializedBitMap := NewBitmap()
+			corruptedDeserializedBitMap.ReadFrom(bytes.NewReader(serialized))
+			assert.ErrorIs(t, corruptedDeserializedBitMap.Validate(), tt.err)
+
+			corruptedDeserializedBitMap = NewBitmap()
+			corruptedDeserializedBitMap.MustReadFrom(bytes.NewReader(serialized))
+			// We will never hit this because of the recover
+			t.Errorf("did not panic")
+		})
+	}
 }
 
 func BenchmarkFromDense(b *testing.B) {

From 680eade25628261a480cad08f3175bebc6216049 Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Sun, 5 May 2024 19:52:14 -0400
Subject: [PATCH 08/11] Add comments to test

---
 roaring_test.go | 14 ++++++++++----
 roaringarray.go |  1 -
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/roaring_test.go b/roaring_test.go
index f7e92e84..79066fde 100644
--- a/roaring_test.go
+++ b/roaring_test.go
@@ -2656,8 +2656,8 @@ func TestRoaringArrayValidation(t *testing.T) {
 
 func TestBitMapValidation(t *testing.T) {
 	bm := NewBitmap()
-	bm.AddRange(306, 406)
 	bm.AddRange(0, 100)
+	bm.AddRange(306, 406)
 	bm.AddRange(102, 202)
 	bm.AddRange(204, 304)
 	assert.NoError(t, bm.Validate())
@@ -2679,6 +2679,12 @@ func TestBitMapValidation(t *testing.T) {
 }
 
 func TestBitMapValidationFromDeserializationC(t *testing.T) {
+	// To understand what is going on here, read https://github.com/RoaringBitmap/RoaringFormatSpec
+	// Maintainers: The loader and corruptor are dependent on one another
+	// The tests expect a certain size, with values at certain location.
+
+	// There is no way to test Bitmap container corruption once the bitmap is deserialzied
+
 	deserializationTests := []struct {
 		name      string
 		loader    func(bm *Bitmap)
@@ -2711,11 +2717,11 @@ func TestBitMapValidationFromDeserializationC(t *testing.T) {
 		{
 			name: "Break Array Sort Order",
 			loader: func(bm *Bitmap) {
-				randomEntries := make([]uint32, 0, 10)
+				arrayEntries := make([]uint32, 0, 10)
 				for i := 0; i < 10; i++ {
-					randomEntries = append(randomEntries, uint32(i))
+					arrayEntries = append(arrayEntries, uint32(i))
 				}
-				bm.AddMany(randomEntries)
+				bm.AddMany(arrayEntries)
 			},
 			corruptor: func(s []byte) {
 				s[34] = 0
diff --git a/roaringarray.go b/roaringarray.go
index 2bd793fd..e67f78d0 100644
--- a/roaringarray.go
+++ b/roaringarray.go
@@ -795,7 +795,6 @@ func (ra *roaringArray) validate() error {
 
 	// TODO: We could parallelize this, not sure if that's warranted
 	for _, container := range ra.containers {
-
 		err := container.validate()
 		if err != nil {
 			return err

From 8255e1da744ef8aeaac8c6958817bc477d17d6a5 Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Fri, 24 May 2024 20:18:09 -0400
Subject: [PATCH 09/11] Cleanup + Run size validation

---
 arraycontainer.go      | 10 ++++---
 arraycontainer_test.go |  2 --
 roaring_test.go        | 18 +++++++++++++
 roaringarray.go        |  1 -
 runcontainer.go        | 37 +++++++++++++++++++++-----
 runcontainer_test.go   | 60 +++++++++++++++++++++++++++++++-----------
 6 files changed, 99 insertions(+), 29 deletions(-)

diff --git a/arraycontainer.go b/arraycontainer.go
index 5eecd6dd..92036992 100644
--- a/arraycontainer.go
+++ b/arraycontainer.go
@@ -9,7 +9,10 @@ type arrayContainer struct {
 	content []uint16
 }
 
-var ErrArrayIncorrectSort = errors.New("incorrectly sorted array")
+var (
+	ErrArrayIncorrectSort = errors.New("incorrectly sorted array")
+	ErrArrayInvalidSize   = errors.New("invalid array size")
+)
 
 func (ac *arrayContainer) String() string {
 	s := "{"
@@ -1099,13 +1102,12 @@ func (ac *arrayContainer) addOffset(x uint16) (container, container) {
 func (ac *arrayContainer) validate() error {
 	cardinality := ac.getCardinality()
 
-	// TODO use ERR consts
 	if cardinality <= 0 {
-		return errors.New("zero or negative size")
+		return ErrArrayInvalidSize
 	}
 
 	if cardinality > arrayDefaultMaxSize {
-		return errors.New("exceeds default max size")
+		return ErrArrayInvalidSize
 	}
 
 	previous := ac.content[0]
diff --git a/arraycontainer_test.go b/arraycontainer_test.go
index 09541157..af9a8647 100644
--- a/arraycontainer_test.go
+++ b/arraycontainer_test.go
@@ -436,8 +436,6 @@ func TestArrayContainerResetTo(t *testing.T) {
 }
 
 func TestArrayContainerValidation(t *testing.T) {
-	// TODO Use table based testing
-
 	array := newArrayContainer()
 	upperBound := arrayDefaultMaxSize
 
diff --git a/roaring_test.go b/roaring_test.go
index 79066fde..2ebb9768 100644
--- a/roaring_test.go
+++ b/roaring_test.go
@@ -2682,6 +2682,7 @@ func TestBitMapValidationFromDeserializationC(t *testing.T) {
 	// To understand what is going on here, read https://github.com/RoaringBitmap/RoaringFormatSpec
 	// Maintainers: The loader and corruptor are dependent on one another
 	// The tests expect a certain size, with values at certain location.
+	// The tests are geared toward single byte corruption.
 
 	// There is no way to test Bitmap container corruption once the bitmap is deserialzied
 
@@ -2691,6 +2692,20 @@ func TestBitMapValidationFromDeserializationC(t *testing.T) {
 		corruptor func(s []byte)
 		err       error
 	}{
+		{
+			name: "Corrupts Run Length vs Num Runs",
+			loader: func(bm *Bitmap) {
+				bm.AddRange(0, 2)
+				bm.AddRange(4, 6)
+				bm.AddRange(8, 100)
+			},
+			corruptor: func(s []byte) {
+				// 21 is the length of the run of the last run/range
+				// Shortening causes interval sum to be to short
+				s[21] = 1
+			},
+			err: ErrRunIntervalSize,
+		},
 		{
 			name: "Corrupts Run Length",
 			loader: func(bm *Bitmap) {
@@ -2698,6 +2713,7 @@ func TestBitMapValidationFromDeserializationC(t *testing.T) {
 			},
 			corruptor: func(s []byte) {
 				// 13 is the length of the run
+				// Setting to zero causes an invalid run
 				s[13] = 0
 			},
 			err: ErrRunIntervalLength,
@@ -2710,6 +2726,7 @@ func TestBitMapValidationFromDeserializationC(t *testing.T) {
 			},
 			corruptor: func(s []byte) {
 				// sets the start of the second run
+				// Creates overlapping intervals
 				s[15] = 108
 			},
 			err: ErrRunIntervalOverlap,
@@ -2724,6 +2741,7 @@ func TestBitMapValidationFromDeserializationC(t *testing.T) {
 				bm.AddMany(arrayEntries)
 			},
 			corruptor: func(s []byte) {
+				// breaks the sort order
 				s[34] = 0
 			},
 			err: ErrArrayIncorrectSort,
diff --git a/roaringarray.go b/roaringarray.go
index e67f78d0..52ccf483 100644
--- a/roaringarray.go
+++ b/roaringarray.go
@@ -793,7 +793,6 @@ func (ra *roaringArray) validate() error {
 		return ErrCardinalityConstraint
 	}
 
-	// TODO: We could parallelize this, not sure if that's warranted
 	for _, container := range ra.containers {
 		err := container.validate()
 		if err != nil {
diff --git a/runcontainer.go b/runcontainer.go
index 53d52fe0..6d5aef1d 100644
--- a/runcontainer.go
+++ b/runcontainer.go
@@ -66,6 +66,9 @@ var (
 	ErrRunIntervalLength  = errors.New("interval had zero length")
 	ErrRunIntervalEqual   = errors.New("intervals were equal")
 	ErrRunIntervalOverlap = errors.New("intervals overlapped or were continguous")
+	ErrRunIntervalSize    = errors.New("too many intervals relative to data")
+	MaxNumIntervals       = 2048
+	MaxIntervalsSum       = 2048
 )
 
 func newInterval16Range(start, last uint16) interval16 {
@@ -2162,7 +2165,7 @@ func (rc *runContainer16) orArray(ac *arrayContainer) container {
 	}
 	intervals, cardMinusOne := runArrayUnionToRuns(rc, ac)
 	result := newRunContainer16TakeOwnership(intervals)
-	if len(intervals) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
+	if len(intervals) >= MaxNumIntervals && cardMinusOne >= arrayDefaultMaxSize {
 		return newBitmapContainerFromRun(result)
 	}
 	if len(intervals)*2 > 1+int(cardMinusOne) {
@@ -2221,7 +2224,7 @@ func (rc *runContainer16) iorArray(ac *arrayContainer) container {
 	// this can be done with methods like the in-place array container union
 	// but maybe lazily moving the remaining elements back.
 	rc.iv, cardMinusOne = runArrayUnionToRuns(rc, ac)
-	if len(rc.iv) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
+	if len(rc.iv) >= MaxNumIntervals && cardMinusOne >= arrayDefaultMaxSize {
 		return newBitmapContainerFromRun(rc)
 	}
 	if len(rc.iv)*2 > 1+int(cardMinusOne) {
@@ -2649,34 +2652,54 @@ func (rc *runContainer16) validate() error {
 		return ErrRunIntervalsEmpty
 	}
 
+	intervalsSum := 0
 	for outeridx := range rc.iv {
 
 		if rc.iv[outeridx].length == 0 {
 			return ErrRunIntervalLength
 		}
 
-		outer := rc.iv[outeridx]
+		outerInterval := rc.iv[outeridx]
+
+		intervalsSum += outerInterval.runlen()
 		for inneridx := outeridx + 1; inneridx < len(rc.iv); inneridx++ {
 
-			inner := rc.iv[inneridx]
+			innerInterval := rc.iv[inneridx]
 
-			if outer.equal(inner) {
+			if outerInterval.equal(innerInterval) {
 				return ErrRunIntervalEqual
 			}
 
 			// only check the start of runs
 			// if the run length overlap the next check will catch that.
-			if outer.start >= inner.start {
+			if outerInterval.start >= innerInterval.start {
 				return ErrRunNonSorted
 			}
 
-			err := isNonContiguousDisjoint(outer, inner)
+			err := isNonContiguousDisjoint(outerInterval, innerInterval)
 			if err != nil {
 				return err
 			}
 		}
 
 	}
+	/*
+			if number of distinct values in the container >= 2048 then
+		    check that the number of runs is no more than 2047
+		    (otherwise you could use a bitset container)
+			else
+		    check that the number of runs < (number of distinct values) / 2
+		    (otherwise you could use an array container)
+	*/
+	if MaxIntervalsSum <= intervalsSum {
+		if !(len(rc.iv) < MaxNumIntervals) {
+			return ErrRunIntervalSize
+		}
+	} else {
+		if !(len(rc.iv) < (intervalsSum / 2)) {
+			return ErrRunIntervalSize
+		}
+	}
 
 	return nil
 }
diff --git a/runcontainer_test.go b/runcontainer_test.go
index 7502f7d8..3e6ce3ce 100644
--- a/runcontainer_test.go
+++ b/runcontainer_test.go
@@ -2371,42 +2371,35 @@ func TestIntervalOverlaps(t *testing.T) {
 	assert.NoError(t, isNonContiguousDisjoint(a, b))
 }
 
-func TestIntervalValidation(t *testing.T) {
-	// TODO table driven
+func TestIntervalValidationFailing(t *testing.T) {
 	rc := &runContainer16{}
 	assert.Error(t, rc.validate())
 
 	a := newInterval16Range(0, 9)
 	b := newInterval16Range(0, 9)
 	rc = &runContainer16{}
-	rc.iv = append(rc.iv, a)
-	rc.iv = append(rc.iv, b)
+	rc.iv = append(rc.iv, a, b)
 	assert.ErrorIs(t, rc.validate(), ErrRunIntervalEqual)
 
 	a = newInterval16Range(0, 9)
 	b = newInterval16Range(10, 20)
 
 	rc = &runContainer16{}
-	rc.iv = append(rc.iv, a)
-	rc.iv = append(rc.iv, b)
+	rc.iv = append(rc.iv, a, b)
 	assert.ErrorIs(t, rc.validate(), ErrRunIntervalOverlap)
 
 	a = newInterval16Range(0, 12)
 	b = newInterval16Range(10, 20)
 
 	rc = &runContainer16{}
-	rc.iv = append(rc.iv, a)
-	rc.iv = append(rc.iv, b)
+	rc.iv = append(rc.iv, a, b)
 	assert.Error(t, rc.validate(), ErrRunIntervalOverlap)
 
 	c := newInterval16Range(100, 150)
 	d := newInterval16Range(1000, 10000)
 
 	rc = &runContainer16{}
-	rc.iv = append(rc.iv, a)
-	rc.iv = append(rc.iv, b)
-	rc.iv = append(rc.iv, c)
-	rc.iv = append(rc.iv, d)
+	rc.iv = append(rc.iv, a, b, c, d)
 	assert.ErrorIs(t, rc.validate(), ErrRunIntervalOverlap)
 
 	a = newInterval16Range(0, 10)
@@ -2414,13 +2407,50 @@ func TestIntervalValidation(t *testing.T) {
 
 	// missort
 	rc = &runContainer16{}
-	rc.iv = append(rc.iv, b)
-	rc.iv = append(rc.iv, a)
+	rc.iv = append(rc.iv, b, a)
 	assert.ErrorIs(t, rc.validate(), ErrRunNonSorted)
 
 	rc = &runContainer16{}
+	start := -4
+	for i := 0; i < MaxNumIntervals; i++ {
+		start += 4
+		end := start + 2
+		a := newInterval16Range(uint16(start), uint16(end))
+		rc.iv = append(rc.iv, a)
+
+	}
+	assert.ErrorIs(t, rc.validate(), ErrRunIntervalSize)
+
+	// too many small runs, use array
+	rc = &runContainer16{}
+	start = -3
+	for i := 0; i < 10; i++ {
+		start += 3
+		end := start + 1
+		a := newInterval16Range(uint16(start), uint16(end))
+		rc.iv = append(rc.iv, a)
+
+	}
+	assert.ErrorIs(t, rc.validate(), ErrRunIntervalSize)
+}
+
+func TestIntervalValidationsPassing(t *testing.T) {
+	rc := &runContainer16{}
+	a := newInterval16Range(0, 10)
+	b := newInterval16Range(100, 200)
+	rc.iv = append(rc.iv, a, b)
+	assert.NoError(t, rc.validate())
+
+	// Large total sum, but enough intervals
+	rc = &runContainer16{}
+	a = newInterval16Range(0, uint16(MaxIntervalsSum+1))
 	rc.iv = append(rc.iv, a)
-	rc.iv = append(rc.iv, b)
+	assert.NoError(t, rc.validate())
+
+	rc = &runContainer16{}
+	a = newInterval16Range(0, uint16(MaxIntervalsSum+1))
+	b = newInterval16Range(uint16(MaxIntervalsSum+3), uint16(MaxIntervalsSum*2))
+	rc.iv = append(rc.iv, a, b)
 	assert.NoError(t, rc.validate())
 }
 

From 3254f8bd7f3497de08c9a31a0fc8018124a52c70 Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Wed, 29 May 2024 14:18:58 -0400
Subject: [PATCH 10/11] Additonal bitmap invariants + frozen

---
 bitmapcontainer.go            |  9 ++++
 bitmapcontainer_test.go       | 25 +++++++++
 roaring_test.go               |  2 +-
 serialization_frozen_test.go  | 99 +++++++++++++++++++++++++++++++++++
 serialization_littleendian.go |  9 ++++
 5 files changed, 143 insertions(+), 1 deletion(-)

diff --git a/bitmapcontainer.go b/bitmapcontainer.go
index 1903e785..d59578d6 100644
--- a/bitmapcontainer.go
+++ b/bitmapcontainer.go
@@ -1237,9 +1237,18 @@ func (bc *bitmapContainer) validate() error {
 	if bc.cardinality < arrayDefaultMaxSize {
 		return fmt.Errorf("bitmap container size was less than: %d", arrayDefaultMaxSize)
 	}
+
+	if maxCapacity < len(bc.bitmap)*64 {
+		return fmt.Errorf("bitmap slize size %d exceeded max capacity %d", maxCapacity, len(bc.bitmap)*64)
+	}
+
 	if bc.cardinality > maxCapacity {
 		return fmt.Errorf("bitmap container size was greater than: %d", maxCapacity)
 	}
 
+	if bc.cardinality != int(popcntSlice(bc.bitmap)) {
+		return fmt.Errorf("bitmap container size %d did not match underlying slice length: %d", bc.cardinality, int(popcntSlice(bc.bitmap)))
+	}
+
 	return nil
 }
diff --git a/bitmapcontainer_test.go b/bitmapcontainer_test.go
index c6121108..76269097 100644
--- a/bitmapcontainer_test.go
+++ b/bitmapcontainer_test.go
@@ -345,4 +345,29 @@ func TestBitMapContainerValidate(t *testing.T) {
 	bc.cardinality = maxCapacity + 1
 
 	assert.Error(t, bc.validate())
+
+	// violate cardinality underlying container size invariant
+	bc = newBitmapContainer()
+	for i := 0; i < arrayDefaultMaxSize+1; i++ {
+		bc.iadd(uint16(i * 3))
+	}
+	assert.NoError(t, bc.validate())
+	bc.cardinality += 1
+	assert.Error(t, bc.validate())
+
+	// check that underlying packed slice doesn't exceed maxCapacity
+	bc = newBitmapContainer()
+	orginalSize := (1 << 16) / 64
+	for i := 0; i < orginalSize; i++ {
+		bc.cardinality += 1
+		bc.bitmap[i] = uint64(1)
+	}
+
+	appendSize := ((1 << 16) - orginalSize) + 1
+	for i := 0; i < appendSize; i++ {
+		bc.cardinality += 1
+		bc.bitmap = append(bc.bitmap, uint64(1))
+	}
+
+	assert.Error(t, bc.validate())
 }
diff --git a/roaring_test.go b/roaring_test.go
index 2ebb9768..7124dfd6 100644
--- a/roaring_test.go
+++ b/roaring_test.go
@@ -2678,7 +2678,7 @@ func TestBitMapValidation(t *testing.T) {
 	assert.NoError(t, bm.Validate())
 }
 
-func TestBitMapValidationFromDeserializationC(t *testing.T) {
+func TestBitMapValidationFromDeserialization(t *testing.T) {
 	// To understand what is going on here, read https://github.com/RoaringBitmap/RoaringFormatSpec
 	// Maintainers: The loader and corruptor are dependent on one another
 	// The tests expect a certain size, with values at certain location.
diff --git a/serialization_frozen_test.go b/serialization_frozen_test.go
index 8ed7f3b2..52542077 100644
--- a/serialization_frozen_test.go
+++ b/serialization_frozen_test.go
@@ -10,6 +10,8 @@ import (
 	"os"
 	"reflect"
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )
 
 func TestFrozenFormat(t *testing.T) {
@@ -134,3 +136,100 @@ func TestFrozenFormat(t *testing.T) {
 		})
 	}
 }
+
+func TestFrozenFormatSimpleValidation(t *testing.T) {
+	bm := NewBitmap()
+	deserializedBitMap := NewBitmap()
+	bm.AddRange(0, 2)
+	bm.AddRange(4, 6)
+	bm.AddRange(8, 100)
+	serialized, err := bm.Freeze()
+	assert.NoError(t, err)
+	serialized[10] = 1
+	assert.Error(t, deserializedBitMap.MustFrozenView(serialized))
+}
+
+func TestBitMapValidationFromFrozen(t *testing.T) {
+	// To understand what is going on here, read https://github.com/RoaringBitmap/RoaringFormatSpec
+	// Maintainers: The loader and corruptor are dependent on one another
+	// The tests expect a certain size, with values at certain location.
+	// The tests are geared toward single byte corruption.
+
+	// There is no way to test Bitmap container corruption once the bitmap is deserialzied
+
+	deserializationTests := []struct {
+		name      string
+		loader    func(bm *Bitmap)
+		corruptor func(s []byte)
+		err       error
+	}{
+		{
+			name: "Corrupts Run Length vs Num Runs",
+			loader: func(bm *Bitmap) {
+				bm.AddRange(0, 2)
+				bm.AddRange(4, 6)
+				bm.AddRange(8, 100)
+			},
+			corruptor: func(s []byte) {
+				// 21 is the length of the run of the last run/range
+				// Shortening causes interval sum to be to short
+				s[10] = 1
+			},
+			err: ErrRunIntervalSize,
+		},
+		{
+			name: "Corrupts Run Length",
+			loader: func(bm *Bitmap) {
+				bm.AddRange(100, 110)
+			},
+			corruptor: func(s []byte) {
+				// 13 is the length of the run
+				// Setting to zero causes an invalid run
+				s[2] = 0
+			},
+			err: ErrRunIntervalLength,
+		},
+		{
+			name: "Creates Interval Overlap",
+			loader: func(bm *Bitmap) {
+				bm.AddRange(100, 110)
+				bm.AddRange(115, 125)
+			},
+			corruptor: func(s []byte) {
+				// sets the start of the second run
+				// Creates overlapping intervals
+				s[4] = 108
+			},
+			err: ErrRunIntervalOverlap,
+		},
+		{
+			name: "Break Array Sort Order",
+			loader: func(bm *Bitmap) {
+				arrayEntries := make([]uint32, 0, 10)
+				for i := 0; i < 10; i++ {
+					arrayEntries = append(arrayEntries, uint32(i))
+				}
+				bm.AddMany(arrayEntries)
+			},
+			corruptor: func(s []byte) {
+				// breaks the sort order
+				s[4] = 0
+			},
+			err: ErrArrayIncorrectSort,
+		},
+	}
+
+	for _, tt := range deserializationTests {
+		t.Run(tt.name, func(t *testing.T) {
+			bm := NewBitmap()
+			tt.loader(bm)
+			assert.NoError(t, bm.Validate())
+			serialized, err := bm.Freeze()
+			assert.NoError(t, err)
+			tt.corruptor(serialized)
+			corruptedDeserializedBitMap := NewBitmap()
+
+			assert.ErrorIs(t, corruptedDeserializedBitMap.MustFrozenView(serialized), tt.err)
+		})
+	}
+}
diff --git a/serialization_littleendian.go b/serialization_littleendian.go
index 6e3a5d55..16d356ca 100644
--- a/serialization_littleendian.go
+++ b/serialization_littleendian.go
@@ -299,6 +299,15 @@ func (rb *Bitmap) FrozenView(buf []byte) error {
 	return rb.highlowcontainer.frozenView(buf)
 }
 
+func (rb *Bitmap) MustFrozenView(buf []byte) error {
+	if err := rb.FrozenView(buf); err != nil {
+		return err
+	}
+	err := rb.Validate()
+
+	return err
+}
+
 /* Verbatim specification from CRoaring.
  *
  * FROZEN SERIALIZATION FORMAT DESCRIPTION

From c8b1e79d09089934ef130e335b41242d57531b00 Mon Sep 17 00:00:00 2001
From: bstrausser <bstrausser@locusrobotics.com>
Date: Wed, 29 May 2024 20:51:46 -0400
Subject: [PATCH 11/11] Remove extra tests

---
 serialization_frozen_test.go | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/serialization_frozen_test.go b/serialization_frozen_test.go
index 52542077..957cc6c9 100644
--- a/serialization_frozen_test.go
+++ b/serialization_frozen_test.go
@@ -137,18 +137,6 @@ func TestFrozenFormat(t *testing.T) {
 	}
 }
 
-func TestFrozenFormatSimpleValidation(t *testing.T) {
-	bm := NewBitmap()
-	deserializedBitMap := NewBitmap()
-	bm.AddRange(0, 2)
-	bm.AddRange(4, 6)
-	bm.AddRange(8, 100)
-	serialized, err := bm.Freeze()
-	assert.NoError(t, err)
-	serialized[10] = 1
-	assert.Error(t, deserializedBitMap.MustFrozenView(serialized))
-}
-
 func TestBitMapValidationFromFrozen(t *testing.T) {
 	// To understand what is going on here, read https://github.com/RoaringBitmap/RoaringFormatSpec
 	// Maintainers: The loader and corruptor are dependent on one another