Skip to content

Commit

Permalink
implement new bitmap intersection algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
richardstartin committed May 29, 2020
1 parent 177ee33 commit 0a49a47
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 46 deletions.
67 changes: 42 additions & 25 deletions RoaringBitmap/src/main/java/org/roaringbitmap/BitmapContainer.java
Expand Up @@ -540,34 +540,48 @@ public Container iadd(int begin, int end) {

@Override
public Container iand(final ArrayContainer b2) {
return b2.and(this);// no inplace possible
if (-1 == cardinality) {
// actually we can avoid allocating in lazy mode
Util.intersect(bitmap, b2.content, b2.cardinality);
return this;
} else {
return b2.and(this);
}
}

@Override
public Container iand(final BitmapContainer b2) {
int newCardinality = 0;
for (int k = 0; k < this.bitmap.length; ++k) {
newCardinality += Long.bitCount(this.bitmap[k] & b2.bitmap[k]);
}
if (newCardinality > ArrayContainer.DEFAULT_MAX_SIZE) {
for (int k = 0; k < this.bitmap.length; ++k) {
this.bitmap[k] = this.bitmap[k] & b2.bitmap[k];
if (-1 == cardinality) {
// in lazy mode, just intersect the bitmaps, can repair afterwards
for (int i = 0; i < bitmap.length; ++i) {
bitmap[i] &= b2.bitmap[i];
}
this.cardinality = newCardinality;
return this;
} else {
int newCardinality = 0;
for (int k = 0; k < this.bitmap.length; ++k) {
newCardinality += Long.bitCount(this.bitmap[k] & b2.bitmap[k]);
}
if (newCardinality > ArrayContainer.DEFAULT_MAX_SIZE) {
for (int k = 0; k < this.bitmap.length; ++k) {
this.bitmap[k] = this.bitmap[k] & b2.bitmap[k];
}
this.cardinality = newCardinality;
return this;
}
ArrayContainer ac = new ArrayContainer(newCardinality);
Util.fillArrayAND(ac.content, this.bitmap, b2.bitmap);
ac.cardinality = newCardinality;
return ac;
}
ArrayContainer ac = new ArrayContainer(newCardinality);
Util.fillArrayAND(ac.content, this.bitmap, b2.bitmap);
ac.cardinality = newCardinality;
return ac;
}

@Override
public Container iand(RunContainer x) {
// could probably be replaced with return iand(x.toBitmapOrArrayContainer());
final int card = x.getCardinality();
if (card <= ArrayContainer.DEFAULT_MAX_SIZE) {
// no point in doing it in-place
if (-1 != cardinality && card <= ArrayContainer.DEFAULT_MAX_SIZE) {
// no point in doing it in-place, unless it's a lazy operation
ArrayContainer answer = new ArrayContainer(card);
answer.cardinality = 0;
for (int rlepos = 0; rlepos < x.nbrruns; ++rlepos) {
Expand All @@ -585,17 +599,20 @@ public Container iand(RunContainer x) {
int end = (x.getValue(rlepos));
int prevOnes = cardinalityInRange(start, end);
Util.resetBitmapRange(this.bitmap, start, end);
updateCardinality(prevOnes, 0);
start = end + (x.getLength(rlepos)) + 1;
}
int ones = cardinalityInRange(start, MAX_CAPACITY);
Util.resetBitmapRange(this.bitmap, start, MAX_CAPACITY);
updateCardinality(ones, 0);
if (getCardinality() > ArrayContainer.DEFAULT_MAX_SIZE) {
return this;
} else {
return toArrayContainer();
if (-1 != cardinality) {
updateCardinality(prevOnes, 0);
}
start = end + x.getLength(rlepos) + 1;
}
if (-1 != cardinality) { // in lazy mode don't try to trim
int ones = cardinalityInRange(start, MAX_CAPACITY);
Util.resetBitmapRange(this.bitmap, start, MAX_CAPACITY);
updateCardinality(ones, 0);
if (getCardinality() <= ArrayContainer.DEFAULT_MAX_SIZE) {
return toArrayContainer();
}
}
return this;
}

@Override
Expand Down
Expand Up @@ -7,7 +7,6 @@
import java.util.*;



/**
* Fast algorithms to aggregate many bitmaps.
*
Expand Down Expand Up @@ -37,6 +36,9 @@ public static RoaringBitmap and(Iterator<? extends RoaringBitmap> bitmaps) {
* @return aggregated bitmap
*/
public static RoaringBitmap and(RoaringBitmap... bitmaps) {
  // Three or more inputs go through the key-first intersection; zero, one or
  // two inputs keep using the pairwise implementation.
  return bitmaps.length > 2 ? workShyAnd(bitmaps) : naive_and(bitmaps);
}

Expand Down Expand Up @@ -268,6 +270,70 @@ public static RoaringBitmap naive_and(RoaringBitmap... bitmaps) {
return answer;
}

/**
 * Computes the intersection by first intersecting the keys, avoids
 * materialising containers.
 *
 * <p>The keys of the first bitmap are recorded in a 65536-bit set (one bit per
 * possible {@code char} key) which is then intersected with the keys of every
 * other input. Only containers whose key survives are intersected, using lazy
 * in-place operations on a scratch bitmap container.
 *
 * <p>No inputs yield an empty bitmap; a single input yields a bitmap with the
 * same contents as that input.
 *
 * @param bitmaps the inputs
 * @return the intersection of the bitmaps
 */
public static RoaringBitmap workShyAnd(RoaringBitmap... bitmaps) {
  if (bitmaps.length == 0) {
    return new RoaringBitmap();
  }
  // 1024 longs = 65536 bits: one bit per possible 16-bit container key.
  long[] words = new long[1024];
  RoaringBitmap first = bitmaps[0];
  for (int i = 0; i < first.highLowContainer.size; ++i) {
    char key = first.highLowContainer.keys[i];
    words[key >>> 6] |= 1L << key;
  }
  // Start from the number of keys in the first bitmap so that a single input
  // is not mistaken for an empty intersection.
  int containersInResult = 0;
  for (long word : words) {
    containersInResult += Long.bitCount(word);
  }
  // Stop as soon as no key survives: the result is already known to be empty,
  // and no later input may re-introduce a key.
  for (int i = 1; i < bitmaps.length && containersInResult > 0; ++i) {
    containersInResult = Util.intersect(words,
        bitmaps[i].highLowContainer.keys, bitmaps[i].highLowContainer.size);
  }
  if (containersInResult == 0) {
    return new RoaringBitmap();
  }
  // Enumerate the surviving keys in ascending order.
  char[] keys = new char[containersInResult];
  int base = 0;
  int pos = 0;
  for (long word : words) {
    while (word != 0L) {
      keys[pos++] = (char) (base + Long.numberOfTrailingZeros(word));
      word &= (word - 1); // clear the lowest set bit
    }
    base += 64;
  }
  // containers[k][i] is the container of input i stored under surviving key k.
  Container[][] containers = new Container[containersInResult][bitmaps.length];
  for (int i = 0; i < bitmaps.length; ++i) {
    RoaringBitmap bitmap = bitmaps[i];
    int position = 0;
    for (int j = 0; j < bitmap.highLowContainer.size; ++j) {
      char key = bitmap.highLowContainer.keys[j];
      if ((words[key >>> 6] & (1L << key)) != 0) {
        containers[position++][i] = bitmap.highLowContainer.values[j];
      }
    }
  }

  RoaringArray array =
      new RoaringArray(keys, new Container[containersInResult], 0);
  for (int i = 0; i < containersInResult; ++i) {
    Container[] slice = containers[i];
    // The key set is no longer needed, so its storage is reused as an all-ones
    // scratch bitmap; cardinality -1 puts the container in lazy mode.
    // NOTE(review): this relies on every lazy iand clearing all bits absent
    // from its operand - confirm BitmapContainer.iand(RunContainer) also
    // clears the range after the last run when cardinality == -1.
    Arrays.fill(words, -1L);
    Container tmp = new BitmapContainer(words, -1);
    for (Container container : slice) {
      tmp = tmp.iand(container);
    }
    tmp = tmp.repairAfterLazy();
    if (!tmp.isEmpty()) {
      // A BitmapContainer result may still wrap the shared scratch array, which
      // is overwritten on the next iteration, so it has to be copied.
      array.append(keys[i], tmp instanceof BitmapContainer ? tmp.clone() : tmp);
    }
  }
  return new RoaringBitmap(array);
}


/**
* Compute overall OR between bitmaps two-by-two.
Expand Down
40 changes: 20 additions & 20 deletions RoaringBitmap/src/main/java/org/roaringbitmap/RunContainer.java
Expand Up @@ -12,8 +12,7 @@
import java.nio.CharBuffer;
import java.util.Arrays;
import java.util.Iterator;


import java.util.concurrent.ConcurrentHashMap;


/**
Expand Down Expand Up @@ -381,17 +380,18 @@ public Container and(BitmapContainer x) {

@Override
public Container and(RunContainer x) {
RunContainer answer = new RunContainer(new char[2 * (this.nbrruns + x.nbrruns)], 0);
int maxRunsAfterIntersection = nbrruns + x.nbrruns;
RunContainer answer = new RunContainer(new char[2 * maxRunsAfterIntersection], 0);
if (isEmpty()) {
return answer;
}
int rlepos = 0;
int xrlepos = 0;
int start = (this.getValue(rlepos));
int end = start + (this.getLength(rlepos)) + 1;
int xstart = (x.getValue(xrlepos));
int xend = xstart + (x.getLength(xrlepos)) + 1;
while ((rlepos < this.nbrruns) && (xrlepos < x.nbrruns)) {
int start = this.getValue(rlepos);
int end = start + this.getLength(rlepos) + 1;
int xstart = x.getValue(xrlepos);
int xend = xstart + x.getLength(xrlepos) + 1;
while (rlepos < this.nbrruns && xrlepos < x.nbrruns) {
if (end <= xstart) {
if (ENABLE_GALLOPING_AND) {
rlepos = skipAhead(this, rlepos, xstart); // skip over runs until we have end > xstart (or
Expand All @@ -401,8 +401,8 @@ public Container and(RunContainer x) {
}

if (rlepos < this.nbrruns) {
start = (this.getValue(rlepos));
end = start + (this.getLength(rlepos)) + 1;
start = this.getValue(rlepos);
end = start + this.getLength(rlepos) + 1;
}
} else if (xend <= start) {
// exit the second run
Expand All @@ -413,8 +413,8 @@ public Container and(RunContainer x) {
}

if (xrlepos < x.nbrruns) {
xstart = (x.getValue(xrlepos));
xend = xstart + (x.getLength(xrlepos)) + 1;
xstart = x.getValue(xrlepos);
xend = xstart + x.getLength(xrlepos) + 1;
}
} else {// they overlap
final int lateststart = Math.max(start, xstart);
Expand All @@ -424,27 +424,27 @@ public Container and(RunContainer x) {
rlepos++;
xrlepos++;
if (rlepos < this.nbrruns) {
start = (this.getValue(rlepos));
end = start + (this.getLength(rlepos)) + 1;
start = this.getValue(rlepos);
end = start + this.getLength(rlepos) + 1;
}
if (xrlepos < x.nbrruns) {
xstart = (x.getValue(xrlepos));
xend = xstart + (x.getLength(xrlepos)) + 1;
xstart = x.getValue(xrlepos);
xend = xstart + x.getLength(xrlepos) + 1;
}
} else if (end < xend) {
earliestend = end;
rlepos++;
if (rlepos < this.nbrruns) {
start = (this.getValue(rlepos));
end = start + (this.getLength(rlepos)) + 1;
start = this.getValue(rlepos);
end = start + this.getLength(rlepos) + 1;
}

} else {// end > xend
earliestend = xend;
xrlepos++;
if (xrlepos < x.nbrruns) {
xstart = (x.getValue(xrlepos));
xend = xstart + (x.getLength(xrlepos)) + 1;
xstart = x.getValue(xrlepos);
xend = xstart + x.getLength(xrlepos) + 1;
}
}
answer.valueslength[2 * answer.nbrruns] = (char) lateststart;
Expand Down
33 changes: 33 additions & 0 deletions RoaringBitmap/src/main/java/org/roaringbitmap/Util.java
Expand Up @@ -427,6 +427,39 @@ public static void resetBitmapRange(long[] bitmap, int start, int end) {

}

/**
 * Intersects the bitmap with the array in place, returning the cardinality of the result.
 *
 * <p>After the call, a bit of {@code bitmap} is set only if it was set before and its index
 * is among the first {@code length} values of {@code array}. An empty array (or a
 * non-positive {@code length}) clears the whole bitmap.
 *
 * <p>The values must be in ascending order; the words between consecutive values are
 * cleared with {@link Arrays#fill(long[], int, int, long)}, which rejects a descending range.
 *
 * @param bitmap the bitmap, modified in place
 * @param array the array of bit indexes, sorted ascending
 * @param length how much of the array to consume
 * @return the size of the intersection
 */
public static int intersect(long[] bitmap, char[] array, int length) {
  if (length <= 0) {
    // Nothing to keep: every word, including word 0, has to be cleared.
    Arrays.fill(bitmap, 0L);
    return 0;
  }
  int wordIndex = array[0] >>> 6;
  // No value falls into the words before the first one.
  Arrays.fill(bitmap, 0, wordIndex, 0L);
  long word = 0L; // mask of the values seen so far that fall into bitmap[wordIndex]
  int cardinality = 0;
  for (int i = 0; i < length; ++i) {
    int nextWordIndex = array[i] >>> 6;
    if (nextWordIndex != wordIndex) {
      // Finished with the current word: apply its mask and zero the gap up to the next one.
      bitmap[wordIndex] &= word;
      cardinality += Long.bitCount(bitmap[wordIndex]);
      Arrays.fill(bitmap, wordIndex + 1, nextWordIndex, 0L);
      wordIndex = nextWordIndex;
      word = 0L;
    }
    // A long shift only uses the low six bits of the distance, i.e. array[i] % 64.
    word |= 1L << array[i];
  }
  bitmap[wordIndex] &= word;
  cardinality += Long.bitCount(bitmap[wordIndex]);
  // No value falls into the words after the last one.
  Arrays.fill(bitmap, wordIndex + 1, bitmap.length, 0L);
  return cardinality;
}

/**
* Given a word w, return the position of the jth true bit.
*
Expand Down
Expand Up @@ -64,6 +64,17 @@ public void priorityqueue_or2() {

private static class ExtendedRoaringBitmap extends RoaringBitmap {}


@Test
public void testWorkShyAnd() {
  // b1 has containers under high keys 0, 1, 2 and 3; b2 under 0, 2 and 3.
  final RoaringBitmap b1 = RoaringBitmap.bitmapOf(1, 2, 0x10001, 0x20001, 0x30001);
  final RoaringBitmap b2 = RoaringBitmap.bitmapOf(2, 3, 0x20002, 0x30001);
  final RoaringBitmap bResult = FastAggregation.workShyAnd(b1, b2);
  // key 0: present in both inputs, only the value 2 is shared
  assertFalse(bResult.contains(1));
  assertTrue(bResult.contains(2));
  assertFalse(bResult.contains(3));
  // key 1: present in b1 only, so it is dropped by the key intersection
  assertFalse(bResult.contains(0x10001));
  // key 2: present in both inputs, but the containers share no value
  assertFalse(bResult.contains(0x20001));
  assertFalse(bResult.contains(0x20002));
  // key 3: present in both inputs with the same single value
  assertTrue(bResult.contains(0x30001));
}

@Test
public void testAndWithIterator() {
final RoaringBitmap b1 = RoaringBitmap.bitmapOf(1, 2);
Expand Down

0 comments on commit 0a49a47

Please sign in to comment.