Skip to content

Commit

Permalink
Optimisation: Add zero-garbage deserialiser for ByteBuffer to Roaring…
Browse files Browse the repository at this point in the history
…Bitmap (#650)

* optimisation: add deserialiser for bits byte array to RoaringBitmap

- existing most performant way was to convert it to a BitSet and then use BitSetUtil
- this adds a helper which you can use to get a RoaringBitmap directly from the byte array you read on the wire

* fix: copy only exact range provided when creating a bitmap container

- this removes the need to zero out the thread-local buffer every time

* * minor refactor

* * refactor: let users provide a cached buffer if they are that needy

* * new: adds similar method in BufferBitSetUtil

* * refactor: for style

* * fix: uncomment commented out benchmarks

had hidden locally, forgot to uncomment
  • Loading branch information
shikharid committed Sep 2, 2023
1 parent fda4329 commit 07ec0dd
Show file tree
Hide file tree
Showing 5 changed files with 454 additions and 20 deletions.
95 changes: 91 additions & 4 deletions RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package org.roaringbitmap;


import java.util.Arrays;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.BitSet;


Expand All @@ -15,7 +16,7 @@ public class BitSetUtil {

// a block consists of at most 1024 words, each representing 64 bits,
// thus representing at most 65536 bits
static final private int BLOCK_LENGTH = BitmapContainer.MAX_CAPACITY / Long.SIZE; //
public static final int BLOCK_LENGTH = BitmapContainer.MAX_CAPACITY / Long.SIZE; //
// 64-bit
// word

Expand Down Expand Up @@ -71,6 +72,91 @@ public static RoaringBitmap bitmapOf(final long[] words) {
return ans;
}

/**
 * Converts an uncompressed bitmap stored in a {@link ByteBuffer} into a RoaringBitmap.
 * <br>
 * Convenience overload: it allocates a fresh scratch array of {@link BitSetUtil#BLOCK_LENGTH}
 * words and delegates to {@link #bitmapOf(ByteBuffer, boolean, long[])}.
 *
 * @param bb the uncompressed bitmap
 * @param fastRank if set, returned bitmap is of type
 *        {@link org.roaringbitmap.FastRankRoaringBitmap}
 * @return roaring bitmap
 */
public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) {
  final long[] scratchWords = new long[BLOCK_LENGTH];
  return bitmapOf(bb, fastRank, scratchWords);
}

/**
 * Efficiently generates a RoaringBitmap from an uncompressed bitmap held in a ByteBuffer,
 * trying to minimise all kinds of memory allocation.
 * <br>
 * The bytes between the buffer's position and limit are read as little-endian 64-bit words,
 * which is the layout of {@code ByteBuffer.wrap(bitSet.toByteArray())}. Reading is done on a
 * slice, so the position of {@code bb} itself is not advanced.
 * <br>
 * You can provide a cached wordsBuffer for avoiding 8 KB of extra allocation on every call.
 * No reference is kept to the wordsBuffer, so it can be cached as a ThreadLocal. It does not
 * have to be cleared between calls: only the words written during this call are handed to
 * the container builder.
 *
 * @param bb the uncompressed bitmap
 * @param fastRank if set, returned bitmap is of type
 *        {@link org.roaringbitmap.FastRankRoaringBitmap}
 * @param wordsBuffer buffer of length {@link BitSetUtil#BLOCK_LENGTH}
 * @return roaring bitmap
 * @throws IllegalArgumentException if wordsBuffer does not have length
 *         {@link BitSetUtil#BLOCK_LENGTH}, or if bb has a bit set at a position that does
 *         not fit in 32 bits
 */
public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank, long[] wordsBuffer) {

  if (wordsBuffer.length != BLOCK_LENGTH) {
    throw new IllegalArgumentException("wordsBuffer length should be " + BLOCK_LENGTH);
  }

  // Work on a little-endian slice so the caller's buffer position and byte order are untouched
  bb = bb.slice().order(ByteOrder.LITTLE_ENDIAN);
  final RoaringBitmap ans = fastRank ? new FastRankRoaringBitmap() : new RoaringBitmap();

  // split buffer into blocks of long[]
  int containerIndex = 0;
  int blockLength = 0;
  int blockCardinality = 0;
  // Position of the first bit of the current block. A ByteBuffer can hold far more than 2^32
  // bits, so this is tracked as a long: an int would wrap around and make container keys
  // restart from 0.
  long offset = 0;
  long word;
  while (bb.remaining() >= 8) {
    word = bb.getLong();

    // Add read long to block
    wordsBuffer[blockLength++] = word;
    blockCardinality += Long.bitCount(word);

    // When block is full, add block to bitmap
    if (blockLength == BLOCK_LENGTH) {
      // Each block becomes a single container, if any bit is set
      if (blockCardinality > 0) {
        insertByteBufferBlock(ans, containerIndex++, offset, blockLength, blockCardinality,
            wordsBuffer);
      }
      offset += (BLOCK_LENGTH * Long.SIZE);
      blockLength = 0;
      blockCardinality = 0;
    }
  }

  if (bb.remaining() > 0) {
    // Read remaining (less than 8) bytes, least significant byte first
    // We can do this in while loop also, it will probably slow things down a bit though
    word = 0;
    for (int remaining = bb.remaining(), j = 0; j < remaining; j++) {
      word |= (bb.get() & 0xffL) << (8 * j);
    }

    // Add last word to block, only if any bit is set
    if (word != 0) {
      wordsBuffer[blockLength++] = word;
      blockCardinality += Long.bitCount(word);
    }
  }

  // Add the trailing, partially filled block to the bitmap, if any bit is set
  if (blockCardinality > 0) {
    insertByteBufferBlock(ans, containerIndex, offset, blockLength, blockCardinality,
        wordsBuffer);
  }
  return ans;
}

// Appends words[0, blockLength) as the container for the block starting at bit `offset`,
// rejecting blocks whose bits cannot be addressed with 32 bits.
private static void insertByteBufferBlock(RoaringBitmap ans, int containerIndex, long offset,
    int blockLength, int blockCardinality, long[] words) {
  if (offset > 0xFFFFFFFFL) {
    throw new IllegalArgumentException(
        "ByteBuffer has a bit set at or after position " + offset
            + ", which is outside the 32-bit range of a RoaringBitmap");
  }
  // For offsets below 2^32 the int cast keeps exactly the bits highbits() looks at
  ans.highLowContainer.insertNewKeyValueAt(containerIndex, Util.highbits((int) offset),
      BitSetUtil.containerOf(0, blockLength, blockCardinality, words));
}

private static int cardinality(final int from, final int to, final long[] words) {
int sum = 0;
for (int i = from; i < to; i++) {
Expand All @@ -89,8 +175,9 @@ private static Container containerOf(final int from, final int to, final int blo
return arrayContainerOf(from, to, blockCardinality, words);
} else {
// otherwise use bitmap container
return new BitmapContainer(Arrays.copyOfRange(words, from, from + BLOCK_LENGTH),
blockCardinality);
long[] container = new long[BLOCK_LENGTH];
System.arraycopy(words, from, container, 0, to - from);
return new BitmapContainer(container, blockCardinality);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package org.roaringbitmap.buffer;


import org.roaringbitmap.BitSetUtil;
import org.roaringbitmap.IntIterator;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.LongBuffer;
import java.util.Arrays;
import java.util.BitSet;

import static java.lang.Long.numberOfTrailingZeros;
Expand Down Expand Up @@ -77,6 +79,89 @@ public static MutableRoaringBitmap bitmapOf(final long[] words) {
return ans;
}

/**
 * Converts an uncompressed bitmap stored in a {@link ByteBuffer} into a MutableRoaringBitmap.
 * <br>
 * Convenience overload: it allocates a fresh scratch array of BLOCK_LENGTH words and
 * delegates to {@link #bitmapOf(ByteBuffer, long[])}.
 *
 * @param bb the uncompressed bitmap
 * @return roaring bitmap
 */
public static MutableRoaringBitmap bitmapOf(ByteBuffer bb) {
  final long[] scratchWords = new long[BLOCK_LENGTH];
  return bitmapOf(bb, scratchWords);
}

/**
 * Efficiently generates a MutableRoaringBitmap from an uncompressed bitmap held in a
 * ByteBuffer, trying to minimise all kinds of memory allocation.
 * <br>
 * The bytes between the buffer's position and limit are read as little-endian 64-bit words.
 * Reading is done on a slice, so the position of {@code bb} itself is not advanced.
 * <br>
 * You can provide a cached wordsBuffer for avoiding 8 KB of extra allocation on every call.
 * No reference is kept to the wordsBuffer, so it can be cached as a ThreadLocal. It does not
 * have to be cleared between calls: only the words written during this call are handed to
 * the container builder.
 *
 * @param bb the uncompressed bitmap
 * @param wordsBuffer buffer of length {@link BitSetUtil#BLOCK_LENGTH}
 * @return roaring bitmap
 * @throws IllegalArgumentException if wordsBuffer does not have the required length, or if
 *         bb has a bit set at a position that does not fit in 32 bits
 */
public static MutableRoaringBitmap bitmapOf(ByteBuffer bb, long[] wordsBuffer) {

  if (wordsBuffer.length != BLOCK_LENGTH) {
    throw new IllegalArgumentException("wordsBuffer length should be " + BLOCK_LENGTH);
  }

  // Work on a little-endian slice so the caller's buffer position and byte order are untouched
  bb = bb.slice().order(ByteOrder.LITTLE_ENDIAN);
  final MutableRoaringBitmap ans = new MutableRoaringBitmap();

  // split buffer into blocks of long[]
  int containerIndex = 0;
  int blockLength = 0;
  int blockCardinality = 0;
  // Position of the first bit of the current block. A ByteBuffer can hold far more than 2^32
  // bits, so this is tracked as a long: an int would wrap around and make container keys
  // restart from 0.
  long offset = 0;
  long word;
  while (bb.remaining() >= 8) {
    word = bb.getLong();

    // Add read long to block
    wordsBuffer[blockLength++] = word;
    blockCardinality += Long.bitCount(word);

    // When block is full, add block to bitmap
    if (blockLength == BLOCK_LENGTH) {
      // Each block becomes a single container, if any bit is set
      if (blockCardinality > 0) {
        insertByteBufferBlock(ans, containerIndex++, offset, blockLength, blockCardinality,
            wordsBuffer);
      }
      offset += (BLOCK_LENGTH * Long.SIZE);
      blockLength = 0;
      blockCardinality = 0;
    }
  }

  if (bb.remaining() > 0) {
    // Read remaining (less than 8) bytes, least significant byte first
    // We can do this in while loop also, it will probably slow things down a bit though
    word = 0;
    for (int remaining = bb.remaining(), j = 0; j < remaining; j++) {
      word |= (bb.get() & 0xffL) << (8 * j);
    }

    // Add last word to block, only if any bit is set
    if (word != 0) {
      wordsBuffer[blockLength++] = word;
      blockCardinality += Long.bitCount(word);
    }
  }

  // Add the trailing, partially filled block to the bitmap, if any bit is set
  if (blockCardinality > 0) {
    insertByteBufferBlock(ans, containerIndex, offset, blockLength, blockCardinality,
        wordsBuffer);
  }
  return ans;
}

// Appends words[0, blockLength) as the container for the block starting at bit `offset`,
// rejecting blocks whose bits cannot be addressed with 32 bits.
private static void insertByteBufferBlock(MutableRoaringBitmap ans, int containerIndex,
    long offset, int blockLength, int blockCardinality, long[] words) {
  if (offset > 0xFFFFFFFFL) {
    throw new IllegalArgumentException(
        "ByteBuffer has a bit set at or after position " + offset
            + ", which is outside the 32-bit range of a RoaringBitmap");
  }
  // For offsets below 2^32 the int cast keeps exactly the bits highbits() looks at
  ((MutableRoaringArray) ans.highLowContainer).insertNewKeyValueAt(containerIndex,
      BufferUtil.highbits((int) offset),
      BufferBitSetUtil.containerOf(0, blockLength, blockCardinality, words));
}

private static int cardinality(final int from, final int to, final long[] words) {
int sum = 0;
for (int i = from; i < to; i++) {
Expand All @@ -95,8 +180,9 @@ private static MappeableContainer containerOf(final int from, final int to,
return arrayContainerOf(from, to, blockCardinality, words);
} else {
// otherwise use bitmap container
return new MappeableBitmapContainer(
LongBuffer.wrap(Arrays.copyOfRange(words, from, from + BLOCK_LENGTH)), blockCardinality);
long[] container = new long[BLOCK_LENGTH];
System.arraycopy(words, from, container, 0, to - from);
return new MappeableBitmapContainer(LongBuffer.wrap(container), blockCardinality);
}
}

Expand Down
104 changes: 104 additions & 0 deletions RoaringBitmap/src/test/java/org/roaringbitmap/TestBitSetUtil.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.roaringbitmap;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.nio.ByteBuffer;
import java.util.BitSet;
import java.util.Random;

Expand Down Expand Up @@ -135,4 +137,106 @@ public void testSmallBitSet10000000() {
assertEqualBitsets(bitset, bitmap);
}

/*
The ByteBuffer->RoaringBitmap tests below replicate the similar tests written for BitSet/long[]->RoaringBitmap
*/

@Test
public void testEmptyByteBuffer() {
  // A BitSet with no bits set must convert to a bitmap with the same (empty) content
  final BitSet expected = new BitSet();
  final ByteBuffer serialized = toByteBuffer(expected);
  final RoaringBitmap actual = BitSetUtil.bitmapOf(serialized, false);
  assertEqualBitsets(expected, actual);
}

@Test
public void testFlipFlapBetweenRandomFullAndEmptyByteBuffer() {
  final Random rng = new Random(1234);
  final int bitsPerBlock = 1024 * Long.SIZE;
  final int blockCount = 50;
  final BitSet expected = new BitSet(bitsPerBlock * blockCount);

  // Build a mix of randomly filled blocks, full blocks and empty blocks
  for (int blockIndex = 0; blockIndex < blockCount; blockIndex++) {
    final int firstBit = blockIndex * bitsPerBlock;
    final int type = rng.nextInt(3);
    if (type == 0) {
      // a block with random set bits
      appendRandomBitset(rng, firstBit, expected, bitsPerBlock);
    } else if (type == 1) {
      // a full block
      expected.set(firstBit, firstBit + bitsPerBlock);
    }
    // any other type: leave the block empty
  }

  final ByteBuffer serialized = toByteBuffer(expected);
  final RoaringBitmap actual = BitSetUtil.bitmapOf(serialized, false);
  assertEqualBitsets(expected, actual);
}

@Test
public void testFullByteBuffer() {
  // 50 blocks of 1024 words each, with every bit set
  final int bitCount = 1024 * Long.SIZE * 50;
  final BitSet expected = new BitSet();
  expected.set(0, bitCount);

  final ByteBuffer serialized = toByteBuffer(expected);
  final RoaringBitmap actual = BitSetUtil.bitmapOf(serialized, false);
  assertEqualBitsets(expected, actual);
}

@Test
public void testGapByteBuffer() {
  // Regularly spaced bits: gaps are the powers of two from 1 to 4096, shifted by a start offset
  for (int gap = 1; gap <= 4096; gap <<= 1) {
    for (int start = 300; start < 3000; start += 10) {
      final BitSet expected = new BitSet();
      for (int position = 0; position < 100000; position += gap) {
        expected.set(position + start);
      }
      final ByteBuffer serialized = toByteBuffer(expected);
      final RoaringBitmap actual = BitSetUtil.bitmapOf(serialized, false);
      assertEqualBitsets(expected, actual);
    }
  }
}

@Test
public void testRandomByteBuffer() {
  final Random rng = new Random(8934);
  final int runCount = 100;
  final int maxBits = 500000;
  int run = 0;
  while (run < runCount) {
    // The offset is drawn before the length so the random sequence is consumed in a fixed order
    final int offset = rng.nextInt(maxBits) & Integer.MAX_VALUE;
    final int length = rng.nextInt(maxBits);
    final BitSet expected = randomBitset(rng, offset, length);
    final ByteBuffer serialized = toByteBuffer(expected);
    final RoaringBitmap actual = BitSetUtil.bitmapOf(serialized, false);
    assertEqualBitsets(expected, actual);
    ++run;
  }
}

@Test
public void testByteArrayWithOnly10000000thBitSet() {
  // A single high bit: every block before it is empty
  final BitSet expected = new BitSet();
  expected.set(10000000);

  final ByteBuffer serialized = toByteBuffer(expected);
  final RoaringBitmap actual = BitSetUtil.bitmapOf(serialized, false);
  assertEqualBitsets(expected, actual);
}

@Test
public void testByteArrayWithOnly1And10000000thBitSet() {
  // One bit in the first block and one far away, with only empty blocks in between
  final BitSet expected = new BitSet();
  for (int position : new int[] {1, 10000000}) {
    expected.set(position);
  }

  final ByteBuffer serialized = toByteBuffer(expected);
  final RoaringBitmap actual = BitSetUtil.bitmapOf(serialized, false);
  assertEqualBitsets(expected, actual);
}

@Test
public void testByteArrayWithFastRank() {
  // With fastRank = true the returned bitmap must be a FastRankRoaringBitmap
  final Random rng = new Random(238);
  final BitSet source = randomBitset(rng, 0, 50);
  final ByteBuffer serialized = toByteBuffer(source);
  final RoaringBitmap actual = BitSetUtil.bitmapOf(serialized, true);
  final boolean isFastRank = actual instanceof FastRankRoaringBitmap;
  Assertions.assertTrue(isFastRank);
}

// Serialises the BitSet with BitSet.toByteArray() and wraps the resulting array in a ByteBuffer
private static ByteBuffer toByteBuffer(BitSet bitset) {
  final byte[] serializedBits = bitset.toByteArray();
  return ByteBuffer.wrap(serializedBits);
}
}

0 comments on commit 07ec0dd

Please sign in to comment.