From 07ec0dd35659fc1e8109aba74c3a30d66a67edf6 Mon Sep 17 00:00:00 2001
From: Shikhar Singh <10097451+shikharid@users.noreply.github.com>
Date: Sat, 2 Sep 2023 18:58:11 +0530
Subject: [PATCH] Optimisation: Add zero-garbage deserialiser for ByteBuffer to
RoaringBitmap (#650)
* optimisation: add deserialiser for bits byte array to RoaringBitmap
- previously, the most performant way was to convert it to a BitSet and then use BitSetUtil
- this adds a helper which you can use to get a RoaringBitmap directly from the byte array you read off the wire
* fix: copy only exact range provided when creating a bitmap container
- this removes the need to zero out the thread-local buffer every time
* * minor refactor
* * refactor: let users provide a cached buffer if they are that needy
* * new: adds similar method in BufferBitSetUtil
* * refactor: for style
* * fix: uncomment commented out benchmarks
had hidden locally, forgot to uncomment
---
.../java/org/roaringbitmap/BitSetUtil.java | 95 +++++++++++++++-
.../buffer/BufferBitSetUtil.java | 92 +++++++++++++++-
.../org/roaringbitmap/TestBitSetUtil.java | 104 ++++++++++++++++++
.../roaringbitmap/buffer/TestBitSetUtil.java | 97 ++++++++++++++++
.../roaringbitmap/BitSetUtilBenchmark.java | 86 ++++++++++++---
5 files changed, 454 insertions(+), 20 deletions(-)
diff --git a/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java b/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java
index acbce7695..211e8f78b 100644
--- a/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java
+++ b/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java
@@ -1,7 +1,8 @@
package org.roaringbitmap;
-import java.util.Arrays;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.util.BitSet;
@@ -15,7 +16,7 @@ public class BitSetUtil {
// a block consists has a maximum of 1024 words, each representing 64 bits,
// thus representing at maximum 65536 bits
- static final private int BLOCK_LENGTH = BitmapContainer.MAX_CAPACITY / Long.SIZE; //
+ public static final int BLOCK_LENGTH = BitmapContainer.MAX_CAPACITY / Long.SIZE; //
// 64-bit
// word
@@ -71,6 +72,91 @@ public static RoaringBitmap bitmapOf(final long[] words) {
return ans;
}
+  /**
+   * Builds a RoaringBitmap straight from an uncompressed bitmap held in a byte array or
+   * ByteBuffer, allocating a fresh words buffer of {@link BitSetUtil#BLOCK_LENGTH} longs
+   * for the conversion.
+   *
+   * @param bb the uncompressed bitmap
+   * @param fastRank if set, returned bitmap is of type
+   *        {@link org.roaringbitmap.FastRankRoaringBitmap}
+   * @return roaring bitmap
+   */
+  public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) {
+    final long[] scratchWords = new long[BLOCK_LENGTH];
+    return bitmapOf(bb, fastRank, scratchWords);
+  }
+
+  /**
+   * Efficiently generates a RoaringBitmap from an uncompressed bitmap held in a byte array or
+   * ByteBuffer, laid out as {@link java.util.BitSet#toByteArray()} produces it (little-endian).
+   * This method tries to minimise all kinds of memory allocation.
+   *
+   * You can provide a cached wordsBuffer for avoiding 8 KB of extra allocation on every call.
+   * No reference is kept to the wordsBuffer, so it can be cached as a ThreadLocal, and it does
+   * not have to be cleared between calls.
+   *
+   * The bytes are read from a slice of {@code bb}, so the position of {@code bb} itself is
+   * left untouched.
+   *
+   * @param bb the uncompressed bitmap
+   * @param fastRank if set, returned bitmap is of type
+   *        {@link org.roaringbitmap.FastRankRoaringBitmap}
+   * @param wordsBuffer buffer of length {@link BitSetUtil#BLOCK_LENGTH}
+   * @return roaring bitmap
+   * @throws IllegalArgumentException if wordsBuffer does not have the expected length, or if
+   *         bb has a bit set at an index of 2^32 or more, which a RoaringBitmap cannot hold
+   */
+  public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank, long[] wordsBuffer) {
+
+    if (wordsBuffer.length != BLOCK_LENGTH) {
+      throw new IllegalArgumentException("wordsBuffer length should be " + BLOCK_LENGTH);
+    }
+
+    // work on a little-endian slice: position 0 of the slice is bit 0 of the bitmap
+    bb = bb.slice().order(ByteOrder.LITTLE_ENDIAN);
+    final RoaringBitmap ans = fastRank ? new FastRankRoaringBitmap() : new RoaringBitmap();
+
+    // split buffer into blocks of long[]
+    int containerIndex = 0;
+    int blockLength = 0, blockCardinality = 0, offset = 0;
+    long word;
+    while (bb.remaining() >= 8) {
+      word = bb.getLong();
+
+      // Add read long to block
+      wordsBuffer[blockLength++] = word;
+      blockCardinality += Long.bitCount(word);
+
+      // When block is full, add block to bitmap
+      if (blockLength == BLOCK_LENGTH) {
+        // Each block becomes a single container, if any bit is set
+        if (blockCardinality > 0) {
+          checkBlockWithinUniverse(bb);
+          ans.highLowContainer.insertNewKeyValueAt(containerIndex++, Util.highbits(offset),
+              BitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer));
+        }
+        /*
+         offset wraps around to 0 once 2^32 bits have been consumed. That is harmless after
+         the last representable block; any later non-empty block is rejected before insertion,
+         so a wrapped offset is never turned into a container key.
+         */
+        offset += (BLOCK_LENGTH * Long.SIZE);
+        blockLength = blockCardinality = 0;
+      }
+    }
+
+    if (bb.remaining() > 0) {
+      // Read remaining (less than 8) bytes
+      // We can do this in while loop also, it will probably slow things down a bit though
+      word = 0;
+      for (int remaining = bb.remaining(), j = 0; j < remaining; j++) {
+        word |= (bb.get() & 0xffL) << (8 * j);
+      }
+
+      // Add last word to block, only if any bit is set
+      if (word != 0) {
+        wordsBuffer[blockLength++] = word;
+        blockCardinality += Long.bitCount(word);
+      }
+    }
+
+    // Add block to map, if any bit is set
+    if (blockCardinality > 0) {
+      checkBlockWithinUniverse(bb);
+      ans.highLowContainer.insertNewKeyValueAt(containerIndex, Util.highbits(offset),
+          BitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer));
+    }
+    return ans;
+  }
+
+  /**
+   * Rejects a non-empty block that lies past the 2^32 bits a RoaringBitmap can address.
+   * Must be called right after the last byte of the block was read from {@code bb}.
+   */
+  private static void checkBlockWithinUniverse(ByteBuffer bb) {
+    // 2^32 bits are 2^29 bytes; blocks are aligned, so a block fits iff it ends at or before that
+    final int universeBytes = 1 << 29;
+    if (bb.position() > universeBytes) {
+      throw new IllegalArgumentException(
+          "ByteBuffer has bits set beyond the 32-bit range supported by RoaringBitmap");
+    }
+  }
+
private static int cardinality(final int from, final int to, final long[] words) {
int sum = 0;
for (int i = from; i < to; i++) {
@@ -89,8 +175,9 @@ private static Container containerOf(final int from, final int to, final int blo
return arrayContainerOf(from, to, blockCardinality, words);
} else {
// otherwise use bitmap container
- return new BitmapContainer(Arrays.copyOfRange(words, from, from + BLOCK_LENGTH),
- blockCardinality);
+ long[] container = new long[BLOCK_LENGTH];
+ System.arraycopy(words, from, container, 0, to - from);
+ return new BitmapContainer(container, blockCardinality);
}
}
diff --git a/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java b/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java
index 0f2229104..78192dcc6 100644
--- a/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java
+++ b/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java
@@ -1,11 +1,13 @@
package org.roaringbitmap.buffer;
+import org.roaringbitmap.BitSetUtil;
import org.roaringbitmap.IntIterator;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.LongBuffer;
-import java.util.Arrays;
import java.util.BitSet;
import static java.lang.Long.numberOfTrailingZeros;
@@ -77,6 +79,89 @@ public static MutableRoaringBitmap bitmapOf(final long[] words) {
return ans;
}
+  /**
+   * Builds a MutableRoaringBitmap straight from an uncompressed bitmap held in a byte array or
+   * ByteBuffer, allocating a fresh words buffer of {@code BLOCK_LENGTH} longs for the
+   * conversion.
+   *
+   * @param bb the uncompressed bitmap
+   * @return roaring bitmap
+   */
+  public static MutableRoaringBitmap bitmapOf(ByteBuffer bb) {
+    final long[] scratchWords = new long[BLOCK_LENGTH];
+    return bitmapOf(bb, scratchWords);
+  }
+
+  /**
+   * Efficiently generates a MutableRoaringBitmap from an uncompressed bitmap held in a byte
+   * array or ByteBuffer, laid out as {@link java.util.BitSet#toByteArray()} produces it
+   * (little-endian). This method tries to minimise all kinds of memory allocation.
+   *
+   * You can provide a cached wordsBuffer for avoiding 8 KB of extra allocation on every call.
+   * No reference is kept to the wordsBuffer, so it can be cached as a ThreadLocal, and it does
+   * not have to be cleared between calls.
+   *
+   * The bytes are read from a slice of {@code bb}, so the position of {@code bb} itself is
+   * left untouched.
+   *
+   * @param bb the uncompressed bitmap
+   * @param wordsBuffer buffer of length {@link BitSetUtil#BLOCK_LENGTH}
+   * @return roaring bitmap
+   * @throws IllegalArgumentException if wordsBuffer does not have the expected length, or if
+   *         bb has a bit set at an index of 2^32 or more, which a roaring bitmap cannot hold
+   */
+  public static MutableRoaringBitmap bitmapOf(ByteBuffer bb, long[] wordsBuffer) {
+
+    if (wordsBuffer.length != BLOCK_LENGTH) {
+      throw new IllegalArgumentException("wordsBuffer length should be " + BLOCK_LENGTH);
+    }
+
+    // work on a little-endian slice: position 0 of the slice is bit 0 of the bitmap
+    bb = bb.slice().order(ByteOrder.LITTLE_ENDIAN);
+    final MutableRoaringBitmap ans = new MutableRoaringBitmap();
+
+    // split buffer into blocks of long[]
+    int containerIndex = 0;
+    int blockLength = 0, blockCardinality = 0, offset = 0;
+    long word;
+    while (bb.remaining() >= 8) {
+      word = bb.getLong();
+
+      // Add read long to block
+      wordsBuffer[blockLength++] = word;
+      blockCardinality += Long.bitCount(word);
+
+      // When block is full, add block to bitmap
+      if (blockLength == BLOCK_LENGTH) {
+        // Each block becomes a single container, if any bit is set
+        if (blockCardinality > 0) {
+          checkBlockWithinUniverse(bb);
+          ((MutableRoaringArray) ans.highLowContainer).insertNewKeyValueAt(containerIndex++,
+              BufferUtil.highbits(offset), BufferBitSetUtil.containerOf(0, blockLength,
+              blockCardinality, wordsBuffer));
+        }
+        /*
+         offset wraps around to 0 once 2^32 bits have been consumed. That is harmless after
+         the last representable block; any later non-empty block is rejected before insertion,
+         so a wrapped offset is never turned into a container key.
+         */
+        offset += (BLOCK_LENGTH * Long.SIZE);
+        blockLength = blockCardinality = 0;
+      }
+    }
+
+    if (bb.remaining() > 0) {
+      // Read remaining (less than 8) bytes
+      // We can do this in while loop also, it will probably slow things down a bit though
+      word = 0;
+      for (int remaining = bb.remaining(), j = 0; j < remaining; j++) {
+        word |= (bb.get() & 0xffL) << (8 * j);
+      }
+
+      // Add last word to block, only if any bit is set
+      if (word != 0) {
+        wordsBuffer[blockLength++] = word;
+        blockCardinality += Long.bitCount(word);
+      }
+    }
+
+    // Add block to map, if any bit is set
+    if (blockCardinality > 0) {
+      checkBlockWithinUniverse(bb);
+      ((MutableRoaringArray) ans.highLowContainer).insertNewKeyValueAt(containerIndex,
+          BufferUtil.highbits(offset),
+          BufferBitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer));
+    }
+    return ans;
+  }
+
+  /**
+   * Rejects a non-empty block that lies past the 2^32 bits a roaring bitmap can address.
+   * Must be called right after the last byte of the block was read from {@code bb}.
+   */
+  private static void checkBlockWithinUniverse(ByteBuffer bb) {
+    // 2^32 bits are 2^29 bytes; blocks are aligned, so a block fits iff it ends at or before that
+    final int universeBytes = 1 << 29;
+    if (bb.position() > universeBytes) {
+      throw new IllegalArgumentException(
+          "ByteBuffer has bits set beyond the 32-bit range supported by RoaringBitmap");
+    }
+  }
+
private static int cardinality(final int from, final int to, final long[] words) {
int sum = 0;
for (int i = from; i < to; i++) {
@@ -95,8 +180,9 @@ private static MappeableContainer containerOf(final int from, final int to,
return arrayContainerOf(from, to, blockCardinality, words);
} else {
// otherwise use bitmap container
- return new MappeableBitmapContainer(
- LongBuffer.wrap(Arrays.copyOfRange(words, from, from + BLOCK_LENGTH)), blockCardinality);
+ long[] container = new long[BLOCK_LENGTH];
+ System.arraycopy(words, from, container, 0, to - from);
+ return new MappeableBitmapContainer(LongBuffer.wrap(container), blockCardinality);
}
}
diff --git a/RoaringBitmap/src/test/java/org/roaringbitmap/TestBitSetUtil.java b/RoaringBitmap/src/test/java/org/roaringbitmap/TestBitSetUtil.java
index cf0fe453a..1f945b586 100644
--- a/RoaringBitmap/src/test/java/org/roaringbitmap/TestBitSetUtil.java
+++ b/RoaringBitmap/src/test/java/org/roaringbitmap/TestBitSetUtil.java
@@ -1,7 +1,9 @@
package org.roaringbitmap;
+import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
+import java.nio.ByteBuffer;
import java.util.BitSet;
import java.util.Random;
@@ -135,4 +137,106 @@ public void testSmallBitSet10000000() {
assertEqualBitsets(bitset, bitmap);
}
+ /*
+ The ByteBuffer->RoaringBitmap tests below mirror the existing BitSet/long[]->RoaringBitmap tests
+ */
+
+  @Test
+  public void testEmptyByteBuffer() {
+    // a BitSet without any bit set is the expected content
+    final BitSet expected = new BitSet();
+    final ByteBuffer raw = toByteBuffer(expected);
+    assertEqualBitsets(expected, BitSetUtil.bitmapOf(raw, false));
+  }
+
+  @Test
+  public void testFlipFlapBetweenRandomFullAndEmptyByteBuffer() {
+    final Random rng = new Random(1234);
+    final int bitsPerBlock = 1024 * Long.SIZE;
+    final int blockCount = 50;
+    final BitSet expected = new BitSet(bitsPerBlock * blockCount);
+
+    // every block is, at random, partially filled, completely full, or left empty
+    for (int i = 0; i < blockCount; i++) {
+      final int start = i * bitsPerBlock;
+      final int type = rng.nextInt(3);
+      if (type == 0) {
+        // a block with random set bits
+        appendRandomBitset(rng, start, expected, bitsPerBlock);
+      } else if (type == 1) {
+        // a full block
+        expected.set(start, start + bitsPerBlock);
+      }
+      // any other draw: the block stays empty
+    }
+    final RoaringBitmap actual = BitSetUtil.bitmapOf(toByteBuffer(expected), false);
+    assertEqualBitsets(expected, actual);
+  }
+
+  @Test
+  public void testFullByteBuffer() {
+    // 50 blocks of 1024 words, every bit set
+    final int bitCount = 1024 * Long.SIZE * 50;
+    final BitSet expected = new BitSet();
+    expected.set(0, bitCount);
+    final ByteBuffer raw = toByteBuffer(expected);
+    assertEqualBitsets(expected, BitSetUtil.bitmapOf(raw, false));
+  }
+
+  @Test
+  public void testGapByteBuffer() {
+    final int span = 100000;
+    // regularly spaced bits: the spacing doubles from 1 to 4096, the first bit moves in steps of 10
+    for (int gap = 1; gap <= 4096; gap <<= 1) {
+      for (int offset = 300; offset < 3000; offset += 10) {
+        final BitSet expected = new BitSet();
+        for (int bit = offset; bit < offset + span; bit += gap) {
+          expected.set(bit);
+        }
+        final RoaringBitmap actual = BitSetUtil.bitmapOf(toByteBuffer(expected), false);
+        assertEqualBitsets(expected, actual);
+      }
+    }
+  }
+
+  @Test
+  public void testRandomByteBuffer() {
+    final Random rng = new Random(8934);
+    final int maxNbits = 500000;
+    for (int run = 0; run < 100; run++) {
+      // nextInt(bound) is never negative, so both draws are used as they come
+      final int offset = rng.nextInt(maxNbits);
+      final int secondDraw = rng.nextInt(maxNbits);
+      final BitSet expected = randomBitset(rng, offset, secondDraw);
+      final RoaringBitmap actual = BitSetUtil.bitmapOf(toByteBuffer(expected), false);
+      assertEqualBitsets(expected, actual);
+    }
+  }
+
+  @Test
+  public void testByteArrayWithOnly10000000thBitSet() {
+    // a single bit far from the start: everything before it is zero
+    final int loneBit = 10000000;
+    final BitSet expected = new BitSet();
+    expected.set(loneBit);
+    final ByteBuffer raw = toByteBuffer(expected);
+    assertEqualBitsets(expected, BitSetUtil.bitmapOf(raw, false));
+  }
+
+  @Test
+  public void testByteArrayWithOnly1And10000000thBitSet() {
+    // one bit near the start, one far away, nothing in between
+    final BitSet expected = new BitSet();
+    for (int bit : new int[] {1, 10000000}) {
+      expected.set(bit);
+    }
+    final ByteBuffer raw = toByteBuffer(expected);
+    assertEqualBitsets(expected, BitSetUtil.bitmapOf(raw, false));
+  }
+
+  @Test
+  public void testByteArrayWithFastRank() {
+    final BitSet bitset = randomBitset(new Random(238), 0, 50);
+    final RoaringBitmap bitmap = BitSetUtil.bitmapOf(toByteBuffer(bitset), true);
+    // fastRank = true must select the fast-rank implementation ...
+    Assertions.assertTrue(bitmap instanceof FastRankRoaringBitmap);
+    // ... and that bitmap must still hold exactly the bits of the source bitset
+    assertEqualBitsets(bitset, bitmap);
+  }
+
+ private static ByteBuffer toByteBuffer(BitSet bitset) {
+ return ByteBuffer.wrap(bitset.toByteArray());
+ }
}
diff --git a/RoaringBitmap/src/test/java/org/roaringbitmap/buffer/TestBitSetUtil.java b/RoaringBitmap/src/test/java/org/roaringbitmap/buffer/TestBitSetUtil.java
index 9052576de..dda8cce32 100644
--- a/RoaringBitmap/src/test/java/org/roaringbitmap/buffer/TestBitSetUtil.java
+++ b/RoaringBitmap/src/test/java/org/roaringbitmap/buffer/TestBitSetUtil.java
@@ -4,6 +4,7 @@
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
+import java.nio.ByteBuffer;
import java.util.BitSet;
import java.util.Random;
@@ -137,4 +138,100 @@ public void testSmallBitSet10000000() {
assertEqualBitsets(bitset, bitmap);
}
+ /*
+ The ByteBuffer->MutableRoaringBitmap tests below mirror the existing BitSet/long[]->MutableRoaringBitmap tests
+ */
+
+  @Test
+  public void testEmptyByteBuffer() {
+    // a BitSet without any bit set is the expected content
+    final BitSet expected = new BitSet();
+    final ByteBuffer raw = toByteBuffer(expected);
+    assertEqualBitsets(expected, BufferBitSetUtil.bitmapOf(raw));
+  }
+
+  @Test
+  public void testFlipFlapBetweenRandomFullAndEmptyByteBuffer() {
+    final Random rng = new Random(1234);
+    final int bitsPerBlock = 1024 * Long.SIZE;
+    final int blockCount = 50;
+    final BitSet expected = new BitSet(bitsPerBlock * blockCount);
+
+    // every block is, at random, partially filled, completely full, or left empty
+    for (int i = 0; i < blockCount; i++) {
+      final int start = i * bitsPerBlock;
+      final int type = rng.nextInt(3);
+      if (type == 0) {
+        // a block with random set bits
+        appendRandomBitset(rng, start, expected, bitsPerBlock);
+      } else if (type == 1) {
+        // a full block
+        expected.set(start, start + bitsPerBlock);
+      }
+      // any other draw: the block stays empty
+    }
+    final MutableRoaringBitmap actual = BufferBitSetUtil.bitmapOf(toByteBuffer(expected));
+    assertEqualBitsets(expected, actual);
+  }
+
+  @Test
+  public void testFullByteBuffer() {
+    // 50 blocks of 1024 words, every bit set
+    final int bitCount = 1024 * Long.SIZE * 50;
+    final BitSet expected = new BitSet();
+    expected.set(0, bitCount);
+    final ByteBuffer raw = toByteBuffer(expected);
+    assertEqualBitsets(expected, BufferBitSetUtil.bitmapOf(raw));
+  }
+
+  @Test
+  public void testGapByteBuffer() {
+    final int span = 100000;
+    // regularly spaced bits: the spacing doubles from 1 to 4096, the first bit moves in steps of 10
+    for (int gap = 1; gap <= 4096; gap <<= 1) {
+      for (int offset = 300; offset < 3000; offset += 10) {
+        final BitSet expected = new BitSet();
+        for (int bit = offset; bit < offset + span; bit += gap) {
+          expected.set(bit);
+        }
+        final MutableRoaringBitmap actual = BufferBitSetUtil.bitmapOf(toByteBuffer(expected));
+        assertEqualBitsets(expected, actual);
+      }
+    }
+  }
+
+  @Test
+  public void testRandomByteBuffer() {
+    final Random rng = new Random(8934);
+    final int maxNbits = 500000;
+    for (int run = 0; run < 100; run++) {
+      // nextInt(bound) is never negative, so both draws are used as they come
+      final int offset = rng.nextInt(maxNbits);
+      final int secondDraw = rng.nextInt(maxNbits);
+      final BitSet expected = randomBitset(rng, offset, secondDraw);
+      final MutableRoaringBitmap actual = BufferBitSetUtil.bitmapOf(toByteBuffer(expected));
+      assertEqualBitsets(expected, actual);
+    }
+  }
+
+  @Test
+  public void testByteArrayWithOnly10000000thBitSet() {
+    // a single bit far from the start: everything before it is zero
+    final int loneBit = 10000000;
+    final BitSet expected = new BitSet();
+    expected.set(loneBit);
+    final ByteBuffer raw = toByteBuffer(expected);
+    assertEqualBitsets(expected, BufferBitSetUtil.bitmapOf(raw));
+  }
+
+  @Test
+  public void testByteArrayWithOnly1And10000000thBitSet() {
+    // one bit near the start, one far away, nothing in between
+    final BitSet expected = new BitSet();
+    for (int bit : new int[] {1, 10000000}) {
+      expected.set(bit);
+    }
+    final ByteBuffer raw = toByteBuffer(expected);
+    assertEqualBitsets(expected, BufferBitSetUtil.bitmapOf(raw));
+  }
+
+ private static ByteBuffer toByteBuffer(BitSet bitset) {
+ return ByteBuffer.wrap(bitset.toByteArray());
+ }
+
}
diff --git a/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java b/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java
index db7cd0fcd..3c0950eeb 100644
--- a/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java
+++ b/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java
@@ -1,18 +1,14 @@
package org.roaringbitmap;
+import org.openjdk.jmh.annotations.*;
+
import java.io.DataInputStream;
import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.BitSet;
import java.util.concurrent.TimeUnit;
import java.util.zip.GZIPInputStream;
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.annotations.BenchmarkMode;
-import org.openjdk.jmh.annotations.Mode;
-import org.openjdk.jmh.annotations.OutputTimeUnit;
-import org.openjdk.jmh.annotations.Scope;
-import org.openjdk.jmh.annotations.Setup;
-import org.openjdk.jmh.annotations.State;
-
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public class BitSetUtilBenchmark {
@@ -36,6 +32,47 @@ public long BitSetToRoaringUsingBitSetUtil(Data d) {
return bogus;
}
+  // Per-thread scratch buffer of BitSetUtil.BLOCK_LENGTH words for the cached-buffer benchmark.
+  // The type argument is required: with a raw ThreadLocal, get() yields Object, not long[].
+  private static final ThreadLocal<long[]> WORD_BLOCK =
+      ThreadLocal.withInitial(() -> new long[BitSetUtil.BLOCK_LENGTH]);
+
+ /*
+ Given an uncompressed bitset represented as a byte array (basically, as read on the wire),
+ the benchmarks below compare three ways of turning it into a RoaringBitmap:
+ 1. ByteArrayToRoaring - directly convert the byte array to a roaring bitmap by wrapping it in a ByteBuffer
+ 2. ByteArrayToBitsetToRoaring - convert the byte array to a BitSet and then create the bitmap from it
+ 3. ByteArrayToRoaringWithCachedBuffer - convert directly, reusing a cached words buffer
+ */
+
+  /** Converts every byte array directly, with a new words buffer allocated per conversion. */
+  @Benchmark
+  public long ByteArrayToRoaring(Data d) {
+    long totalCardinality = 0;
+    for (byte[] bytes : d.bitsetsAsBytes) {
+      final RoaringBitmap bitmap = BitSetUtil.bitmapOf(ByteBuffer.wrap(bytes), false);
+      // the cardinalities are summed and returned so the conversions are not dead code
+      totalCardinality += bitmap.getCardinality();
+    }
+    return totalCardinality;
+  }
+
+  /** Converts every byte array directly, passing the thread-local words buffer to each call. */
+  @Benchmark
+  public long ByteArrayToRoaringWithCachedBuffer(Data d) {
+    long totalCardinality = 0;
+    for (byte[] bytes : d.bitsetsAsBytes) {
+      // the thread-local lookup stays inside the loop, as part of the measured work
+      final RoaringBitmap bitmap =
+          BitSetUtil.bitmapOf(ByteBuffer.wrap(bytes), false, WORD_BLOCK.get());
+      totalCardinality += bitmap.getCardinality();
+    }
+    return totalCardinality;
+  }
+
+
+  /** Converts every byte array to a BitSet first and builds the bitmap from that BitSet. */
+  @Benchmark
+  public long ByteArrayToBitsetToRoaring(Data d) {
+    long totalCardinality = 0;
+    for (byte[] bytes : d.bitsetsAsBytes) {
+      final BitSet intermediate = BitSet.valueOf(bytes);
+      final RoaringBitmap bitmap = BitSetUtil.bitmapOf(intermediate);
+      totalCardinality += bitmap.getCardinality();
+    }
+    return totalCardinality;
+  }
private static RoaringBitmap bitmapTheNaiveWay(final long[] words) {
int cardinality = 0;
@@ -68,30 +105,54 @@ private static RoaringBitmap bitmapTheNaiveWay(final long[] words) {
@State(Scope.Benchmark)
public static class Data {
long[][] bitsets;
+ byte[][] bitsetsAsBytes;
@Setup
public void setup() throws IOException {
final String bitset = "/real-roaring-dataset/bitsets_1925630_96.gz";
this.getClass().getResourceAsStream(bitset);
this.bitsets = deserialize(bitset);
+ this.bitsetsAsBytes = bitsetsAsBytes(bitsets); // same bitsets as raw byte arrays, read by the ByteArray* benchmarks
+ }
+
+ private byte[][] bitsetsAsBytes(long[][] bitsets) {
+ byte[][] bitsetsAsBytes = new byte[bitsets.length][];
+ for (int i = 0; i < bitsets.length; i++) {
+ long[] bitset = bitsets[i];
+ bitsetsAsBytes[i] = BitSet.valueOf(bitset).toByteArray();
+ }
+ return bitsetsAsBytes;
}
private long[][] deserialize(final String bitsetResource) throws IOException {
final DataInputStream dos = new DataInputStream(
new GZIPInputStream(this.getClass().getResourceAsStream(bitsetResource)));
try {
- final long[][] bitset = new long[dos.readInt()][];
- for (int i = 0; i < bitset.length; i++) {
+ /* Change this value to see number for small vs large bitsets
+ wordSize = 64 represents 4096 bits (512 bytes)
+ wordSize = 512 represents 32768 bits (~4kb)
+ wordSize = 8192 represents 524288 bits (~64kb)
+ wordSize = 131072 represents 8388608 bits (~8.3 million, ~1mb)
+ */
+ final int minTotalWordSize = 512;
+ // Try to keep size of bitsets created below 1 gb
+ final int bitsetCnt = Math.min((1024 * 1024 * 1024) / (minTotalWordSize * 8), dos.readInt());
+
+ final long[][] bitset = new long[bitsetCnt][];
+ for (int i = 0; i < bitsetCnt; i++) {
final int wordSize = dos.readInt();
// for duplication, to make bitsets wider
- final int clone = 0;
- final long words[] = new long[wordSize * (clone + 1)];
+ final int clone = (minTotalWordSize + wordSize) / wordSize;
+ final long[] words = new long[wordSize * (clone + 1)];
for (int j = 0; j < wordSize; j++) {
words[j] = dos.readLong();
}
// duplicate long[] n times to the right
+ for(int j = 0; j < clone; j++) {
+ System.arraycopy(words, 0, words, (j+1)*wordSize, wordSize);
+ }
bitset[i] = words;
}
return bitset;
@@ -101,5 +162,4 @@ private long[][] deserialize(final String bitsetResource) throws IOException {
}
}
-
}