From df4f7bb24ac6356680a4c791ae18af1f1faab246 Mon Sep 17 00:00:00 2001 From: shikharid Date: Sun, 6 Aug 2023 23:24:05 +0530 Subject: [PATCH 1/7] optimisation: add deserialiser for bits byte array to RoaringBitmap - existing most performant way was to convert it to a BitSet and then use BitSetUtil - this adds a helper which you can use to get a RoaringBitmap directly from the byte array you read on the wire --- .../java/org/roaringbitmap/BitSetUtil.java | 71 ++++++++++++ .../org/roaringbitmap/TestBitSetUtil.java | 104 ++++++++++++++++++ .../roaringbitmap/BitSetUtilBenchmark.java | 72 +++++++++--- 3 files changed, 234 insertions(+), 13 deletions(-) diff --git a/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java b/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java index acbce7695..c8df7ddba 100644 --- a/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java +++ b/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java @@ -1,6 +1,8 @@ package org.roaringbitmap; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.Arrays; import java.util.BitSet; @@ -71,6 +73,75 @@ public static RoaringBitmap bitmapOf(final long[] words) { return ans; } + // To avoid memory allocation, reuse ThreadLocal buffers + private static final ThreadLocal WORD_BLOCK = ThreadLocal.withInitial(() -> + new long[BLOCK_LENGTH]); + + /** + * Efficiently generate a RoaringBitmap from an uncompressed byte array or ByteBuffer + * This method tries to minimise all kinds of memory allocation + * + * @param bb the uncompressed bitmap + * @param fastRank if set, returned bitmap is of type + * {@link org.roaringbitmap.FastRankRoaringBitmap} + * @return roaring bitmap + */ + public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) { + + bb = bb.slice().order(ByteOrder.LITTLE_ENDIAN); + final RoaringBitmap ans = fastRank ? 
new FastRankRoaringBitmap() : new RoaringBitmap(); + + // split buffer into blocks of long[], reuse a ThreadLocal array for blocks + final long[] words = WORD_BLOCK.get(); + int containerIndex = 0; + int blockLength = 0, blockCardinality = 0, offset = 0; + long word; + while (bb.remaining() >= 8) { + word = bb.getLong(); + + // Add read long to block + words[blockLength++] = word; + blockCardinality += Long.bitCount(word); + + // When block is full, add block to bitmap + if (blockLength == BLOCK_LENGTH) { + // Each block becomes a single container, if any bit is set + containerIndex = addBlock(ans, words, containerIndex, blockLength, + blockCardinality, offset); + offset += (blockLength * Long.SIZE); + blockLength = blockCardinality = 0; + } + } + + if (bb.remaining() > 0) { + // Read remaining (less than 8) bytes + word = 0; + for (int remaining = bb.remaining(), j = 0; j < remaining; j++) { + word |= (bb.get() & 0xffL) << (8 * j); + } + + // Add last word to block, only if any bit is set + if (word != 0) { + words[blockLength++] = word; + blockCardinality += Long.bitCount(word); + } + } + + // Add block to map, if any bit is set + addBlock(ans, words, containerIndex, blockLength, blockCardinality, offset); + return ans; + } + + private static int addBlock(RoaringBitmap ans, long[] words, int containerIndex, int blockLength, + int blockCardinality, int offset) { + if (blockCardinality > 0) { + ans.highLowContainer.insertNewKeyValueAt(containerIndex++, Util.highbits(offset), + BitSetUtil.containerOf(0, blockLength, blockCardinality, words)); + Arrays.fill(words, 0); // Zero-out thread local buffer after use + } + return containerIndex; + } + private static int cardinality(final int from, final int to, final long[] words) { int sum = 0; for (int i = from; i < to; i++) { diff --git a/RoaringBitmap/src/test/java/org/roaringbitmap/TestBitSetUtil.java b/RoaringBitmap/src/test/java/org/roaringbitmap/TestBitSetUtil.java index cf0fe453a..1f945b586 100644 --- 
a/RoaringBitmap/src/test/java/org/roaringbitmap/TestBitSetUtil.java +++ b/RoaringBitmap/src/test/java/org/roaringbitmap/TestBitSetUtil.java @@ -1,7 +1,9 @@ package org.roaringbitmap; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.nio.ByteBuffer; import java.util.BitSet; import java.util.Random; @@ -135,4 +137,106 @@ public void testSmallBitSet10000000() { assertEqualBitsets(bitset, bitmap); } + /* + The ByteBuffer->RoaringBitmap just replicate similar tests written for BitSet/long[]->RoaringBitmap + */ + + @Test + public void testEmptyByteBuffer() { + final BitSet bitset = new BitSet(); + final RoaringBitmap bitmap = BitSetUtil.bitmapOf(toByteBuffer(bitset), false); + assertEqualBitsets(bitset, bitmap); + } + + @Test + public void testFlipFlapBetweenRandomFullAndEmptyByteBuffer() { + final Random random = new Random(1234); + final int nbitsPerBlock = 1024 * Long.SIZE; + final int blocks = 50; + final BitSet bitset = new BitSet(nbitsPerBlock * blocks); + + // i want a mix of empty blocks, randomly filled blocks and full blocks + for (int block = 0; block < blocks * nbitsPerBlock; block += nbitsPerBlock) { + int type = random.nextInt(3); + switch (type) { + case 0: + // a block with random set bits + appendRandomBitset(random, block, bitset, nbitsPerBlock); + break; + case 1: + // a full block + bitset.set(block, block + nbitsPerBlock); + break; + default: + // and an empty block; + break; + } + } + final RoaringBitmap bitmap = BitSetUtil.bitmapOf(toByteBuffer(bitset), false); + assertEqualBitsets(bitset, bitmap); + } + + @Test + public void testFullByteBuffer() { + final BitSet bitset = new BitSet(); + final int nbits = 1024 * Long.SIZE * 50; + bitset.set(0, nbits); + final RoaringBitmap bitmap = BitSetUtil.bitmapOf(toByteBuffer(bitset), false); + assertEqualBitsets(bitset, bitmap); + } + + @Test + public void testGapByteBuffer() { + for (int gap = 1; gap <= 4096; gap *= 2) { + for (int offset = 300; offset < 3000; offset 
+= 10) { + BitSet bitset = new BitSet(); + for (int k = 0; k < 100000; k += gap) { + bitset.set(k + offset); + } + final RoaringBitmap bitmap = BitSetUtil.bitmapOf(toByteBuffer(bitset), false); + assertEqualBitsets(bitset, bitmap); + } + } + } + + @Test + public void testRandomByteBuffer() { + final Random random = new Random(8934); + final int runs = 100; + final int maxNbits = 500000; + for (int i = 0;i < runs; ++i) { + final int offset = random.nextInt(maxNbits) & Integer.MAX_VALUE; + final BitSet bitset = randomBitset(random, offset, random.nextInt(maxNbits)); + final RoaringBitmap bitmap = BitSetUtil.bitmapOf(toByteBuffer(bitset), false); + assertEqualBitsets(bitset, bitmap); + } + } + + @Test + public void testByteArrayWithOnly10000000thBitSet() { + final BitSet bitset = new BitSet(); + bitset.set(10000000); + final RoaringBitmap bitmap = BitSetUtil.bitmapOf(toByteBuffer(bitset), false); + assertEqualBitsets(bitset, bitmap); + } + + @Test + public void testByteArrayWithOnly1And10000000thBitSet() { + final BitSet bitset = new BitSet(); + bitset.set(1); + bitset.set(10000000); + final RoaringBitmap bitmap = BitSetUtil.bitmapOf(toByteBuffer(bitset), false); + assertEqualBitsets(bitset, bitmap); + } + + @Test + public void testByteArrayWithFastRank() { + final BitSet bitset = randomBitset(new Random(238), 0, 50); + final RoaringBitmap bitmap = BitSetUtil.bitmapOf(toByteBuffer(bitset), true); + Assertions.assertTrue(bitmap instanceof FastRankRoaringBitmap); + } + + private static ByteBuffer toByteBuffer(BitSet bitset) { + return ByteBuffer.wrap(bitset.toByteArray()); + } } diff --git a/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java b/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java index db7cd0fcd..aa044006f 100644 --- a/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java +++ b/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java @@ -1,18 +1,14 @@ package org.roaringbitmap; +import org.openjdk.jmh.annotations.*; + import 
java.io.DataInputStream; import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.BitSet; import java.util.concurrent.TimeUnit; import java.util.zip.GZIPInputStream; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; - @BenchmarkMode(Mode.AverageTime) @OutputTimeUnit(TimeUnit.MICROSECONDS) public class BitSetUtilBenchmark { @@ -36,6 +32,33 @@ public long BitSetToRoaringUsingBitSetUtil(Data d) { return bogus; } + /* + Given an uncompressed bitset represented as a byte array (basically, as read on wire) + Below benchmarks the perf difference you will get when: + 1. ByteArrayToRoaring - Directly convert the byte array to a roaring bitmap by wrapping it in a ByteBuffer + 2. ByteArrayToBitsetToRoaring - Convert the byte array to a BitSet and then create the bitmap using it + */ + + @Benchmark + public long ByteArrayToRoaring(Data d) { + long bogus = 0; + for (int i = 0; i < d.bitsetsAsBytes.length; i++) { + ByteBuffer bb = ByteBuffer.wrap(d.bitsetsAsBytes[i]); + bogus += BitSetUtil.bitmapOf(bb, false).getCardinality(); + } + return bogus; + } + + + @Benchmark + public long ByteArrayToBitsetToRoaring(Data d) { + long bogus = 0; + for (int i = 0; i < d.bitsetsAsBytes.length; i++) { + BitSet bitset = BitSet.valueOf(d.bitsetsAsBytes[i]); + bogus += BitSetUtil.bitmapOf(bitset).getCardinality(); + } + return bogus; + } private static RoaringBitmap bitmapTheNaiveWay(final long[] words) { int cardinality = 0; @@ -68,30 +91,54 @@ private static RoaringBitmap bitmapTheNaiveWay(final long[] words) { @State(Scope.Benchmark) public static class Data { long[][] bitsets; + byte[][] bitsetsAsBytes; @Setup public void setup() throws IOException { final String bitset = 
"/real-roaring-dataset/bitsets_1925630_96.gz"; this.getClass().getResourceAsStream(bitset); this.bitsets = deserialize(bitset); + this.bitsetsAsBytes = bitsetsAsBytes(bitsets); + } + + private byte[][] bitsetsAsBytes(long[][] bitsets) { + byte[][] bitsetsAsBytes = new byte[bitsets.length][]; + for (int i = 0; i < bitsets.length; i++) { + long[] bitset = bitsets[i]; + bitsetsAsBytes[i] = BitSet.valueOf(bitset).toByteArray(); + } + return bitsetsAsBytes; } private long[][] deserialize(final String bitsetResource) throws IOException { final DataInputStream dos = new DataInputStream( new GZIPInputStream(this.getClass().getResourceAsStream(bitsetResource))); try { - final long[][] bitset = new long[dos.readInt()][]; - for (int i = 0; i < bitset.length; i++) { + /* Change this value to see number for small vs large bitsets + wordSize = 64 represents 4096 bits (512 bytes) + wordSize = 512 represents 32768 bits (~4kb) + wordSize = 8192 represents 524288 bits (~64kb) + wordSize = 131072 represents 8388608 bits (~8.3 million, ~1mb) + */ + final int minTotalWordSize = 64; + // Try to keep size of bitsets created below 1 gb + final int bitsetCnt = Math.min((1024 * 1024 * 1024) / (minTotalWordSize * 8), dos.readInt()); + + final long[][] bitset = new long[bitsetCnt][]; + for (int i = 0; i < bitsetCnt; i++) { final int wordSize = dos.readInt(); // for duplication, to make bitsets wider - final int clone = 0; - final long words[] = new long[wordSize * (clone + 1)]; + final int clone = (minTotalWordSize + wordSize) / wordSize; + final long[] words = new long[wordSize * (clone + 1)]; for (int j = 0; j < wordSize; j++) { words[j] = dos.readLong(); } // duplicate long[] n times to the right + for(int j = 0; j < clone; j++) { + System.arraycopy(words, 0, words, (j+1)*wordSize, wordSize); + } bitset[i] = words; } return bitset; @@ -101,5 +148,4 @@ private long[][] deserialize(final String bitsetResource) throws IOException { } } - } From e13ee56298e740587f96ae1b54d51642f22720e0 Mon Sep 
17 00:00:00 2001 From: skr Date: Mon, 7 Aug 2023 14:23:11 +0530 Subject: [PATCH 2/7] fix: copy only exact range provided when creating a bitmap container - this removes the need to zero-out the threadlocal buffer everytime --- .../src/main/java/org/roaringbitmap/BitSetUtil.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java b/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java index c8df7ddba..9f25a2f35 100644 --- a/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java +++ b/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java @@ -3,7 +3,6 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.Arrays; import java.util.BitSet; @@ -137,7 +136,6 @@ private static int addBlock(RoaringBitmap ans, long[] words, int containerIndex, if (blockCardinality > 0) { ans.highLowContainer.insertNewKeyValueAt(containerIndex++, Util.highbits(offset), BitSetUtil.containerOf(0, blockLength, blockCardinality, words)); - Arrays.fill(words, 0); // Zero-out thread local buffer after use } return containerIndex; } @@ -160,8 +158,9 @@ private static Container containerOf(final int from, final int to, final int blo return arrayContainerOf(from, to, blockCardinality, words); } else { // otherwise use bitmap container - return new BitmapContainer(Arrays.copyOfRange(words, from, from + BLOCK_LENGTH), - blockCardinality); + long[] container = new long[BLOCK_LENGTH]; + System.arraycopy(words, from, container, 0, to - from); + return new BitmapContainer(container, blockCardinality); } } From 7fd4ced4ad78ad9e15a49786cfd233d0a3cb00b1 Mon Sep 17 00:00:00 2001 From: skr Date: Mon, 7 Aug 2023 20:27:40 +0530 Subject: [PATCH 3/7] * minor refactor --- .../java/org/roaringbitmap/BitSetUtil.java | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java 
b/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java index 9f25a2f35..e87b67146 100644 --- a/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java +++ b/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java @@ -105,15 +105,18 @@ public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) { // When block is full, add block to bitmap if (blockLength == BLOCK_LENGTH) { // Each block becomes a single container, if any bit is set - containerIndex = addBlock(ans, words, containerIndex, blockLength, - blockCardinality, offset); - offset += (blockLength * Long.SIZE); + if (blockCardinality > 0) { + ans.highLowContainer.insertNewKeyValueAt(containerIndex++, Util.highbits(offset), + BitSetUtil.containerOf(0, blockLength, blockCardinality, words)); + } + offset += (BLOCK_LENGTH * Long.SIZE); blockLength = blockCardinality = 0; } } if (bb.remaining() > 0) { // Read remaining (less than 8) bytes + // We can do this in while loop also, it will probably slow things down a bit though word = 0; for (int remaining = bb.remaining(), j = 0; j < remaining; j++) { word |= (bb.get() & 0xffL) << (8 * j); @@ -127,17 +130,11 @@ public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) { } // Add block to map, if any bit is set - addBlock(ans, words, containerIndex, blockLength, blockCardinality, offset); - return ans; - } - - private static int addBlock(RoaringBitmap ans, long[] words, int containerIndex, int blockLength, - int blockCardinality, int offset) { if (blockCardinality > 0) { - ans.highLowContainer.insertNewKeyValueAt(containerIndex++, Util.highbits(offset), + ans.highLowContainer.insertNewKeyValueAt(containerIndex, Util.highbits(offset), BitSetUtil.containerOf(0, blockLength, blockCardinality, words)); } - return containerIndex; + return ans; } private static int cardinality(final int from, final int to, final long[] words) { From 3a54b5a30cb5619ae9654c25153375938b14b20a Mon Sep 17 00:00:00 2001 From: skr Date: Mon, 7 Aug 2023 
21:12:51 +0530 Subject: [PATCH 4/7] * refactor: let users provide a cached buffer if they are that needy --- .../java/org/roaringbitmap/BitSetUtil.java | 42 +++++++++++---- .../roaringbitmap/BitSetUtilBenchmark.java | 52 ++++++++++++------- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java b/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java index e87b67146..211e8f78b 100644 --- a/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java +++ b/RoaringBitmap/src/main/java/org/roaringbitmap/BitSetUtil.java @@ -16,7 +16,7 @@ public class BitSetUtil { // a block consists has a maximum of 1024 words, each representing 64 bits, // thus representing at maximum 65536 bits - static final private int BLOCK_LENGTH = BitmapContainer.MAX_CAPACITY / Long.SIZE; // + public static final int BLOCK_LENGTH = BitmapContainer.MAX_CAPACITY / Long.SIZE; // // 64-bit // word @@ -72,10 +72,6 @@ public static RoaringBitmap bitmapOf(final long[] words) { return ans; } - // To avoid memory allocation, reuse ThreadLocal buffers - private static final ThreadLocal WORD_BLOCK = ThreadLocal.withInitial(() -> - new long[BLOCK_LENGTH]); - /** * Efficiently generate a RoaringBitmap from an uncompressed byte array or ByteBuffer * This method tries to minimise all kinds of memory allocation @@ -86,12 +82,32 @@ public static RoaringBitmap bitmapOf(final long[] words) { * @return roaring bitmap */ public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) { + return bitmapOf(bb, fastRank, new long[BLOCK_LENGTH]); + } + + /** + * Efficiently generate a RoaringBitmap from an uncompressed byte array or ByteBuffer + * This method tries to minimise all kinds of memory allocation + *
+ * You can provide a cached wordsBuffer for avoiding 8 KB of extra allocation on every call + * No reference is kept to the wordsBuffer, so it can be cached as a ThreadLocal + * + * @param bb the uncompressed bitmap + * @param fastRank if set, returned bitmap is of type + * {@link org.roaringbitmap.FastRankRoaringBitmap} + * @param wordsBuffer buffer of length {@link BitSetUtil#BLOCK_LENGTH} + * @return roaring bitmap + */ + public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank, long[] wordsBuffer) { + + if (wordsBuffer.length != BLOCK_LENGTH) { + throw new IllegalArgumentException("wordsBuffer length should be " + BLOCK_LENGTH); + } bb = bb.slice().order(ByteOrder.LITTLE_ENDIAN); final RoaringBitmap ans = fastRank ? new FastRankRoaringBitmap() : new RoaringBitmap(); - // split buffer into blocks of long[], reuse a ThreadLocal array for blocks - final long[] words = WORD_BLOCK.get(); + // split buffer into blocks of long[] int containerIndex = 0; int blockLength = 0, blockCardinality = 0, offset = 0; long word; @@ -99,7 +115,7 @@ public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) { word = bb.getLong(); // Add read long to block - words[blockLength++] = word; + wordsBuffer[blockLength++] = word; blockCardinality += Long.bitCount(word); // When block is full, add block to bitmap @@ -107,8 +123,12 @@ public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) { // Each block becomes a single container, if any bit is set if (blockCardinality > 0) { ans.highLowContainer.insertNewKeyValueAt(containerIndex++, Util.highbits(offset), - BitSetUtil.containerOf(0, blockLength, blockCardinality, words)); + BitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer)); } + /* + Offset can overflow when bitsets size is more than Integer.MAX_VALUE - 64 + It's harmless though, as it will happen after the last block is added + */ offset += (BLOCK_LENGTH * Long.SIZE); blockLength = blockCardinality = 0; } @@ -124,7 +144,7 @@ 
public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) { // Add last word to block, only if any bit is set if (word != 0) { - words[blockLength++] = word; + wordsBuffer[blockLength++] = word; blockCardinality += Long.bitCount(word); } } @@ -132,7 +152,7 @@ public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) { // Add block to map, if any bit is set if (blockCardinality > 0) { ans.highLowContainer.insertNewKeyValueAt(containerIndex, Util.highbits(offset), - BitSetUtil.containerOf(0, blockLength, blockCardinality, words)); + BitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer)); } return ans; } diff --git a/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java b/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java index aa044006f..413d40fd9 100644 --- a/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java +++ b/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java @@ -13,30 +13,34 @@ @OutputTimeUnit(TimeUnit.MICROSECONDS) public class BitSetUtilBenchmark { - @Benchmark - public long BitSetToRoaringByAddingBitByBit(Data d) { - long bogus = 0; - for (int i = 0; i < d.bitsets.length; i++) { - bogus += bitmapTheNaiveWay(d.bitsets[i]).getCardinality(); - } - return bogus; - } - - - @Benchmark - public long BitSetToRoaringUsingBitSetUtil(Data d) { - long bogus = 0; - for (int i = 0; i < d.bitsets.length; i++) { - bogus += BitSetUtil.bitmapOf(d.bitsets[i]).getCardinality(); - } - return bogus; - } +// @Benchmark +// public long BitSetToRoaringByAddingBitByBit(Data d) { +// long bogus = 0; +// for (int i = 0; i < d.bitsets.length; i++) { +// bogus += bitmapTheNaiveWay(d.bitsets[i]).getCardinality(); +// } +// return bogus; +// } +// +// +// @Benchmark +// public long BitSetToRoaringUsingBitSetUtil(Data d) { +// long bogus = 0; +// for (int i = 0; i < d.bitsets.length; i++) { +// bogus += BitSetUtil.bitmapOf(d.bitsets[i]).getCardinality(); +// } +// return bogus; +// } + + private static final 
ThreadLocal WORD_BLOCK = ThreadLocal.withInitial(() -> + new long[BitSetUtil.BLOCK_LENGTH]); /* Given an uncompressed bitset represented as a byte array (basically, as read on wire) Below benchmarks the perf difference you will get when: 1. ByteArrayToRoaring - Directly convert the byte array to a roaring bitmap by wrapping it in a ByteBuffer 2. ByteArrayToBitsetToRoaring - Convert the byte array to a BitSet and then create the bitmap using it + 3. ByteArrayToRoaringWithCachedBuffer - Directly convert and use a cached reused buffer */ @Benchmark @@ -49,6 +53,16 @@ public long ByteArrayToRoaring(Data d) { return bogus; } + @Benchmark + public long ByteArrayToRoaringWithCachedBuffer(Data d) { + long bogus = 0; + for (int i = 0; i < d.bitsetsAsBytes.length; i++) { + ByteBuffer bb = ByteBuffer.wrap(d.bitsetsAsBytes[i]); + bogus += BitSetUtil.bitmapOf(bb, false, WORD_BLOCK.get()).getCardinality(); + } + return bogus; + } + @Benchmark public long ByteArrayToBitsetToRoaring(Data d) { @@ -120,7 +134,7 @@ private long[][] deserialize(final String bitsetResource) throws IOException { wordSize = 8192 represents 524288 bits (~64kb) wordSize = 131072 represents 8388608 bits (~8.3 million, ~1mb) */ - final int minTotalWordSize = 64; + final int minTotalWordSize = 512; // Try to keep size of bitsets created below 1 gb final int bitsetCnt = Math.min((1024 * 1024 * 1024) / (minTotalWordSize * 8), dos.readInt()); From cbf6e7f8eb601565645ae8a9d209a158ae46926b Mon Sep 17 00:00:00 2001 From: skr Date: Mon, 7 Aug 2023 21:13:20 +0530 Subject: [PATCH 5/7] * new: adds similar method in BufferBitSetUtil --- .../buffer/BufferBitSetUtil.java | 90 ++++++++++++++++- .../roaringbitmap/buffer/TestBitSetUtil.java | 97 +++++++++++++++++++ 2 files changed, 184 insertions(+), 3 deletions(-) diff --git a/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java b/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java index 0f2229104..32ea905e4 100644 --- 
a/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java +++ b/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java @@ -1,11 +1,13 @@ package org.roaringbitmap.buffer; +import org.roaringbitmap.BitSetUtil; import org.roaringbitmap.IntIterator; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.CharBuffer; import java.nio.LongBuffer; -import java.util.Arrays; import java.util.BitSet; import static java.lang.Long.numberOfTrailingZeros; @@ -77,6 +79,87 @@ public static MutableRoaringBitmap bitmapOf(final long[] words) { return ans; } + /** + * Efficiently generate a RoaringBitmap from an uncompressed byte array or ByteBuffer + * This method tries to minimise all kinds of memory allocation + * + * @param bb the uncompressed bitmap + * @return roaring bitmap + */ + public static MutableRoaringBitmap bitmapOf(ByteBuffer bb) { + return bitmapOf(bb, new long[BLOCK_LENGTH]); + } + + /** + * Efficiently generate a RoaringBitmap from an uncompressed byte array or ByteBuffer + * This method tries to minimise all kinds of memory allocation + *
+ * You can provide a cached wordsBuffer for avoiding 8 KB of extra allocation on every call + * No reference is kept to the wordsBuffer, so it can be cached as a ThreadLocal + * + * @param bb the uncompressed bitmap + * @param wordsBuffer buffer of length {@link BitSetUtil#BLOCK_LENGTH} + * @return roaring bitmap + */ + public static MutableRoaringBitmap bitmapOf(ByteBuffer bb, long[] wordsBuffer) { + + if (wordsBuffer.length != BLOCK_LENGTH) { + throw new IllegalArgumentException("wordsBuffer length should be " + BLOCK_LENGTH); + } + + bb = bb.slice().order(ByteOrder.LITTLE_ENDIAN); + final MutableRoaringBitmap ans = new MutableRoaringBitmap(); + + // split buffer into blocks of long[] + int containerIndex = 0; + int blockLength = 0, blockCardinality = 0, offset = 0; + long word; + while (bb.remaining() >= 8) { + word = bb.getLong(); + + // Add read long to block + wordsBuffer[blockLength++] = word; + blockCardinality += Long.bitCount(word); + + // When block is full, add block to bitmap + if (blockLength == BLOCK_LENGTH) { + // Each block becomes a single container, if any bit is set + if (blockCardinality > 0) { + ((MutableRoaringArray) ans.highLowContainer).insertNewKeyValueAt(containerIndex++, BufferUtil.highbits(offset), + BufferBitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer)); + } + /* + Offset can overflow when bitsets size is more than Integer.MAX_VALUE - 64 + It's harmless though, as it will happen after the last block is added + */ + offset += (BLOCK_LENGTH * Long.SIZE); + blockLength = blockCardinality = 0; + } + } + + if (bb.remaining() > 0) { + // Read remaining (less than 8) bytes + // We can do this in while loop also, it will probably slow things down a bit though + word = 0; + for (int remaining = bb.remaining(), j = 0; j < remaining; j++) { + word |= (bb.get() & 0xffL) << (8 * j); + } + + // Add last word to block, only if any bit is set + if (word != 0) { + wordsBuffer[blockLength++] = word; + blockCardinality += 
Long.bitCount(word); + } + } + + // Add block to map, if any bit is set + if (blockCardinality > 0) { + ((MutableRoaringArray) ans.highLowContainer).insertNewKeyValueAt(containerIndex, BufferUtil.highbits(offset), + BufferBitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer)); + } + return ans; + } + private static int cardinality(final int from, final int to, final long[] words) { int sum = 0; for (int i = from; i < to; i++) { @@ -95,8 +178,9 @@ private static MappeableContainer containerOf(final int from, final int to, return arrayContainerOf(from, to, blockCardinality, words); } else { // otherwise use bitmap container - return new MappeableBitmapContainer( - LongBuffer.wrap(Arrays.copyOfRange(words, from, from + BLOCK_LENGTH)), blockCardinality); + long[] container = new long[BLOCK_LENGTH]; + System.arraycopy(words, from, container, 0, to - from); + return new MappeableBitmapContainer(LongBuffer.wrap(container), blockCardinality); } } diff --git a/RoaringBitmap/src/test/java/org/roaringbitmap/buffer/TestBitSetUtil.java b/RoaringBitmap/src/test/java/org/roaringbitmap/buffer/TestBitSetUtil.java index 9052576de..dda8cce32 100644 --- a/RoaringBitmap/src/test/java/org/roaringbitmap/buffer/TestBitSetUtil.java +++ b/RoaringBitmap/src/test/java/org/roaringbitmap/buffer/TestBitSetUtil.java @@ -4,6 +4,7 @@ import org.junit.jupiter.api.parallel.Execution; import org.junit.jupiter.api.parallel.ExecutionMode; +import java.nio.ByteBuffer; import java.util.BitSet; import java.util.Random; @@ -137,4 +138,100 @@ public void testSmallBitSet10000000() { assertEqualBitsets(bitset, bitmap); } + /* + The ByteBuffer->RoaringBitmap just replicate similar tests written for BitSet/long[]->RoaringBitmap + */ + + @Test + public void testEmptyByteBuffer() { + final BitSet bitset = new BitSet(); + final MutableRoaringBitmap bitmap = BufferBitSetUtil.bitmapOf(toByteBuffer(bitset)); + assertEqualBitsets(bitset, bitmap); + } + + @Test + public void 
testFlipFlapBetweenRandomFullAndEmptyByteBuffer() { + final Random random = new Random(1234); + final int nbitsPerBlock = 1024 * Long.SIZE; + final int blocks = 50; + final BitSet bitset = new BitSet(nbitsPerBlock * blocks); + + // i want a mix of empty blocks, randomly filled blocks and full blocks + for (int block = 0; block < blocks * nbitsPerBlock; block += nbitsPerBlock) { + int type = random.nextInt(3); + switch (type) { + case 0: + // a block with random set bits + appendRandomBitset(random, block, bitset, nbitsPerBlock); + break; + case 1: + // a full block + bitset.set(block, block + nbitsPerBlock); + break; + default: + // and an empty block; + break; + } + } + final MutableRoaringBitmap bitmap = BufferBitSetUtil.bitmapOf(toByteBuffer(bitset)); + assertEqualBitsets(bitset, bitmap); + } + + @Test + public void testFullByteBuffer() { + final BitSet bitset = new BitSet(); + final int nbits = 1024 * Long.SIZE * 50; + bitset.set(0, nbits); + final MutableRoaringBitmap bitmap = BufferBitSetUtil.bitmapOf(toByteBuffer(bitset)); + assertEqualBitsets(bitset, bitmap); + } + + @Test + public void testGapByteBuffer() { + for (int gap = 1; gap <= 4096; gap *= 2) { + for (int offset = 300; offset < 3000; offset += 10) { + BitSet bitset = new BitSet(); + for (int k = 0; k < 100000; k += gap) { + bitset.set(k + offset); + } + final MutableRoaringBitmap bitmap = BufferBitSetUtil.bitmapOf(toByteBuffer(bitset)); + assertEqualBitsets(bitset, bitmap); + } + } + } + + @Test + public void testRandomByteBuffer() { + final Random random = new Random(8934); + final int runs = 100; + final int maxNbits = 500000; + for (int i = 0;i < runs; ++i) { + final int offset = random.nextInt(maxNbits) & Integer.MAX_VALUE; + final BitSet bitset = randomBitset(random, offset, random.nextInt(maxNbits)); + final MutableRoaringBitmap bitmap = BufferBitSetUtil.bitmapOf(toByteBuffer(bitset)); + assertEqualBitsets(bitset, bitmap); + } + } + + @Test + public void testByteArrayWithOnly10000000thBitSet() 
{ + final BitSet bitset = new BitSet(); + bitset.set(10000000); + final MutableRoaringBitmap bitmap = BufferBitSetUtil.bitmapOf(toByteBuffer(bitset)); + assertEqualBitsets(bitset, bitmap); + } + + @Test + public void testByteArrayWithOnly1And10000000thBitSet() { + final BitSet bitset = new BitSet(); + bitset.set(1); + bitset.set(10000000); + final MutableRoaringBitmap bitmap = BufferBitSetUtil.bitmapOf(toByteBuffer(bitset)); + assertEqualBitsets(bitset, bitmap); + } + + private static ByteBuffer toByteBuffer(BitSet bitset) { + return ByteBuffer.wrap(bitset.toByteArray()); + } + } From 70097aabf1a53d252a43f7e1e31eef323b53d21e Mon Sep 17 00:00:00 2001 From: skr Date: Mon, 7 Aug 2023 21:17:38 +0530 Subject: [PATCH 6/7] * refactor: for style --- .../java/org/roaringbitmap/buffer/BufferBitSetUtil.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java b/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java index 32ea905e4..78192dcc6 100644 --- a/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java +++ b/RoaringBitmap/src/main/java/org/roaringbitmap/buffer/BufferBitSetUtil.java @@ -125,8 +125,9 @@ public static MutableRoaringBitmap bitmapOf(ByteBuffer bb, long[] wordsBuffer) { if (blockLength == BLOCK_LENGTH) { // Each block becomes a single container, if any bit is set if (blockCardinality > 0) { - ((MutableRoaringArray) ans.highLowContainer).insertNewKeyValueAt(containerIndex++, BufferUtil.highbits(offset), - BufferBitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer)); + ((MutableRoaringArray) ans.highLowContainer).insertNewKeyValueAt(containerIndex++, + BufferUtil.highbits(offset), BufferBitSetUtil.containerOf(0, blockLength, + blockCardinality, wordsBuffer)); } /* Offset can overflow when bitsets size is more than Integer.MAX_VALUE - 64 @@ -154,7 +155,8 @@ public static MutableRoaringBitmap bitmapOf(ByteBuffer 
bb, long[] wordsBuffer) { // Add block to map, if any bit is set if (blockCardinality > 0) { - ((MutableRoaringArray) ans.highLowContainer).insertNewKeyValueAt(containerIndex, BufferUtil.highbits(offset), + ((MutableRoaringArray) ans.highLowContainer).insertNewKeyValueAt(containerIndex, + BufferUtil.highbits(offset), BufferBitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer)); } return ans; From 39ce338e9ad22cbd4083e994d07537180c30d615 Mon Sep 17 00:00:00 2001 From: Shikhar Singh <10097451+shikharid@users.noreply.github.com> Date: Tue, 8 Aug 2023 19:51:46 +0530 Subject: [PATCH 7/7] * fix: uncomment commented out benchmarks had hidden locally, forgot to uncomment --- .../roaringbitmap/BitSetUtilBenchmark.java | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java b/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java index 413d40fd9..3c0950eeb 100644 --- a/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java +++ b/jmh/src/jmh/java/org/roaringbitmap/BitSetUtilBenchmark.java @@ -13,24 +13,24 @@ @OutputTimeUnit(TimeUnit.MICROSECONDS) public class BitSetUtilBenchmark { -// @Benchmark -// public long BitSetToRoaringByAddingBitByBit(Data d) { -// long bogus = 0; -// for (int i = 0; i < d.bitsets.length; i++) { -// bogus += bitmapTheNaiveWay(d.bitsets[i]).getCardinality(); -// } -// return bogus; -// } -// -// -// @Benchmark -// public long BitSetToRoaringUsingBitSetUtil(Data d) { -// long bogus = 0; -// for (int i = 0; i < d.bitsets.length; i++) { -// bogus += BitSetUtil.bitmapOf(d.bitsets[i]).getCardinality(); -// } -// return bogus; -// } + @Benchmark + public long BitSetToRoaringByAddingBitByBit(Data d) { + long bogus = 0; + for (int i = 0; i < d.bitsets.length; i++) { + bogus += bitmapTheNaiveWay(d.bitsets[i]).getCardinality(); + } + return bogus; + } + + + @Benchmark + public long BitSetToRoaringUsingBitSetUtil(Data d) { + long bogus = 0; + for 
(int i = 0; i < d.bitsets.length; i++) { + bogus += BitSetUtil.bitmapOf(d.bitsets[i]).getCardinality(); + } + return bogus; + } private static final ThreadLocal<long[]> WORD_BLOCK = ThreadLocal.withInitial(() -> new long[BitSetUtil.BLOCK_LENGTH]);