-
Notifications
You must be signed in to change notification settings - Fork 539
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimisation: Add zero-garbage deserialiser for ByteBuffer to RoaringBitmap #650
Changes from all commits
df4f7bb
e13ee56
7fd4ced
3a54b5a
cbf6e7f
70097aa
39ce338
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,8 @@ | ||
package org.roaringbitmap; | ||
|
||
|
||
import java.util.Arrays; | ||
import java.nio.ByteBuffer; | ||
import java.nio.ByteOrder; | ||
import java.util.BitSet; | ||
|
||
|
||
|
@@ -15,7 +16,7 @@ public class BitSetUtil { | |
|
||
// a block has a maximum of 1024 words, each representing 64 bits, | ||
// thus representing at maximum 65536 bits | ||
static final private int BLOCK_LENGTH = BitmapContainer.MAX_CAPACITY / Long.SIZE; // | ||
public static final int BLOCK_LENGTH = BitmapContainer.MAX_CAPACITY / Long.SIZE; // | ||
// 64-bit | ||
// word | ||
|
||
|
@@ -71,6 +72,91 @@ public static RoaringBitmap bitmapOf(final long[] words) { | |
return ans; | ||
} | ||
|
||
/** | ||
* Efficiently generate a RoaringBitmap from an uncompressed byte array or ByteBuffer | ||
* This method tries to minimise all kinds of memory allocation | ||
* | ||
* @param bb the uncompressed bitmap | ||
* @param fastRank if set, returned bitmap is of type | ||
* {@link org.roaringbitmap.FastRankRoaringBitmap} | ||
* @return roaring bitmap | ||
*/ | ||
public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank) { | ||
return bitmapOf(bb, fastRank, new long[BLOCK_LENGTH]); | ||
} | ||
|
||
/** | ||
* Efficiently generate a RoaringBitmap from an uncompressed byte array or ByteBuffer | ||
* This method tries to minimise all kinds of memory allocation | ||
* <br> | ||
* You can provide a cached wordsBuffer for avoiding 8 KB of extra allocation on every call | ||
* No reference is kept to the wordsBuffer, so it can be cached as a ThreadLocal | ||
* | ||
* @param bb the uncompressed bitmap | ||
* @param fastRank if set, returned bitmap is of type | ||
* {@link org.roaringbitmap.FastRankRoaringBitmap} | ||
* @param wordsBuffer buffer of length {@link BitSetUtil#BLOCK_LENGTH} | ||
* @return roaring bitmap | ||
*/ | ||
public static RoaringBitmap bitmapOf(ByteBuffer bb, boolean fastRank, long[] wordsBuffer) { | ||
|
||
if (wordsBuffer.length != BLOCK_LENGTH) { | ||
throw new IllegalArgumentException("wordsBuffer length should be " + BLOCK_LENGTH); | ||
} | ||
|
||
bb = bb.slice().order(ByteOrder.LITTLE_ENDIAN); | ||
final RoaringBitmap ans = fastRank ? new FastRankRoaringBitmap() : new RoaringBitmap(); | ||
|
||
// split buffer into blocks of long[] | ||
int containerIndex = 0; | ||
int blockLength = 0, blockCardinality = 0, offset = 0; | ||
long word; | ||
while (bb.remaining() >= 8) { | ||
word = bb.getLong(); | ||
|
||
// Add read long to block | ||
wordsBuffer[blockLength++] = word; | ||
blockCardinality += Long.bitCount(word); | ||
|
||
// When block is full, add block to bitmap | ||
if (blockLength == BLOCK_LENGTH) { | ||
// Each block becomes a single container, if any bit is set | ||
if (blockCardinality > 0) { | ||
ans.highLowContainer.insertNewKeyValueAt(containerIndex++, Util.highbits(offset), | ||
BitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer)); | ||
} | ||
/* | ||
Offset can overflow when bitsets size is more than Integer.MAX_VALUE - 64 | ||
It's harmless though, as it will happen after the last block is added | ||
*/ | ||
offset += (BLOCK_LENGTH * Long.SIZE); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Though this can be reasonably dismissed, there is the possibility that offset overflows. Make sure that the offset variable cannot overflow (hopefully it cannot due to the the max size of a Java Bitset, but please be specific, maybe with a comment). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't do much here, will add a comment. |
||
blockLength = blockCardinality = 0; | ||
} | ||
} | ||
|
||
if (bb.remaining() > 0) { | ||
// Read remaining (less than 8) bytes | ||
// We can do this in while loop also, it will probably slow things down a bit though | ||
word = 0; | ||
for (int remaining = bb.remaining(), j = 0; j < remaining; j++) { | ||
word |= (bb.get() & 0xffL) << (8 * j); | ||
} | ||
|
||
// Add last word to block, only if any bit is set | ||
if (word != 0) { | ||
wordsBuffer[blockLength++] = word; | ||
blockCardinality += Long.bitCount(word); | ||
} | ||
} | ||
|
||
// Add block to map, if any bit is set | ||
if (blockCardinality > 0) { | ||
ans.highLowContainer.insertNewKeyValueAt(containerIndex, Util.highbits(offset), | ||
BitSetUtil.containerOf(0, blockLength, blockCardinality, wordsBuffer)); | ||
} | ||
return ans; | ||
} | ||
|
||
private static int cardinality(final int from, final int to, final long[] words) { | ||
int sum = 0; | ||
for (int i = from; i < to; i++) { | ||
|
@@ -89,8 +175,9 @@ private static Container containerOf(final int from, final int to, final int blo | |
return arrayContainerOf(from, to, blockCardinality, words); | ||
} else { | ||
// otherwise use bitmap container | ||
return new BitmapContainer(Arrays.copyOfRange(words, from, from + BLOCK_LENGTH), | ||
blockCardinality); | ||
long[] container = new long[BLOCK_LENGTH]; | ||
System.arraycopy(words, from, container, 0, to - from); | ||
return new BitmapContainer(container, blockCardinality); | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it necessary to make BLOCK_LENGTH public?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't see any other neat way to expose the information.
Since the user can provide the buffer, they need to know at least what size it needs to be.
Can just mention it in the Javadoc; the bounds check will raise an error anyway if a badly sized buffer is provided.