Skip to content

Commit

Permalink
make it possible to disable on the fly run compression
Browse files Browse the repository at this point in the history
  • Loading branch information
richardstartin committed May 30, 2020
1 parent 177ee33 commit 4268dcd
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 59 deletions.
Expand Up @@ -34,7 +34,8 @@
public class ConstantMemoryContainerAppender<T extends BitmapDataProvider
& AppendableStorage<Container>> implements RoaringBitmapWriter<T> {

private boolean doPartialSort;
private final boolean doPartialSort;
private final boolean runCompress;
private static final int WORD_COUNT = 1 << 10;
private final long[] bitmap;
private final Supplier<T> newUnderlying;
Expand All @@ -46,12 +47,16 @@ public class ConstantMemoryContainerAppender<T extends BitmapDataProvider
* Initialize a ConstantMemoryContainerAppender with a receiving bitmap
*
* @param doPartialSort indicates whether to sort the upper 16 bits of input data in addMany
* @param runCompress whether to run compress appended containers
* @param newUnderlying supplier of bitmaps where the data gets written
*/
ConstantMemoryContainerAppender(boolean doPartialSort, Supplier<T> newUnderlying) {
ConstantMemoryContainerAppender(boolean doPartialSort,
boolean runCompress,
Supplier<T> newUnderlying) {
this.newUnderlying = newUnderlying;
this.underlying = newUnderlying.get();
this.doPartialSort = doPartialSort;
this.runCompress = runCompress;
this.bitmap = new long[WORD_COUNT];
}

Expand Down Expand Up @@ -125,8 +130,10 @@ public void reset() {
}

private Container chooseBestContainer() {
Container container = new BitmapContainer(bitmap, -1)
.repairAfterLazy().runOptimize();
Container container = new BitmapContainer(bitmap, -1).repairAfterLazy();
if (runCompress) {
container = container.runOptimize();
}
return container instanceof BitmapContainer ? container.clone() : container;
}

Expand Down
Expand Up @@ -35,6 +35,7 @@ public class ContainerAppender<C extends WordStorage<C>,


private final boolean doPartialSort;
private final boolean runCompress;
private final Supplier<C> newContainer;
private final Supplier<T> newUnderlying;
private C container;
Expand All @@ -45,8 +46,12 @@ public class ContainerAppender<C extends WordStorage<C>,
* Initialize a ContainerAppender with a receiving bitmap
*
*/
ContainerAppender(boolean doPartialSort, Supplier<T> newUnderlying, Supplier<C> newContainer) {
ContainerAppender(boolean doPartialSort,
boolean runCompress,
Supplier<T> newUnderlying,
Supplier<C> newContainer) {
this.doPartialSort = doPartialSort;
this.runCompress = runCompress;
this.newUnderlying = newUnderlying;
this.underlying = newUnderlying.get();
this.newContainer = newContainer;
Expand Down Expand Up @@ -125,7 +130,8 @@ public void reset() {
private int appendToUnderlying() {
if (!container.isEmpty()) {
assert currentKey <= 0xFFFF;
underlying.append((char) currentKey, container.runOptimize());
underlying.append((char) currentKey,
runCompress ? container.runOptimize() : container);
container = newContainer.get();
return 1;
}
Expand Down
Expand Up @@ -21,6 +21,7 @@ abstract class Wizard<C extends WordStorage<C>,
protected int initialCapacity = RoaringArray.INITIAL_CAPACITY;
protected boolean constantMemory;
protected boolean partiallySortValues = false;
protected boolean runCompress = true;
protected Supplier<C> containerSupplier;
protected int expectedContainerSize = 16;

Expand All @@ -47,6 +48,17 @@ public Wizard<C, T> optimiseForRuns() {
return this;
}

/**
* Controls whether containers are run-compressed on the fly as they are appended.
* Run compression is enabled by default; when it is disabled here, the bitmap
* can be run compressed at the end instead.
*
* @param runCompress {@code true} to apply run compression on the fly,
*                    {@code false} to skip it.
* @return this
*/
public Wizard<C, T> runCompress(boolean runCompress) {
this.runCompress = runCompress;
return this;
}

/**
*
* @param count how many values are expected to fall within any 65536 bit range.
Expand Down Expand Up @@ -140,8 +152,8 @@ public Wizard<C, T> doPartialRadixSort() {
@Override
public RoaringBitmapWriter<T> get() {
int capacity = initialCapacity;
return new ContainerAppender<>(
partiallySortValues, () -> createUnderlying(capacity), containerSupplier);
return new ContainerAppender<>(partiallySortValues, runCompress,
() -> createUnderlying(capacity), containerSupplier);
}

private static void sanityCheck(int count) {
Expand Down Expand Up @@ -196,7 +208,7 @@ public RoaringBitmapWriter<T> get() {
if (constantMemory) {
int capacity = initialCapacity;
return new ConstantMemoryContainerAppender<>(
partiallySortValues, () -> createUnderlying(capacity));
partiallySortValues, runCompress, () -> createUnderlying(capacity));
}
return super.get();
}
Expand Down
Expand Up @@ -21,54 +21,102 @@ public class TestRoaringBitmapWriter {

public static Stream<Arguments> params() {
return Stream.of(
Arguments.of(writer().optimiseForArrays()),
Arguments.of(writer().optimiseForRuns()),
Arguments.of(writer().constantMemory()),
Arguments.of(writer().optimiseForArrays().fastRank()),
Arguments.of(writer().optimiseForRuns().fastRank()),
Arguments.of(writer().constantMemory().fastRank()),
Arguments.of(writer().expectedDensity(0.001)),
Arguments.of(writer().expectedDensity(0.01)),
Arguments.of(writer().expectedDensity(0.1)),
Arguments.of(writer().expectedDensity(0.6)),
Arguments.of(writer().expectedDensity(0.001).fastRank()),
Arguments.of(writer().expectedDensity(0.01).fastRank()),
Arguments.of(writer().expectedDensity(0.1).fastRank()),
Arguments.of(writer().expectedDensity(0.6).fastRank()),
Arguments.of(writer().initialCapacity(1)),
Arguments.of(writer().initialCapacity(8)),
Arguments.of(writer().initialCapacity(8192)),
Arguments.of(writer().initialCapacity(1).fastRank()),
Arguments.of(writer().initialCapacity(8).fastRank()),
Arguments.of(writer().initialCapacity(8192).fastRank()),
Arguments.of(writer().optimiseForArrays().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().optimiseForRuns().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().constantMemory().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().optimiseForArrays().expectedRange(0, toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(writer().optimiseForRuns().expectedRange(0, toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(writer().constantMemory().expectedRange(0, toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(writer().optimiseForArrays().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().optimiseForRuns().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().constantMemory().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().optimiseForArrays().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(writer().optimiseForRuns().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(writer().constantMemory().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(bufferWriter().optimiseForArrays()),
Arguments.of(bufferWriter().optimiseForRuns()),
Arguments.of(bufferWriter().constantMemory()),
Arguments.of(bufferWriter().expectedDensity(0.001)),
Arguments.of(bufferWriter().expectedDensity(0.01)),
Arguments.of(bufferWriter().expectedDensity(0.1)),
Arguments.of(bufferWriter().expectedDensity(0.6)),
Arguments.of(bufferWriter().initialCapacity(1)),
Arguments.of(bufferWriter().initialCapacity(8)),
Arguments.of(bufferWriter().initialCapacity(8192)),
Arguments.of(bufferWriter().optimiseForArrays().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(bufferWriter().optimiseForRuns().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(bufferWriter().constantMemory().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(bufferWriter().optimiseForArrays().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(bufferWriter().optimiseForRuns().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(bufferWriter().constantMemory().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)))
Arguments.of(writer().optimiseForArrays()),
Arguments.of(writer().optimiseForRuns()),
Arguments.of(writer().constantMemory()),
Arguments.of(writer().optimiseForArrays().fastRank()),
Arguments.of(writer().optimiseForRuns().fastRank()),
Arguments.of(writer().constantMemory().fastRank()),
Arguments.of(writer().expectedDensity(0.001)),
Arguments.of(writer().expectedDensity(0.01)),
Arguments.of(writer().expectedDensity(0.1)),
Arguments.of(writer().expectedDensity(0.6)),
Arguments.of(writer().expectedDensity(0.001).fastRank()),
Arguments.of(writer().expectedDensity(0.01).fastRank()),
Arguments.of(writer().expectedDensity(0.1).fastRank()),
Arguments.of(writer().expectedDensity(0.6).fastRank()),
Arguments.of(writer().initialCapacity(1)),
Arguments.of(writer().initialCapacity(8)),
Arguments.of(writer().initialCapacity(8192)),
Arguments.of(writer().initialCapacity(1).fastRank()),
Arguments.of(writer().initialCapacity(8).fastRank()),
Arguments.of(writer().initialCapacity(8192).fastRank()),
Arguments.of(writer().optimiseForArrays().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().optimiseForRuns().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().constantMemory().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().optimiseForArrays().expectedRange(0, toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(writer().optimiseForRuns().expectedRange(0, toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(writer().constantMemory().expectedRange(0, toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(writer().optimiseForArrays().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().optimiseForRuns().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().constantMemory().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().optimiseForArrays().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(writer().optimiseForRuns().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(writer().constantMemory().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).fastRank()),
Arguments.of(bufferWriter().optimiseForArrays()),
Arguments.of(bufferWriter().optimiseForRuns()),
Arguments.of(bufferWriter().constantMemory()),
Arguments.of(bufferWriter().expectedDensity(0.001)),
Arguments.of(bufferWriter().expectedDensity(0.01)),
Arguments.of(bufferWriter().expectedDensity(0.1)),
Arguments.of(bufferWriter().expectedDensity(0.6)),
Arguments.of(bufferWriter().initialCapacity(1)),
Arguments.of(bufferWriter().initialCapacity(8)),
Arguments.of(bufferWriter().initialCapacity(8192)),
Arguments.of(bufferWriter().optimiseForArrays().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(bufferWriter().optimiseForRuns().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(bufferWriter().constantMemory().expectedRange(0, toUnsignedLong(MIN_VALUE))),
Arguments.of(bufferWriter().optimiseForArrays().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(bufferWriter().optimiseForRuns().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(bufferWriter().constantMemory().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE))),
Arguments.of(writer().optimiseForArrays().runCompress(false)),
Arguments.of(writer().optimiseForRuns().runCompress(false)),
Arguments.of(writer().constantMemory().runCompress(false)),
Arguments.of(writer().optimiseForArrays().fastRank().runCompress(false)),
Arguments.of(writer().optimiseForRuns().fastRank().runCompress(false)),
Arguments.of(writer().constantMemory().fastRank().runCompress(false)),
Arguments.of(writer().expectedDensity(0.001).runCompress(false)),
Arguments.of(writer().expectedDensity(0.01).runCompress(false)),
Arguments.of(writer().expectedDensity(0.1).runCompress(false)),
Arguments.of(writer().expectedDensity(0.6).runCompress(false)),
Arguments.of(writer().expectedDensity(0.001).fastRank().runCompress(false)),
Arguments.of(writer().expectedDensity(0.01).fastRank().runCompress(false)),
Arguments.of(writer().expectedDensity(0.1).fastRank().runCompress(false)),
Arguments.of(writer().expectedDensity(0.6).fastRank().runCompress(false)),
Arguments.of(writer().initialCapacity(1).runCompress(false)),
Arguments.of(writer().initialCapacity(8).runCompress(false)),
Arguments.of(writer().initialCapacity(8192).runCompress(false)),
Arguments.of(writer().initialCapacity(1).fastRank().runCompress(false)),
Arguments.of(writer().initialCapacity(8).fastRank().runCompress(false)),
Arguments.of(writer().initialCapacity(8192).fastRank().runCompress(false)),
Arguments.of(writer().optimiseForArrays().expectedRange(0, toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(writer().optimiseForRuns().expectedRange(0, toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(writer().constantMemory().expectedRange(0, toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(writer().optimiseForArrays().expectedRange(0, toUnsignedLong(MIN_VALUE)).fastRank().runCompress(false)),
Arguments.of(writer().optimiseForRuns().expectedRange(0, toUnsignedLong(MIN_VALUE)).fastRank().runCompress(false)),
Arguments.of(writer().constantMemory().expectedRange(0, toUnsignedLong(MIN_VALUE)).fastRank().runCompress(false)),
Arguments.of(writer().optimiseForArrays().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(writer().optimiseForRuns().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(writer().constantMemory().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(writer().optimiseForArrays().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).fastRank().runCompress(false)),
Arguments.of(writer().optimiseForRuns().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).fastRank().runCompress(false)),
Arguments.of(writer().constantMemory().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).fastRank().runCompress(false)),
Arguments.of(bufferWriter().optimiseForArrays().runCompress(false)),
Arguments.of(bufferWriter().optimiseForRuns().runCompress(false)),
Arguments.of(bufferWriter().constantMemory().runCompress(false)),
Arguments.of(bufferWriter().expectedDensity(0.001).runCompress(false)),
Arguments.of(bufferWriter().expectedDensity(0.01).runCompress(false)),
Arguments.of(bufferWriter().expectedDensity(0.1).runCompress(false)),
Arguments.of(bufferWriter().expectedDensity(0.6).runCompress(false)),
Arguments.of(bufferWriter().initialCapacity(1).runCompress(false)),
Arguments.of(bufferWriter().initialCapacity(8).runCompress(false)),
Arguments.of(bufferWriter().initialCapacity(8192).runCompress(false)),
Arguments.of(bufferWriter().optimiseForArrays().expectedRange(0, toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(bufferWriter().optimiseForRuns().expectedRange(0, toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(bufferWriter().constantMemory().expectedRange(0, toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(bufferWriter().optimiseForArrays().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(bufferWriter().optimiseForRuns().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).runCompress(false)),
Arguments.of(bufferWriter().constantMemory().expectedRange(toUnsignedLong(MAX_VALUE), toUnsignedLong(MIN_VALUE)).runCompress(false))
);
}

Expand Down Expand Up @@ -150,10 +198,10 @@ public void testWriteBitmapAfterReset(Supplier<RoaringBitmapWriter<? extends Bit
RoaringBitmapWriter writer = supplier.get();
writer.add(0);
writer.add(-2);
assertArrayEquals(new int[] {0, -2}, writer.get().toArray());
assertArrayEquals(new int[]{0, -2}, writer.get().toArray());
writer.reset();
writer.add(100);
writer.addMany(4, 5, 6);
assertArrayEquals(new int[] {4, 5, 6, 100}, writer.get().toArray());
assertArrayEquals(new int[]{4, 5, 6, 100}, writer.get().toArray());
}
}

0 comments on commit 4268dcd

Please sign in to comment.