From 73a09574cddf37a918933859e7a2b2b5e67a3590 Mon Sep 17 00:00:00 2001
From: Richard Startin
Date: Tue, 7 Apr 2020 14:45:46 +0100
Subject: [PATCH] add specialised andNotCardinality for improved performance (#378)

* add specialised andNotCardinality for improved performance

* Remove unused import.
---
 .../java/org/roaringbitmap/RoaringBitmap.java |  37 ++++++-
 .../org/roaringbitmap/TestRoaringBitmap.java  |  23 ++++
 .../java/org/roaringbitmap/RandomData.java    |  20 ++--
 .../CombinedCardinalityBenchmark.java         | 102 ++++++++++++++++++
 4 files changed, 172 insertions(+), 10 deletions(-)
 create mode 100644 jmh/src/jmh/java/org/roaringbitmap/combinedcardinality/CombinedCardinalityBenchmark.java

diff --git a/RoaringBitmap/src/main/java/org/roaringbitmap/RoaringBitmap.java b/RoaringBitmap/src/main/java/org/roaringbitmap/RoaringBitmap.java
index 35c299e43..b4701d37c 100644
--- a/RoaringBitmap/src/main/java/org/roaringbitmap/RoaringBitmap.java
+++ b/RoaringBitmap/src/main/java/org/roaringbitmap/RoaringBitmap.java
@@ -870,7 +870,42 @@ public static int xorCardinality(final RoaringBitmap x1, final RoaringBitmap x2)
    * @return cardinality of the left difference
    */
   public static int andNotCardinality(final RoaringBitmap x1, final RoaringBitmap x2) {
-    return x1.getCardinality() - andCardinality(x1, x2);
+    final int length1 = x1.highLowContainer.size(), length2 = x2.highLowContainer.size();
+
+    if (length2 > 4 * length1) {
+      // if x1 is much smaller than x2, this can be much faster
+      return x1.getCardinality() - andCardinality(x1, x2);
+    }
+
+    long cardinality = 0L;
+    int pos1 = 0, pos2 = 0;
+
+    while (pos1 < length1 && pos2 < length2) {
+      char s1 = x1.highLowContainer.getKeyAtIndex(pos1);
+      char s2 = x2.highLowContainer.getKeyAtIndex(pos2);
+      if (s1 == s2) {
+        final Container c1 = x1.highLowContainer.getContainerAtIndex(pos1);
+        final Container c2 = x2.highLowContainer.getContainerAtIndex(pos2);
+        cardinality += c1.getCardinality() - c1.andCardinality(c2);
+        ++pos1;
+        ++pos2;
+      } else if (s1 < s2) {
+        // key at pos1 is absent from x2: count it whole, then test the NEXT key (not a stale one)
+        do {
+          cardinality += x1.highLowContainer.getContainerAtIndex(pos1).getCardinality();
+          ++pos1;
+        } while (pos1 < length1 && x1.highLowContainer.getKeyAtIndex(pos1) < s2);
+      } else {
+        pos2 = x2.highLowContainer.advanceUntil(s1, pos2);
+      }
+    }
+    if (pos2 == length2) {
+      while (pos1 < length1) {
+        cardinality += x1.highLowContainer.getContainerAtIndex(pos1).getCardinality();
+        ++pos1;
+      }
+    }
+    return (int)cardinality;
   }
 
   /**
diff --git a/RoaringBitmap/src/test/java/org/roaringbitmap/TestRoaringBitmap.java b/RoaringBitmap/src/test/java/org/roaringbitmap/TestRoaringBitmap.java
index 09eed5701..3692c7f09 100644
--- a/RoaringBitmap/src/test/java/org/roaringbitmap/TestRoaringBitmap.java
+++ b/RoaringBitmap/src/test/java/org/roaringbitmap/TestRoaringBitmap.java
@@ -2066,6 +2066,29 @@ public void testAndNotCardinality() {
     assertEquals(andNot.getCardinality(), RoaringBitmap.andNotCardinality(rb, rb2));
   }
 
+
+  @Test
+  public void testAndNotCardinalityBigVsSmall() {
+    RoaringBitmap small = RoaringBitmap.bitmapOf(1, 2, 3);
+    RoaringBitmap big = new RoaringBitmap();
+    for (int i = 0; i < 4000; ++i) {
+      big.add(1 + i * 0x1000);
+    }
+    RoaringBitmap andNot = RoaringBitmap.andNot(big, small);
+    assertEquals(andNot.getCardinality(), RoaringBitmap.andNotCardinality(big, small));
+  }
+
+  @Test
+  public void testAndNotCardinalitySmallVsBig() {
+    RoaringBitmap small = RoaringBitmap.bitmapOf(1, 2, 3);
+    RoaringBitmap big = new RoaringBitmap();
+    for (int i = 0; i < 4000; ++i) {
+      big.add(1 + i * 0x1000);
+    }
+    RoaringBitmap andNot = RoaringBitmap.andNot(small, big);
+    assertEquals(andNot.getCardinality(), RoaringBitmap.andNotCardinality(small, big));
+  }
+
   @Test
   public void ortest() {
     final RoaringBitmap rr = new RoaringBitmap();
diff --git a/jmh/src/jmh/java/org/roaringbitmap/RandomData.java b/jmh/src/jmh/java/org/roaringbitmap/RandomData.java
index f22016d37..779f416c4 100644
--- a/jmh/src/jmh/java/org/roaringbitmap/RandomData.java
+++ b/jmh/src/jmh/java/org/roaringbitmap/RandomData.java
@@ -1,10 +1,12 @@
 package org.roaringbitmap;
 
 import java.util.Arrays;
-import java.util.concurrent.ThreadLocalRandom;
+import java.util.SplittableRandom;
 import java.util.stream.IntStream;
 
 public class RandomData {
+
+  private static final SplittableRandom RANDOM = new SplittableRandom(0);
 
   private static final ThreadLocal bits = ThreadLocal.withInitial(() -> new long[1 << 10]);
   private static final ThreadLocal runs = ThreadLocal.withInitial(() -> new int[4096]);
@@ -22,17 +24,17 @@ public static RoaringBitmap randomBitmap(int maxKeys, double rleLimit, double de
   }
 
   public static IntStream rleRegion() {
-    int maxNumRuns = ThreadLocalRandom.current().nextInt(1, 2048);
+    int maxNumRuns = RANDOM.nextInt(1, 2048);
     int minRequiredCardinality = maxNumRuns * 2 + 1;
     int[] values = runs.get();
     int totalRuns = 0;
-    int start = ThreadLocalRandom.current().nextInt(64);
+    int start = RANDOM.nextInt(64);
     int run = 0;
     while (minRequiredCardinality > 0 && start < 0xFFFF && run < 2 * maxNumRuns) {
-      int runLength = ThreadLocalRandom.current().nextInt(1, minRequiredCardinality + 1);
+      int runLength = RANDOM.nextInt(1, minRequiredCardinality + 1);
       values[run++] = start;
       values[run++] = Math.min(start + runLength, 0x10000 - start);
-      start += runLength + ThreadLocalRandom.current().nextInt(64);
+      start += runLength + RANDOM.nextInt(64);
       minRequiredCardinality -= runLength;
       ++totalRuns;
     }
@@ -43,11 +45,11 @@ public static IntStream rleRegion() {
   }
 
   public static IntStream sparseRegion() {
-    return IntStream.of(createSorted16BitInts(ThreadLocalRandom.current().nextInt(1, 4096)));
+    return IntStream.of(createSorted16BitInts(RANDOM.nextInt(1, 4096)));
   }
 
   public static IntStream denseRegion() {
-    return IntStream.of(createSorted16BitInts(ThreadLocalRandom.current().nextInt(4096, 1 << 16)));
+    return IntStream.of(createSorted16BitInts(RANDOM.nextInt(4096, 1 << 16)));
   }
 
   private static int[] createSorted16BitInts(int howMany) {
@@ -55,7 +57,7 @@ private static int[] createSorted16BitInts(int howMany) {
     Arrays.fill(bitset, 0L);
     int consumed = 0;
     while (consumed < howMany) {
-      int value = ThreadLocalRandom.current().nextInt(1 << 16);
+      int value = RANDOM.nextInt(1 << 16);
       long bit = (1L << value);
       consumed += 1 - Long.bitCount(bitset[value >>> 6] & bit);
       bitset[value >>> 6] |= bit;
@@ -78,7 +80,7 @@ private static RoaringBitmap forKeys(int[] keys, double rleLimit, double denseLi
     RoaringBitmapWriter writer = RoaringBitmapWriter.writer().optimiseForArrays().get();
     IntStream.of(keys)
             .forEach(key -> {
-              double choice = ThreadLocalRandom.current().nextDouble();
+              double choice = RANDOM.nextDouble();
               final IntStream stream;
               if (choice < rleLimit) {
                 stream = rleRegion();
diff --git a/jmh/src/jmh/java/org/roaringbitmap/combinedcardinality/CombinedCardinalityBenchmark.java b/jmh/src/jmh/java/org/roaringbitmap/combinedcardinality/CombinedCardinalityBenchmark.java
new file mode 100644
index 000000000..51fce4e22
--- /dev/null
+++ b/jmh/src/jmh/java/org/roaringbitmap/combinedcardinality/CombinedCardinalityBenchmark.java
@@ -0,0 +1,102 @@
+package org.roaringbitmap.combinedcardinality;
+
+import org.openjdk.jmh.annotations.*;
+import org.roaringbitmap.RandomData;
+import org.roaringbitmap.RoaringBitmap;
+
+import java.util.concurrent.TimeUnit;
+
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@BenchmarkMode(Mode.AverageTime)
+@Fork(value = 1, jvmArgsPrepend =
+        {
+                "-XX:-TieredCompilation",
+                "-XX:+UseParallelGC",
+                "-mx2G",
+                "-ms2G",
+                "-XX:+AlwaysPreTouch"
+        })
+@State(Scope.Benchmark)
+public class CombinedCardinalityBenchmark {
+
+  public enum Scenario {
+    EQUAL {
+      @Override
+      RoaringBitmap[] bitmaps() {
+        RoaringBitmap bitmap = RandomData.randomBitmap(1 << 12, 0.2, 0.3);
+        return new RoaringBitmap[] {bitmap, bitmap.clone()};
+      }
+    },
+    SHIFTED {
+      @Override
+      RoaringBitmap[] bitmaps() {
+        RoaringBitmap bitmap = RandomData.randomBitmap(1 << 12, 0.2, 0.3);
+        return new RoaringBitmap[] {bitmap, RoaringBitmap.addOffset(bitmap, 1 << 16)};
+      }
+    },
+    SMALL_LARGE {
+      @Override
+      RoaringBitmap[] bitmaps() {
+        return new RoaringBitmap[] {
+            RandomData.randomBitmap(1 << 4, 0.2, 0.3),
+            RandomData.randomBitmap(1 << 12, 0.2, 0.3)
+        };
+      }
+    },
+    LARGE_SMALL {
+      @Override
+      RoaringBitmap[] bitmaps() {
+        return new RoaringBitmap[] {
+            RandomData.randomBitmap(1 << 12, 0.2, 0.3),
+            RandomData.randomBitmap(1 << 4, 0.2, 0.3)
+        };
+      }
+    }
+    ;
+    abstract RoaringBitmap[] bitmaps();
+  }
+
+  @Param
+  Scenario scenario;
+
+  RoaringBitmap left;
+  RoaringBitmap right;
+
+  @Setup(Level.Trial)
+  public void init() {
+    RoaringBitmap[] bitmaps = scenario.bitmaps();
+    left = bitmaps[0];
+    right = bitmaps[1];
+  }
+
+
+  @Benchmark
+  public int xorCardinality() {
+    return RoaringBitmap.xorCardinality(left, right);
+  }
+
+  @Benchmark
+  public int xorCardinalityBaseline() {
+    return left.getCardinality() + right.getCardinality() - 2 * RoaringBitmap.andCardinality(left, right);
+  }
+
+  @Benchmark
+  public int andNotCardinality() {
+    return RoaringBitmap.andNotCardinality(left, right);
+  }
+
+  @Benchmark
+  public int andNotCardinalityBaseline() {
+    return left.getCardinality() - RoaringBitmap.andCardinality(left, right);
+  }
+
+  @Benchmark
+  public int orCardinality() {
+    return RoaringBitmap.orCardinality(left, right);
+  }
+
+  @Benchmark
+  public int orCardinalityBaseline() {
+    return left.getCardinality() + right.getCardinality() - RoaringBitmap.andCardinality(left, right);
+  }
+}