From fd7c4052b4008c8ddd53e32e16bd012b907644d5 Mon Sep 17 00:00:00 2001 From: Richard Startin Date: Mon, 23 Dec 2019 21:48:52 +0000 Subject: [PATCH 1/7] fix array OOBE in blocked bloom filter when top 4 bits of hash are set (seed dependent behaviour) --- .../org/fastfilter/bloom/BlockedBloom.java | 10 +++++----- .../fastfilter/bloom/BlockedBloomTest.java | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 fastfilter/src/test/java/org/fastfilter/bloom/BlockedBloomTest.java diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java b/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java index b8551b5..d862c7d 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java +++ b/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java @@ -12,8 +12,8 @@ public class BlockedBloom implements Filter { public static BlockedBloom construct(long[] keys, int bitsPerKey) { - long n = keys.length; - BlockedBloom f = new BlockedBloom((int) n, bitsPerKey); + int n = keys.length; + BlockedBloom f = new BlockedBloom(n, bitsPerKey); for(long x : keys) { f.add(x); } @@ -34,7 +34,7 @@ public long getBitCount() { this.seed = Hash.randomSeed(); long bits = (long) entryCount * bitsPerKey; this.buckets = (int) bits / 64; - data = new long[(int) (buckets + 16)]; + data = new long[(buckets + 16)]; } @Override @@ -50,7 +50,7 @@ public void add(long key) { long m1 = (1L << hash) | (1L << (hash >> 6)); long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18)); data[start] |= m1; - data[start + 1 + (int) (hash >>> 60)] |= m2; + data[start + (int) (hash >>> 60)] |= m2; } @Override @@ -59,7 +59,7 @@ public boolean mayContain(long key) { int start = Hash.reduce((int) hash, buckets); hash = hash ^ Long.rotateLeft(hash, 32); long a = data[start]; - long b = data[start + 1 + (int) (hash >>> 60)]; + long b = data[start + (int) (hash >>> 60)]; long m1 = (1L << hash) | (1L << (hash >> 6)); long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18)); return ((m1 & a) == m1) && ((m2 & b) == m2); diff --git a/fastfilter/src/test/java/org/fastfilter/bloom/BlockedBloomTest.java b/fastfilter/src/test/java/org/fastfilter/bloom/BlockedBloomTest.java new file mode 100644 index 0000000..c8ede79 --- /dev/null +++ b/fastfilter/src/test/java/org/fastfilter/bloom/BlockedBloomTest.java @@ -0,0 +1,19 @@ +package org.fastfilter.bloom; + +import org.fastfilter.utils.Hash; +import org.junit.Test; + +import static org.junit.Assert.assertTrue; + +public class BlockedBloomTest { + + @Test + public void testCreateSmallBlockedBloomFilter() { + Hash.setSeed(872153271794238865L); + BlockedBloom filter = BlockedBloom.construct(new long[]{1, 2, 3}, 8); + assertTrue(filter.mayContain(1)); + assertTrue(filter.mayContain(2)); + assertTrue(filter.mayContain(3)); + } + +} \ No newline at end of file From 011e9f3c546de2974fbcb8daecd6e34f76179eeb Mon Sep 17 00:00:00 2001 From: Richard Startin Date: Mon, 23 Dec 2019 22:05:15 +0000 Subject: [PATCH 2/7] record failing commit for SuccinctCountingBlockedBloom --- .../java/org/fastfilter/SimpleFuzzer.java | 34 +++++++++++++++++++ .../SuccinctCountingBlockedBloomTest.java | 21 ++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java create mode 100644 fastfilter/src/test/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomTest.java diff --git a/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java new file mode 100644 index 0000000..6188352 --- /dev/null +++ b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java @@ -0,0 +1,34 @@ +package org.fastfilter; + +import org.fastfilter.utils.Hash; +import org.junit.Test; + +import java.util.concurrent.ThreadLocalRandom; + +import static junit.framework.TestCase.assertTrue; + +public class SimpleFuzzer { + + + @Test + public void fuzzTest() { + long[] keys = new long[]{ 1, 2, 3}; + long seed = 0; + + for (FilterType type : FilterType.values()) { + try { + for (int i = 0; i < 1000_000; ++i) { + seed = ThreadLocalRandom.current().nextLong(); + Hash.setSeed(seed); + Filter filter = type.construct(keys, 8); + assertTrue(filter.mayContain(1)); + assertTrue(filter.mayContain(2)); + assertTrue(filter.mayContain(3)); + } + } catch (Exception e) { + System.out.println(seed + "/" + type); + throw e; + } + } + } +} diff --git a/fastfilter/src/test/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomTest.java b/fastfilter/src/test/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomTest.java new file mode 100644 index 0000000..84a05bb --- /dev/null +++ b/fastfilter/src/test/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomTest.java @@ -0,0 +1,21 @@ +package org.fastfilter.bloom.count; + +import org.fastfilter.utils.Hash; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class SuccinctCountingBlockedBloomTest { + + @Test + public void indexOutOfBoundsRegression() { + long seed = 6049486880293779298L; + long[] keys = new long[]{1, 2, 3}; + Hash.setSeed(seed); + SuccinctCountingBlockedBloom filter = SuccinctCountingBlockedBloom.construct(keys, 8); + assertTrue(filter.mayContain(1)); + assertTrue(filter.mayContain(2)); + assertTrue(filter.mayContain(3)); + } + +} \ No newline at end of file From 9cac300175a2ad12f888c2b3204fb2451c16748b Mon Sep 17 00:00:00 2001 From: Richard Startin Date: Mon, 23 Dec 2019 22:08:25 +0000 Subject: [PATCH 3/7] fix OOBE in SuccinctCountingBlockedBloom --- .../bloom/count/SuccinctCountingBlockedBloom.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java index 857636b..3b5b93f 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java +++ b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java @@ -61,7 +61,7 @@ public long getBitCount() { this.seed = Hash.randomSeed(); long bits = (long) entryCount * bitsPerKey; this.buckets = (int) bits / 64; - int arrayLength = (int) (buckets + 16); + int arrayLength = (buckets + 16); data = new long[arrayLength]; counts = new long[arrayLength]; overflow = new long[100 + arrayLength * 10 / 100]; @@ -87,7 +87,7 @@ public void add(long key) { if (a2 != a1) { increment(start, a2); } - int second = start + 1 + (int) (hash >>> 60); + int second = start + (int) (hash >>> 60); int a3 = (int) ((hash >> 12) & 63); int a4 = (int) ((hash >> 18) & 63); increment(second, a3); @@ -112,7 +112,7 @@ public void remove(long key) { if (a2 != a1) { decrement(start, a2); } - int second = start + 1 + (int) (hash >>> 60); + int second = start + (int) (hash >>> 60); int a3 = (int) ((hash >> 12) & 63); int a4 = (int) ((hash >> 18) & 63); decrement(second, a3); @@ -142,7 +142,7 @@ public boolean mayContain(long key) { int start = Hash.reduce((int) hash, buckets); hash = hash ^ Long.rotateLeft(hash, 32); long a = data[start]; - long b = data[start + 1 + (int) (hash >>> 60)]; + long b = data[start + (int) (hash >>> 60)]; long m1 = (1L << hash) | (1L << (hash >> 6)); long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18)); return ((m1 & a) == m1) && ((m2 & b) == m2); From 9a045adc400a32a44784efe8a2142f02fcaf40f1 Mon Sep 17 00:00:00 2001 From: Richard Startin Date: Mon, 23 Dec 2019 22:34:04 +0000 Subject: [PATCH 4/7] capture more failures, leave GCS2 broken --- .../SuccinctCountingBlockedBloomRanked.java | 6 +-- .../java/org/fastfilter/cuckoo/Cuckoo16.java | 2 +- .../java/org/fastfilter/cuckoo/Cuckoo8.java | 2 +- .../java/org/fastfilter/xor/XorSimple.java | 2 +- .../java/org/fastfilter/RegressionTests.java | 47 +++++++++++++++++++ .../java/org/fastfilter/SimpleFuzzer.java | 6 +-- .../fastfilter/bloom/BlockedBloomTest.java | 19 -------- .../SuccinctCountingBlockedBloomTest.java | 21 --------- 8 files changed, 56 insertions(+), 49 deletions(-) create mode 100644 fastfilter/src/test/java/org/fastfilter/RegressionTests.java delete mode 100644 fastfilter/src/test/java/org/fastfilter/bloom/BlockedBloomTest.java delete mode 100644 fastfilter/src/test/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomTest.java diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java index 3f33ab7..efb5d61 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java +++ b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java @@ -89,7 +89,7 @@ public void add(long key) { if (a2 != a1) { increment(start, a2); } - int second = start + 1 + (int) (hash >>> 60); + int second = start + (int) (hash >>> 60); int a3 = (int) ((hash >> 12) & 63); int a4 = (int) ((hash >> 18) & 63); increment(second, a3); @@ -114,7 +114,7 @@ public void remove(long key) { if (a2 != a1) { decrement(start, a2); } - int second = start + 1 + (int) (hash >>> 60); + int second = start + (int) (hash >>> 60); int a3 = (int) ((hash >> 12) & 63); int a4 = (int) ((hash >> 18) & 63); decrement(second, a3); @@ -144,7 +144,7 @@ public boolean mayContain(long key) { int start = Hash.reduce((int) hash, buckets); hash = hash ^ Long.rotateLeft(hash, 32); long a = data[start]; - long b = data[start + 1 + (int) (hash >>> 60)]; + long b = data[start + (int) (hash >>> 60)]; long m1 = (1L << hash) | (1L << (hash >> 6)); long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18)); return ((m1 & a) == m1) && ((m2 & b) == m2); diff --git a/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo16.java b/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo16.java index 7084a32..5cf7017 100644 --- a/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo16.java +++ b/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo16.java @@ -39,7 +39,7 @@ public static Cuckoo16 construct(long[] keys) { public Cuckoo16(int capacity) { // bucketCount needs to be even for bucket2 to work - bucketCount = (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2; + bucketCount = Math.max(1, (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2); this.data = new long[bucketCount]; this.seed = Hash.randomSeed(); } diff --git a/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo8.java b/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo8.java index 7bf6f10..c9d7b8a 100644 --- a/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo8.java +++ b/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo8.java @@ -39,7 +39,7 @@ public static Cuckoo8 construct(long[] keys) { public Cuckoo8(int capacity) { // bucketCount needs to be even for bucket2 to work - bucketCount = (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2; + bucketCount = Math.max(1, (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2); this.data = new int[bucketCount]; this.seed = Hash.randomSeed(); } diff --git a/fastfilter/src/main/java/org/fastfilter/xor/XorSimple.java b/fastfilter/src/main/java/org/fastfilter/xor/XorSimple.java index bb2287e..88ba12d 100644 --- a/fastfilter/src/main/java/org/fastfilter/xor/XorSimple.java +++ b/fastfilter/src/main/java/org/fastfilter/xor/XorSimple.java @@ -51,7 +51,7 @@ boolean map(long[] keys, long seed, long[] stack) { } } int si = 0; - while (si < 2 * keys.length) { + while (si < 2 * keys.length && qi > 0) { int i = Q[--qi]; if (C[i] == 1) { long x = H[i]; diff --git a/fastfilter/src/test/java/org/fastfilter/RegressionTests.java b/fastfilter/src/test/java/org/fastfilter/RegressionTests.java new file mode 100644 index 0000000..bfd59a9 --- /dev/null +++ b/fastfilter/src/test/java/org/fastfilter/RegressionTests.java @@ -0,0 +1,47 @@ +package org.fastfilter; + +import org.fastfilter.utils.Hash; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static org.fastfilter.FilterType.*; +import static org.junit.Assert.assertTrue; + +@RunWith(Parameterized.class) +public class RegressionTests { + + + @Parameterized.Parameters(name = "{0}/{1}/*") + public static Object[][] regressionCases() { + return new Object[][] { + {BLOCKED_BLOOM, 872153271794238865L, new long[]{1, 2, 3}}, + {SUCCINCT_COUNTING_BLOCKED_BLOOM_RANKED, -401700599714690558L, new long[]{1, 2, 3}}, + {SUCCINCT_COUNTING_BLOCKED_BLOOM, 6049486880293779298L, new long[]{1, 2, 3}}, + // actual this one is impossible to reproduce because of the volatile seed + {XOR_SIMPLE, 6831634639270950343L, new long[]{1, 2, 3}}, + {CUCKOO_8, 6335419348330489927L, new long[]{1, 2, 3}}, + {CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}}, + {GCS2, -2130647756636796307L, new long[]{1, 2, 3}} + }; + } + + private final FilterType type; + private final long seed; + private final long[] keys; + + public RegressionTests(FilterType type, long seed, long[] keys) { + this.type = type; + this.seed = seed; + this.keys = keys; + } + + @Test + public void regressionTest() { + Hash.setSeed(seed); + Filter filter = type.construct(keys, 8); + for (long key : keys) { + assertTrue(filter.mayContain(key)); + } + } +} diff --git a/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java index 6188352..fb59b89 100644 --- a/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java +++ b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java @@ -21,9 +21,9 @@ public void fuzzTest() { seed = ThreadLocalRandom.current().nextLong(); Hash.setSeed(seed); Filter filter = type.construct(keys, 8); - assertTrue(filter.mayContain(1)); - assertTrue(filter.mayContain(2)); - assertTrue(filter.mayContain(3)); + assertTrue(seed + "/" + type, filter.mayContain(1)); + assertTrue(seed + "/" + type, filter.mayContain(2)); + assertTrue(seed + "/" + type, filter.mayContain(3)); } } catch (Exception e) { System.out.println(seed + "/" + type); diff --git a/fastfilter/src/test/java/org/fastfilter/bloom/BlockedBloomTest.java b/fastfilter/src/test/java/org/fastfilter/bloom/BlockedBloomTest.java deleted file mode 100644 index c8ede79..0000000 --- a/fastfilter/src/test/java/org/fastfilter/bloom/BlockedBloomTest.java +++ /dev/null @@ -1,19 +0,0 @@ -package org.fastfilter.bloom; - -import org.fastfilter.utils.Hash; -import org.junit.Test; - -import static org.junit.Assert.assertTrue; - -public class BlockedBloomTest { - - @Test - public void testCreateSmallBlockedBloomFilter() { - Hash.setSeed(872153271794238865L); - BlockedBloom filter = BlockedBloom.construct(new long[]{1, 2, 3}, 8); - assertTrue(filter.mayContain(1)); - assertTrue(filter.mayContain(2)); - assertTrue(filter.mayContain(3)); - } - -} \ No newline at end of file diff --git a/fastfilter/src/test/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomTest.java b/fastfilter/src/test/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomTest.java deleted file mode 100644 index 84a05bb..0000000 --- a/fastfilter/src/test/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomTest.java +++ /dev/null @@ -1,21 +0,0 @@ -package org.fastfilter.bloom.count; - -import org.fastfilter.utils.Hash; -import org.junit.Test; - -import static org.junit.Assert.*; - -public class SuccinctCountingBlockedBloomTest { - - @Test - public void indexOutOfBoundsRegression() { - long seed = 6049486880293779298L; - long[] keys = new long[]{1, 2, 3}; - Hash.setSeed(seed); - SuccinctCountingBlockedBloom filter = SuccinctCountingBlockedBloom.construct(keys, 8); - assertTrue(filter.mayContain(1)); - assertTrue(filter.mayContain(2)); - assertTrue(filter.mayContain(3)); - } - -} \ No newline at end of file From adc4d9b815ff36e13e33e367780d870ef632743e Mon Sep 17 00:00:00 2001 From: Richard Startin Date: Mon, 23 Dec 2019 22:38:21 +0000 Subject: [PATCH 5/7] leave GCS2 broken as it is too slow and complicated to fix --- .../java/org/fastfilter/RegressionTests.java | 3 +- .../java/org/fastfilter/SimpleFuzzer.java | 34 ------------------- 2 files changed, 1 insertion(+), 36 deletions(-) delete mode 100644 fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java diff --git a/fastfilter/src/test/java/org/fastfilter/RegressionTests.java b/fastfilter/src/test/java/org/fastfilter/RegressionTests.java index bfd59a9..9237006 100644 --- a/fastfilter/src/test/java/org/fastfilter/RegressionTests.java +++ b/fastfilter/src/test/java/org/fastfilter/RegressionTests.java @@ -21,8 +21,7 @@ public static Object[][] regressionCases() { // actual this one is impossible to reproduce because of the volatile seed {XOR_SIMPLE, 6831634639270950343L, new long[]{1, 2, 3}}, {CUCKOO_8, 6335419348330489927L, new long[]{1, 2, 3}}, - {CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}}, - {GCS2, -2130647756636796307L, new long[]{1, 2, 3}} + {CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}} }; } diff --git a/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java deleted file mode 100644 index fb59b89..0000000 --- a/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.fastfilter; - -import org.fastfilter.utils.Hash; -import org.junit.Test; - -import java.util.concurrent.ThreadLocalRandom; - -import static junit.framework.TestCase.assertTrue; - -public class SimpleFuzzer { - - - @Test - public void fuzzTest() { - long[] keys = new long[]{ 1, 2, 3}; - long seed = 0; - - for (FilterType type : FilterType.values()) { - try { - for (int i = 0; i < 1000_000; ++i) { - seed = ThreadLocalRandom.current().nextLong(); - Hash.setSeed(seed); - Filter filter = type.construct(keys, 8); - assertTrue(seed + "/" + type, filter.mayContain(1)); - assertTrue(seed + "/" + type, filter.mayContain(2)); - assertTrue(seed + "/" + type, filter.mayContain(3)); - } - } catch (Exception e) { - System.out.println(seed + "/" + type); - throw e; - } - } - } -} From 71f78bc7869efee0d351d491b698acc0f4683240 Mon Sep 17 00:00:00 2001 From: Richard Startin Date: Tue, 24 Dec 2019 10:34:52 +0000 Subject: [PATCH 6/7] change fixes to blocked bloom filters, fix OOBE bugs in cuckoo+, add simple fuzzer back, ignoring MPHF and GCS2 --- .../org/fastfilter/bloom/BlockedBloom.java | 6 +-- .../org/fastfilter/bloom/count/Select.java | 3 +- .../count/SuccinctCountingBlockedBloom.java | 8 ++-- .../SuccinctCountingBlockedBloomRanked.java | 8 ++-- .../org/fastfilter/cuckoo/CuckooPlus16.java | 2 +- .../org/fastfilter/cuckoo/CuckooPlus8.java | 2 +- .../java/org/fastfilter/RegressionTests.java | 22 ++++++++-- .../java/org/fastfilter/SimpleFuzzer.java | 41 +++++++++++++++++++ .../java/org/fastfilter/TestAllFilters.java | 4 +- 9 files changed, 77 insertions(+), 19 deletions(-) create mode 100644 fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java b/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java index d862c7d..bc43f36 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java +++ b/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java @@ -34,7 +34,7 @@ public long getBitCount() { this.seed = Hash.randomSeed(); long bits = (long) entryCount * bitsPerKey; this.buckets = (int) bits / 64; - data = new long[(buckets + 16)]; + data = new long[buckets + 16 + 1]; } @Override @@ -50,7 +50,7 @@ public void add(long key) { long m1 = (1L << hash) | (1L << (hash >> 6)); long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18)); data[start] |= m1; - data[start + (int) (hash >>> 60)] |= m2; + data[start + 1 + (int) (hash >>> 60)] |= m2; } @Override @@ -59,7 +59,7 @@ public boolean mayContain(long key) { int start = Hash.reduce((int) hash, buckets); hash = hash ^ Long.rotateLeft(hash, 32); long a = data[start]; - long b = data[start + (int) (hash >>> 60)]; + long b = data[start + 1 + (int) (hash >>> 60)]; long m1 = (1L << hash) | (1L << (hash >> 6)); long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18)); return ((m1 & a) == m1) && ((m2 & b) == m2); diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/count/Select.java b/fastfilter/src/main/java/org/fastfilter/bloom/count/Select.java index be29ed1..a06989a 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/count/Select.java +++ b/fastfilter/src/main/java/org/fastfilter/bloom/count/Select.java @@ -149,7 +149,8 @@ public class Select { * @return the position (0 for first bit, 63 for last) */ public static int selectInLong(long x, int n) { - assert n < Long.bitCount(x); + // TODO this adds bytecode weight which influence inlining decisions + assert n < Long.bitCount(x): n + " >= " + Long.bitCount(x); // Phase 1: sums by byte long byteSums = x - ((x & 0xa * ONES_STEP_4) >>> 1); byteSums = (byteSums & 3 * ONES_STEP_4) + diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java index 3b5b93f..c6eb729 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java +++ b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java @@ -61,7 +61,7 @@ public long getBitCount() { this.seed = Hash.randomSeed(); long bits = (long) entryCount * bitsPerKey; this.buckets = (int) bits / 64; - int arrayLength = (buckets + 16); + int arrayLength = buckets + 16 + 1; data = new long[arrayLength]; counts = new long[arrayLength]; overflow = new long[100 + arrayLength * 10 / 100]; @@ -87,7 +87,7 @@ public void add(long key) { if (a2 != a1) { increment(start, a2); } - int second = start + (int) (hash >>> 60); + int second = start + 1 + (int) (hash >>> 60); int a3 = (int) ((hash >> 12) & 63); int a4 = (int) ((hash >> 18) & 63); increment(second, a3); @@ -112,7 +112,7 @@ public void remove(long key) { if (a2 != a1) { decrement(start, a2); } - int second = start + (int) (hash >>> 60); + int second = start + 1 + (int) (hash >>> 60); int a3 = (int) ((hash >> 12) & 63); int a4 = (int) ((hash >> 18) & 63); decrement(second, a3); @@ -142,7 +142,7 @@ public boolean mayContain(long key) { int start = Hash.reduce((int) hash, buckets); hash = hash ^ Long.rotateLeft(hash, 32); long a = data[start]; - long b = data[start + (int) (hash >>> 60)]; + long b = data[start + 1 + (int) (hash >>> 60)]; long m1 = (1L << hash) | (1L << (hash >> 6)); long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18)); return ((m1 & a) == m1) && ((m2 & b) == m2); diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java index efb5d61..34ce553 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java +++ b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java @@ -63,7 +63,7 @@ public long getBitCount() { this.seed = Hash.randomSeed(); long bits = (long) entryCount * bitsPerKey; this.buckets = (int) bits / 64; - int arrayLength = buckets + 16; + int arrayLength = buckets + 16 + 1; data = new long[arrayLength]; counts = new long[arrayLength]; overflow = new long[100 + arrayLength * 10 / 100]; @@ -89,7 +89,7 @@ public void add(long key) { if (a2 != a1) { increment(start, a2); } - int second = start + (int) (hash >>> 60); + int second = start + 1 + (int) (hash >>> 60); int a3 = (int) ((hash >> 12) & 63); int a4 = (int) ((hash >> 18) & 63); increment(second, a3); @@ -114,7 +114,7 @@ public void remove(long key) { if (a2 != a1) { decrement(start, a2); } - int second = start + (int) (hash >>> 60); + int second = start + 1 + (int) (hash >>> 60); int a3 = (int) ((hash >> 12) & 63); int a4 = (int) ((hash >> 18) & 63); decrement(second, a3); @@ -144,7 +144,7 @@ public boolean mayContain(long key) { int start = Hash.reduce((int) hash, buckets); hash = hash ^ Long.rotateLeft(hash, 32); long a = data[start]; - long b = data[start + (int) (hash >>> 60)]; + long b = data[start + 1 + (int) (hash >>> 60)]; long m1 = (1L << hash) | (1L << (hash >> 6)); long m2 = (1L << (hash >> 12)) | (1L << (hash >> 18)); return ((m1 & a) == m1) && ((m2 & b) == m2); diff --git a/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus16.java b/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus16.java index 3562769..b2e6d1c 100644 --- a/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus16.java +++ b/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus16.java @@ -42,7 +42,7 @@ public static CuckooPlus16 construct(long[] keys) { public CuckooPlus16(int capacity) { // bucketCount needs to be even for bucket2 to work bucketCount = (int) Math.ceil((double) capacity) / 2 * 2; - this.data = new short[bucketCount + 1]; + this.data = new short[bucketCount + 2]; this.seed = Hash.randomSeed(); } diff --git a/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus8.java b/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus8.java index ddd40e9..45bb751 100644 --- a/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus8.java +++ b/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus8.java @@ -42,7 +42,7 @@ public static CuckooPlus8 construct(long[] keys) { public CuckooPlus8(int capacity) { // bucketCount needs to be even for bucket2 to work bucketCount = (int) Math.ceil((double) capacity) / 2 * 2; - this.data = new byte[bucketCount + 1]; + this.data = new byte[bucketCount + 2]; this.seed = Hash.randomSeed(); } diff --git a/fastfilter/src/test/java/org/fastfilter/RegressionTests.java b/fastfilter/src/test/java/org/fastfilter/RegressionTests.java index 9237006..1c02e89 100644 --- a/fastfilter/src/test/java/org/fastfilter/RegressionTests.java +++ b/fastfilter/src/test/java/org/fastfilter/RegressionTests.java @@ -14,15 +14,31 @@ public class RegressionTests { @Parameterized.Parameters(name = "{0}/{1}/*") public static Object[][] regressionCases() { - return new Object[][] { + return new Object[][]{ {BLOCKED_BLOOM, 872153271794238865L, new long[]{1, 2, 3}}, {SUCCINCT_COUNTING_BLOCKED_BLOOM_RANKED, -401700599714690558L, new long[]{1, 2, 3}}, {SUCCINCT_COUNTING_BLOCKED_BLOOM, 6049486880293779298L, new long[]{1, 2, 3}}, + {SUCCINCT_COUNTING_BLOCKED_BLOOM, 353772444652436712L, new long[]{5828366214313827392L, -8467365400393984494L, -424469057572555653L}}, // actual this one is impossible to reproduce because of the volatile seed {XOR_SIMPLE, 6831634639270950343L, new long[]{1, 2, 3}}, {CUCKOO_8, 6335419348330489927L, new long[]{1, 2, 3}}, - {CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}} - }; + {CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}}, + {CUCKOO_PLUS_8, -4031187722136552688L, new long[]{2173645522219008926L, 589862361776609381L, -1776331367981897399L, -7505626095864333717L, 6968992741301426055L, -3110009760358584538L, + 4126573288832158972L, -7561361506777543806L, -5363365907738450196L, 4406554949060325754L, 6610203208080690753L, 3455015316204788042L, 7863420196911575708L, 1875128261287193281L, + 6163360156169844663L, -24248169001003216L, -62326545792238735L, 5810209567031734221L, -2543215903193150719L, 8066741310405890113L, -1700763885488699715L, 331022494986758365L, + 6921011948518481376L, -4135401271689018905L, -3648707841443156724L, 8304743068009082509L, -6681730404693737112L, 1427756985322103926L, 7726889622988885916L, 4123575358133211499L, + 4537462330215573723L, 9078573934276235401L, 32187183317483562L, -1841847540329070596L, -8420216857639877248L, -8421265231581213825L, -8233517952154774510L, -4678911007264536715L, + -8526674353687284449L, -27365118851637401L, -254145228777582712L, 2965855027055207977L, -3466341725845433998L, 7006973965168506949L, -3585814173337365788L, 7264252236018528601L, + 4058857911179366207L, 561654263008010300L, 2389635521107751132L, 7314182055688934933L, 5884448457819665732L, -7686492008813074402L, 298658331691777464L, -5830719925234073017L, + -6985871982812486035L, -4355730107235544811L, -6914420638144647786L, 7092124037956934799L, 5352744066168866120L, 4081227363605418964L, 2175125725804301191L, -5792740580295507772L, + -6183692349471335223L, -1221949547344177675L, -8340921677695714065L, 6519388252075884491L, -4726807568999917298L, 2930512993631049657L, -7721504975700326069L, -8479276039617916927L, + -2112370952694584366L, -9059529185598491289L, -6189590607337131826L, -5949793064086556159L, 1557391959671056410L, 4107630139293131578L, 4738411557430294180L, -3606951019798437215L, + -1742301458061239008L, -7389522306890543715L, 3726370125210336256L, -2051912870295294004L, -7639673055712206584L, -2767802468218389090L, 3131241789318669061L, -8316329307438505860L, + -4007166641668927959L, -6102930542977036947L, 7088919565484666773L, -3593550123383986925L, 6613817918373076399L, -7596314495989542882L, -5059595045899697395L, -547306193171270722L, + 8660029473572898552L, -7731225535097214079L, 2058313776967259523L, 2964665398310080884L, 6291785408569188246L, -329774438524923459L, -5664134174314856593L, -5756681006397171776L, 6223635625117218437L}} + } + + ; } private final FilterType type; diff --git a/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java new file mode 100644 index 0000000..8ff9ebc --- /dev/null +++ b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java @@ -0,0 +1,41 @@ +package org.fastfilter; + +import org.fastfilter.utils.Hash; + +import java.util.Arrays; +import java.util.EnumSet; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.LongStream; + +import static junit.framework.TestCase.assertTrue; + +public class SimpleFuzzer { + + // implementations with bugs which may not be worth fixing + private static final EnumSet IGNORED = EnumSet.of(FilterType.GCS2, FilterType.MPHF); + + public static void main(String... args) { + long seed = 0; + for (int keyLength = 3; keyLength < 1_000_000; keyLength += 100) { + long[] keys = LongStream.range(0, keyLength).map(i -> ThreadLocalRandom.current().nextLong()).toArray(); + for (FilterType type : FilterType.values()) { + if (IGNORED.contains(type)) { + continue; + } + try { + for (int i = 0; i < 1_000; ++i) { + seed = ThreadLocalRandom.current().nextLong(); + Hash.setSeed(seed); + Filter filter = type.construct(keys, 8); + for (long key : keys) { + assertTrue(seed + "/" + type + "/" + Arrays.toString(keys), filter.mayContain(key)); + } + } + } catch (Exception e) { + System.out.println(seed + "/" + type + "/" + Arrays.toString(keys)); + throw e; + } + } + } + } +} diff --git a/fastfilter/src/test/java/org/fastfilter/TestAllFilters.java b/fastfilter/src/test/java/org/fastfilter/TestAllFilters.java index 2da5314..37ae4c3 100644 --- a/fastfilter/src/test/java/org/fastfilter/TestAllFilters.java +++ b/fastfilter/src/test/java/org/fastfilter/TestAllFilters.java @@ -90,8 +90,8 @@ public static void main(String... args) { for (int size = 1_000_000; size <= 10_000_000; size *= 10) { System.out.println("size " + size); for (int test = 0; test < 10; test++) { -// test(FilterType.BLOOM, size, test, true); -// test(FilterType.BLOCKED_BLOOM, size, test, true); + test(FilterType.BLOOM, size, test, true); + test(FilterType.BLOCKED_BLOOM, size, test, true); test(FilterType.COUNTING_BLOOM, size, test, true); test(FilterType.SUCCINCT_COUNTING_BLOOM, size, test, true); test(FilterType.SUCCINCT_COUNTING_BLOOM_RANKED, size, test, true); From 5100ef9254655ea11ce04dccb054d6b0d9289aad Mon Sep 17 00:00:00 2001 From: Richard Startin Date: Fri, 27 Dec 2019 15:00:45 +0000 Subject: [PATCH 7/7] revert change to XorSimple, add false negative test cases for MPHF, GCS2, COUNTING_BLOOM --- .../java/org/fastfilter/xor/XorSimple.java | 2 +- .../java/org/fastfilter/RegressionTests.java | 45 ++++++++++++------- .../java/org/fastfilter/SimpleFuzzer.java | 35 +++++++-------- 3 files changed, 47 insertions(+), 35 deletions(-) diff --git a/fastfilter/src/main/java/org/fastfilter/xor/XorSimple.java b/fastfilter/src/main/java/org/fastfilter/xor/XorSimple.java index 88ba12d..bb2287e 100644 --- a/fastfilter/src/main/java/org/fastfilter/xor/XorSimple.java +++ b/fastfilter/src/main/java/org/fastfilter/xor/XorSimple.java @@ -51,7 +51,7 @@ boolean map(long[] keys, long seed, long[] stack) { } } int si = 0; - while (si < 2 * keys.length && qi > 0) { + while (si < 2 * keys.length) { int i = Q[--qi]; if (C[i] == 1) { long x = H[i]; diff --git a/fastfilter/src/test/java/org/fastfilter/RegressionTests.java b/fastfilter/src/test/java/org/fastfilter/RegressionTests.java index 1c02e89..93b5342 100644 --- a/fastfilter/src/test/java/org/fastfilter/RegressionTests.java +++ b/fastfilter/src/test/java/org/fastfilter/RegressionTests.java @@ -12,17 +12,17 @@ public class RegressionTests { - @Parameterized.Parameters(name = "{0}/{1}/*") + @Parameterized.Parameters(name = "{0}/seed={1}/{3} bits per key") public static Object[][] regressionCases() { return new Object[][]{ - {BLOCKED_BLOOM, 872153271794238865L, new long[]{1, 2, 3}}, - {SUCCINCT_COUNTING_BLOCKED_BLOOM_RANKED, -401700599714690558L, new long[]{1, 2, 3}}, - {SUCCINCT_COUNTING_BLOCKED_BLOOM, 6049486880293779298L, new long[]{1, 2, 3}}, - {SUCCINCT_COUNTING_BLOCKED_BLOOM, 353772444652436712L, new long[]{5828366214313827392L, -8467365400393984494L, -424469057572555653L}}, - // actual this one is impossible to reproduce because of the volatile seed - {XOR_SIMPLE, 6831634639270950343L, new long[]{1, 2, 3}}, - {CUCKOO_8, 6335419348330489927L, new long[]{1, 2, 3}}, - {CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}}, + {BLOCKED_BLOOM, 872153271794238865L, new long[]{1, 2, 3}, 8}, + {SUCCINCT_COUNTING_BLOCKED_BLOOM_RANKED, -401700599714690558L, new long[]{1, 2, 3}, 8}, + {SUCCINCT_COUNTING_BLOCKED_BLOOM, 6049486880293779298L, new long[]{1, 2, 3}, 8}, + {SUCCINCT_COUNTING_BLOCKED_BLOOM, 353772444652436712L, new long[]{5828366214313827392L, -8467365400393984494L, -424469057572555653L}, 8}, + // actually this one is impossible to reproduce because of the volatile seed + {XOR_SIMPLE, 6831634639270950343L, new long[]{1, 2, 3}, 8}, + {CUCKOO_8, 6335419348330489927L, new long[]{1, 2, 3}, 8}, + {CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}, 8}, {CUCKOO_PLUS_8, -4031187722136552688L, new long[]{2173645522219008926L, 589862361776609381L, -1776331367981897399L, -7505626095864333717L, 6968992741301426055L, -3110009760358584538L, 4126573288832158972L, -7561361506777543806L, -5363365907738450196L, 4406554949060325754L, 6610203208080690753L, 3455015316204788042L, 7863420196911575708L, 1875128261287193281L, 6163360156169844663L, -24248169001003216L, -62326545792238735L, 5810209567031734221L, -2543215903193150719L, 8066741310405890113L, -1700763885488699715L, 331022494986758365L, @@ -35,26 +35,41 @@ public static Object[][] regressionCases() { -2112370952694584366L, -9059529185598491289L, -6189590607337131826L, -5949793064086556159L, 1557391959671056410L, 4107630139293131578L, 4738411557430294180L, -3606951019798437215L, -1742301458061239008L, -7389522306890543715L, 3726370125210336256L, -2051912870295294004L, -7639673055712206584L, -2767802468218389090L, 3131241789318669061L, -8316329307438505860L, -4007166641668927959L, -6102930542977036947L, 7088919565484666773L, -3593550123383986925L, 6613817918373076399L, -7596314495989542882L, -5059595045899697395L, -547306193171270722L, - 8660029473572898552L, -7731225535097214079L, 2058313776967259523L, 2964665398310080884L, 6291785408569188246L, -329774438524923459L, -5664134174314856593L, -5756681006397171776L, 6223635625117218437L}} - } - - ; + 8660029473572898552L, -7731225535097214079L, 2058313776967259523L, 2964665398310080884L, 6291785408569188246L, -329774438524923459L, -5664134174314856593L, -5756681006397171776L, 6223635625117218437L}, 8}, + {MPHF, 5400005265475528641L, new long[]{1773227589100607582L, 1401008621823229258L, 901259869510331588L, 1197333276475942193L, 1651119322544330030L, 986112488938952069L, + 1675726966169519337L, 1888976485651830901L, 1912475806632315628L, 74149177065144196L, 942187212974983392L, 4215890488646823727L, 3694125823111201993L, 3793738020275325587L, + 2995933316126352930L, 4017238031310632606L, 3798301062142417109L, 4113831042388378630L, 2707645218409175553L, 3919094501360474098L, 4252303149040498185L, 4199952774063362014L, + 3327107703856825600L, 3964961892107416731L, 3966935050689896802L, 5921581983460164542L, 5314808407468600915L, 4696106051339789101L, 6634550099558541650L, 6382215924765560390L, + 5154426188333895839L, 6466726512887879802L, 4836037707257613543L, 5608288809216362089L, 6793579614382201757L, 6709676086154795823L, 5972763369063718749L, 4765003610184494484L, + 5635899990946803784L, 5349364953307177057L, 6264947502670452080L, 6912802837350428240L, 5429101923532929753L, 5668285853203792528L, 6563481559119688471L, 6317103420640399795L, + 8937635149702679081L, 8062485652179232600L, 8942552659025336850L, 8508924203915110088L, 8938353353354172574L, 7907183519152868142L, 8654059200278009367L, 9151769575477085925L, + 8494748655862745947L, 8180511740959930009L, 8244780136171765059L, 9165671267726030534L, 8022333815153416350L, -7348602598025993307L, -7137527130402610919L, -8864995500791741494L, + -7906426467332813681L, -7343692788430814188L, -9007903685362026026L, -9178084101442809748L, -7526812997805935236L, -7640655228186765204L, -6001026700792546473L, -6870431948453764034L, + -5271447769651360857L, -5591560689279781023L, -5868299437269234751L, -6226415928272647338L, -5431159857161381398L, -6370987534222793305L, -3043487285958836631L, -4301361355076290527L, + -3682760495848399784L, -3038236626480548566L, -3895662199162059335L, -3192071612777396897L, -2729235696166508115L, -3087500698602513665L, -4156274151845244416L, -3309406490623888358L, + -2528282539021436624L, -1633985981412420612L, -360913997783076114L, -111396594598251164L, -1339842643116805785L, -1403112313973786426L, -856792793066744400L, -392622225906607155L, + -863763710126232180L, -400874713595065720L, -373641626604004087L, -1951676159570020905L, -1774490078013273270L, -468961924964997308L, -1210600430103212706L, -384877607682781339L, -1945436007627906978L}, 8}, + {COUNTING_BLOOM, 6360526788365209414L, new long[]{-4535795219140351433L, 4882771549875911188L, -6502814355560814028L}, 16}, + {GCS2, -2130647756636796307L, new long[]{1, 2, 3}, 8} + }; } private final FilterType type; private final long seed; private final long[] keys; + private final int bitsPerKey; - public RegressionTests(FilterType type, long seed, long[] keys) { + public RegressionTests(FilterType type, long seed, long[] keys, int bitsPerKey) { this.type = type; this.seed = seed; this.keys = keys; + this.bitsPerKey = bitsPerKey; } @Test public void regressionTest() { Hash.setSeed(seed); - Filter filter = type.construct(keys, 8); + Filter filter = type.construct(keys, bitsPerKey); for (long key : keys) { assertTrue(filter.mayContain(key)); } diff --git a/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java index 8ff9ebc..3005bdb 100644 --- a/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java +++ b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java @@ -8,32 +8,29 @@ import java.util.stream.LongStream; import static junit.framework.TestCase.assertTrue; +import static org.fastfilter.FilterType.*; public class SimpleFuzzer { - // implementations with bugs which may not be worth fixing - private static final EnumSet IGNORED = EnumSet.of(FilterType.GCS2, FilterType.MPHF); - public static void main(String... args) { long seed = 0; - for (int keyLength = 3; keyLength < 1_000_000; keyLength += 100) { - long[] keys = LongStream.range(0, keyLength).map(i -> ThreadLocalRandom.current().nextLong()).toArray(); - for (FilterType type : FilterType.values()) { - if (IGNORED.contains(type)) { - continue; - } - try { - for (int i = 0; i < 1_000; ++i) { - seed = ThreadLocalRandom.current().nextLong(); - Hash.setSeed(seed); - Filter filter = type.construct(keys, 8); - for (long key : keys) { - assertTrue(seed + "/" + type + "/" + Arrays.toString(keys), filter.mayContain(key)); + for (int bitsPerKey = 8; bitsPerKey < 32; bitsPerKey += 8) { + for (int keyLength = 3; keyLength < 1_000_000; keyLength += ThreadLocalRandom.current().nextInt(10000)) { + long[] keys = LongStream.range(0, keyLength).map(i -> ThreadLocalRandom.current().nextLong()).toArray(); + for (FilterType type : FilterType.values()) { + try { + for (int i = 0; i < 1_000_000; ++i) { + seed = ThreadLocalRandom.current().nextLong(); + Hash.setSeed(seed); + Filter filter = type.construct(keys, bitsPerKey); + for (long key : keys) { + assertTrue(seed + "/" + type + "/" + Arrays.toString(keys), filter.mayContain(key)); + } } + } catch (Exception e) { + System.out.println(seed + "/" + type + "/" + Arrays.toString(keys)); + throw e; } - } catch (Exception e) { - System.out.println(seed + "/" + type + "/" + Arrays.toString(keys)); - throw e; } } }