diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java b/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java index b8551b5..bc43f36 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java +++ b/fastfilter/src/main/java/org/fastfilter/bloom/BlockedBloom.java @@ -12,8 +12,8 @@ public class BlockedBloom implements Filter { public static BlockedBloom construct(long[] keys, int bitsPerKey) { - long n = keys.length; - BlockedBloom f = new BlockedBloom((int) n, bitsPerKey); + int n = keys.length; + BlockedBloom f = new BlockedBloom(n, bitsPerKey); for(long x : keys) { f.add(x); } @@ -34,7 +34,7 @@ public long getBitCount() { this.seed = Hash.randomSeed(); long bits = (long) entryCount * bitsPerKey; this.buckets = (int) bits / 64; - data = new long[(int) (buckets + 16)]; + data = new long[buckets + 16 + 1]; } @Override diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/count/Select.java b/fastfilter/src/main/java/org/fastfilter/bloom/count/Select.java index be29ed1..a06989a 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/count/Select.java +++ b/fastfilter/src/main/java/org/fastfilter/bloom/count/Select.java @@ -149,7 +149,8 @@ public class Select { * @return the position (0 for first bit, 63 for last) */ public static int selectInLong(long x, int n) { - assert n < Long.bitCount(x); + // TODO: the assert message below adds bytecode weight, which influences inlining decisions + assert n < Long.bitCount(x): n + " >= " + Long.bitCount(x); // Phase 1: sums by byte long byteSums = x - ((x & 0xa * ONES_STEP_4) >>> 1); byteSums = (byteSums & 3 * ONES_STEP_4) + diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java index 857636b..c6eb729 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java +++ 
b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloom.java @@ -61,7 +61,7 @@ public long getBitCount() { this.seed = Hash.randomSeed(); long bits = (long) entryCount * bitsPerKey; this.buckets = (int) bits / 64; - int arrayLength = (int) (buckets + 16); + int arrayLength = buckets + 16 + 1; data = new long[arrayLength]; counts = new long[arrayLength]; overflow = new long[100 + arrayLength * 10 / 100]; diff --git a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java index 3f33ab7..34ce553 100644 --- a/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java +++ b/fastfilter/src/main/java/org/fastfilter/bloom/count/SuccinctCountingBlockedBloomRanked.java @@ -63,7 +63,7 @@ public long getBitCount() { this.seed = Hash.randomSeed(); long bits = (long) entryCount * bitsPerKey; this.buckets = (int) bits / 64; - int arrayLength = buckets + 16; + int arrayLength = buckets + 16 + 1; data = new long[arrayLength]; counts = new long[arrayLength]; overflow = new long[100 + arrayLength * 10 / 100]; diff --git a/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo16.java b/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo16.java index 7084a32..5cf7017 100644 --- a/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo16.java +++ b/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo16.java @@ -39,7 +39,7 @@ public static Cuckoo16 construct(long[] keys) { public Cuckoo16(int capacity) { // bucketCount needs to be even for bucket2 to work - bucketCount = (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2; + bucketCount = Math.max(1, (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2); this.data = new long[bucketCount]; this.seed = Hash.randomSeed(); } diff --git a/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo8.java 
b/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo8.java index 7bf6f10..c9d7b8a 100644 --- a/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo8.java +++ b/fastfilter/src/main/java/org/fastfilter/cuckoo/Cuckoo8.java @@ -39,7 +39,7 @@ public static Cuckoo8 construct(long[] keys) { public Cuckoo8(int capacity) { // bucketCount needs to be even for bucket2 to work - bucketCount = (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2; + bucketCount = Math.max(1, (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2); this.data = new int[bucketCount]; this.seed = Hash.randomSeed(); } diff --git a/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus16.java b/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus16.java index 3562769..b2e6d1c 100644 --- a/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus16.java +++ b/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus16.java @@ -42,7 +42,7 @@ public static CuckooPlus16 construct(long[] keys) { public CuckooPlus16(int capacity) { // bucketCount needs to be even for bucket2 to work bucketCount = (int) Math.ceil((double) capacity) / 2 * 2; - this.data = new short[bucketCount + 1]; + this.data = new short[bucketCount + 2]; this.seed = Hash.randomSeed(); } diff --git a/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus8.java b/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus8.java index ddd40e9..45bb751 100644 --- a/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus8.java +++ b/fastfilter/src/main/java/org/fastfilter/cuckoo/CuckooPlus8.java @@ -42,7 +42,7 @@ public static CuckooPlus8 construct(long[] keys) { public CuckooPlus8(int capacity) { // bucketCount needs to be even for bucket2 to work bucketCount = (int) Math.ceil((double) capacity) / 2 * 2; - this.data = new byte[bucketCount + 1]; + this.data = new byte[bucketCount + 2]; this.seed = Hash.randomSeed(); } diff --git a/fastfilter/src/test/java/org/fastfilter/RegressionTests.java 
b/fastfilter/src/test/java/org/fastfilter/RegressionTests.java new file mode 100644 index 0000000..93b5342 --- /dev/null +++ b/fastfilter/src/test/java/org/fastfilter/RegressionTests.java @@ -0,0 +1,77 @@ +package org.fastfilter; + +import org.fastfilter.utils.Hash; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import static org.fastfilter.FilterType.*; +import static org.junit.Assert.assertTrue; + +@RunWith(Parameterized.class) +public class RegressionTests { + + + @Parameterized.Parameters(name = "{0}/seed={1}/{3} bits per key") + public static Object[][] regressionCases() { + return new Object[][]{ + {BLOCKED_BLOOM, 872153271794238865L, new long[]{1, 2, 3}, 8}, + {SUCCINCT_COUNTING_BLOCKED_BLOOM_RANKED, -401700599714690558L, new long[]{1, 2, 3}, 8}, + {SUCCINCT_COUNTING_BLOCKED_BLOOM, 6049486880293779298L, new long[]{1, 2, 3}, 8}, + {SUCCINCT_COUNTING_BLOCKED_BLOOM, 353772444652436712L, new long[]{5828366214313827392L, -8467365400393984494L, -424469057572555653L}, 8}, + // actually this one is impossible to reproduce because of the volatile seed + {XOR_SIMPLE, 6831634639270950343L, new long[]{1, 2, 3}, 8}, + {CUCKOO_8, 6335419348330489927L, new long[]{1, 2, 3}, 8}, + {CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}, 8}, + {CUCKOO_PLUS_8, -4031187722136552688L, new long[]{2173645522219008926L, 589862361776609381L, -1776331367981897399L, -7505626095864333717L, 6968992741301426055L, -3110009760358584538L, + 4126573288832158972L, -7561361506777543806L, -5363365907738450196L, 4406554949060325754L, 6610203208080690753L, 3455015316204788042L, 7863420196911575708L, 1875128261287193281L, + 6163360156169844663L, -24248169001003216L, -62326545792238735L, 5810209567031734221L, -2543215903193150719L, 8066741310405890113L, -1700763885488699715L, 331022494986758365L, + 6921011948518481376L, -4135401271689018905L, -3648707841443156724L, 8304743068009082509L, -6681730404693737112L, 1427756985322103926L, 
7726889622988885916L, 4123575358133211499L, + 4537462330215573723L, 9078573934276235401L, 32187183317483562L, -1841847540329070596L, -8420216857639877248L, -8421265231581213825L, -8233517952154774510L, -4678911007264536715L, + -8526674353687284449L, -27365118851637401L, -254145228777582712L, 2965855027055207977L, -3466341725845433998L, 7006973965168506949L, -3585814173337365788L, 7264252236018528601L, + 4058857911179366207L, 561654263008010300L, 2389635521107751132L, 7314182055688934933L, 5884448457819665732L, -7686492008813074402L, 298658331691777464L, -5830719925234073017L, + -6985871982812486035L, -4355730107235544811L, -6914420638144647786L, 7092124037956934799L, 5352744066168866120L, 4081227363605418964L, 2175125725804301191L, -5792740580295507772L, + -6183692349471335223L, -1221949547344177675L, -8340921677695714065L, 6519388252075884491L, -4726807568999917298L, 2930512993631049657L, -7721504975700326069L, -8479276039617916927L, + -2112370952694584366L, -9059529185598491289L, -6189590607337131826L, -5949793064086556159L, 1557391959671056410L, 4107630139293131578L, 4738411557430294180L, -3606951019798437215L, + -1742301458061239008L, -7389522306890543715L, 3726370125210336256L, -2051912870295294004L, -7639673055712206584L, -2767802468218389090L, 3131241789318669061L, -8316329307438505860L, + -4007166641668927959L, -6102930542977036947L, 7088919565484666773L, -3593550123383986925L, 6613817918373076399L, -7596314495989542882L, -5059595045899697395L, -547306193171270722L, + 8660029473572898552L, -7731225535097214079L, 2058313776967259523L, 2964665398310080884L, 6291785408569188246L, -329774438524923459L, -5664134174314856593L, -5756681006397171776L, 6223635625117218437L}, 8}, + {MPHF, 5400005265475528641L, new long[]{1773227589100607582L, 1401008621823229258L, 901259869510331588L, 1197333276475942193L, 1651119322544330030L, 986112488938952069L, + 1675726966169519337L, 1888976485651830901L, 1912475806632315628L, 74149177065144196L, 942187212974983392L, 
4215890488646823727L, 3694125823111201993L, 3793738020275325587L, + 2995933316126352930L, 4017238031310632606L, 3798301062142417109L, 4113831042388378630L, 2707645218409175553L, 3919094501360474098L, 4252303149040498185L, 4199952774063362014L, + 3327107703856825600L, 3964961892107416731L, 3966935050689896802L, 5921581983460164542L, 5314808407468600915L, 4696106051339789101L, 6634550099558541650L, 6382215924765560390L, + 5154426188333895839L, 6466726512887879802L, 4836037707257613543L, 5608288809216362089L, 6793579614382201757L, 6709676086154795823L, 5972763369063718749L, 4765003610184494484L, + 5635899990946803784L, 5349364953307177057L, 6264947502670452080L, 6912802837350428240L, 5429101923532929753L, 5668285853203792528L, 6563481559119688471L, 6317103420640399795L, + 8937635149702679081L, 8062485652179232600L, 8942552659025336850L, 8508924203915110088L, 8938353353354172574L, 7907183519152868142L, 8654059200278009367L, 9151769575477085925L, + 8494748655862745947L, 8180511740959930009L, 8244780136171765059L, 9165671267726030534L, 8022333815153416350L, -7348602598025993307L, -7137527130402610919L, -8864995500791741494L, + -7906426467332813681L, -7343692788430814188L, -9007903685362026026L, -9178084101442809748L, -7526812997805935236L, -7640655228186765204L, -6001026700792546473L, -6870431948453764034L, + -5271447769651360857L, -5591560689279781023L, -5868299437269234751L, -6226415928272647338L, -5431159857161381398L, -6370987534222793305L, -3043487285958836631L, -4301361355076290527L, + -3682760495848399784L, -3038236626480548566L, -3895662199162059335L, -3192071612777396897L, -2729235696166508115L, -3087500698602513665L, -4156274151845244416L, -3309406490623888358L, + -2528282539021436624L, -1633985981412420612L, -360913997783076114L, -111396594598251164L, -1339842643116805785L, -1403112313973786426L, -856792793066744400L, -392622225906607155L, + -863763710126232180L, -400874713595065720L, -373641626604004087L, -1951676159570020905L, -1774490078013273270L, 
-468961924964997308L, -1210600430103212706L, -384877607682781339L, -1945436007627906978L}, 8}, + {COUNTING_BLOOM, 6360526788365209414L, new long[]{-4535795219140351433L, 4882771549875911188L, -6502814355560814028L}, 16}, + {GCS2, -2130647756636796307L, new long[]{1, 2, 3}, 8} + }; + } + + private final FilterType type; + private final long seed; + private final long[] keys; + private final int bitsPerKey; + + public RegressionTests(FilterType type, long seed, long[] keys, int bitsPerKey) { + this.type = type; + this.seed = seed; + this.keys = keys; + this.bitsPerKey = bitsPerKey; + } + + @Test + public void regressionTest() { + Hash.setSeed(seed); + Filter filter = type.construct(keys, bitsPerKey); + for (long key : keys) { + assertTrue(filter.mayContain(key)); + } + } +} diff --git a/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java new file mode 100644 index 0000000..3005bdb --- /dev/null +++ b/fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java @@ -0,0 +1,42 @@ +package org.fastfilter; + +import org.fastfilter.utils.Hash; + +import java.util.Arrays; +import java.util.EnumSet; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.LongStream; + +import static org.fastfilter.FilterType.*; +import static org.junit.Assert.assertTrue; + +/** + * Builds every {@link FilterType} from random keys under random hash seeds and checks that each key passed to construct is reported by mayContain. + */ +public class SimpleFuzzer { + + public static void main(String... 
args) { + long seed = 0; + for (int bitsPerKey = 8; bitsPerKey < 32; bitsPerKey += 8) { + for (int keyLength = 3; keyLength < 1_000_000; keyLength += ThreadLocalRandom.current().nextInt(10000)) { + long[] keys = LongStream.range(0, keyLength).map(i -> ThreadLocalRandom.current().nextLong()).toArray(); + for (FilterType type : FilterType.values()) { + try { + for (int i = 0; i < 1_000_000; ++i) { + seed = ThreadLocalRandom.current().nextLong(); + Hash.setSeed(seed); + Filter filter = type.construct(keys, bitsPerKey); + for (long key : keys) { + assertTrue(seed + "/" + type + "/" + Arrays.toString(keys), filter.mayContain(key)); + } + } + } catch (RuntimeException | AssertionError e) { + // assertion failures are Errors, not Exceptions; print the failing seed/type/keys for them too + System.out.println(seed + "/" + type + "/" + Arrays.toString(keys)); + throw e; + } + } + } + } + } +} diff --git a/fastfilter/src/test/java/org/fastfilter/TestAllFilters.java b/fastfilter/src/test/java/org/fastfilter/TestAllFilters.java index 2da5314..37ae4c3 100644 --- a/fastfilter/src/test/java/org/fastfilter/TestAllFilters.java +++ b/fastfilter/src/test/java/org/fastfilter/TestAllFilters.java @@ -90,8 +90,8 @@ public static void main(String... args) { for (int size = 1_000_000; size <= 10_000_000; size *= 10) { System.out.println("size " + size); for (int test = 0; test < 10; test++) { -// test(FilterType.BLOOM, size, test, true); -// test(FilterType.BLOCKED_BLOOM, size, test, true); + test(FilterType.BLOOM, size, test, true); + test(FilterType.BLOCKED_BLOOM, size, test, true); test(FilterType.COUNTING_BLOOM, size, test, true); test(FilterType.SUCCINCT_COUNTING_BLOOM, size, test, true); test(FilterType.SUCCINCT_COUNTING_BLOOM_RANKED, size, test, true);