Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
public class BlockedBloom implements Filter {

public static BlockedBloom construct(long[] keys, int bitsPerKey) {
long n = keys.length;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, using long doesn't make sense here... I used long in some places to avoid integer overflow, but here it's not needed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In cases where you can reason that overflow is impossible, accumulating into a long can cause significant degradation in throughput once the loop has been compiled by C2. I can share some benchmarks which demonstrate this. Of course, sometimes it's the only safe thing to do...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I wasn't aware of this... I will keep this in mind!

BlockedBloom f = new BlockedBloom((int) n, bitsPerKey);
int n = keys.length;
BlockedBloom f = new BlockedBloom(n, bitsPerKey);
for(long x : keys) {
f.add(x);
}
Expand All @@ -34,7 +34,7 @@ public long getBitCount() {
this.seed = Hash.randomSeed();
long bits = (long) entryCount * bitsPerKey;
this.buckets = (int) bits / 64;
data = new long[(int) (buckets + 16)];
data = new long[buckets + 16 + 1];
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ public class Select {
* @return the position (0 for first bit, 63 for last)
*/
public static int selectInLong(long x, int n) {
assert n < Long.bitCount(x);
// TODO this adds bytecode weight, which influences inlining decisions
assert n < Long.bitCount(x): n + " >= " + Long.bitCount(x);
// Phase 1: sums by byte
long byteSums = x - ((x & 0xa * ONES_STEP_4) >>> 1);
byteSums = (byteSums & 3 * ONES_STEP_4) +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public long getBitCount() {
this.seed = Hash.randomSeed();
long bits = (long) entryCount * bitsPerKey;
this.buckets = (int) bits / 64;
int arrayLength = (int) (buckets + 16);
int arrayLength = buckets + 16 + 1;
data = new long[arrayLength];
counts = new long[arrayLength];
overflow = new long[100 + arrayLength * 10 / 100];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public long getBitCount() {
this.seed = Hash.randomSeed();
long bits = (long) entryCount * bitsPerKey;
this.buckets = (int) bits / 64;
int arrayLength = buckets + 16;
int arrayLength = buckets + 16 + 1;
data = new long[arrayLength];
counts = new long[arrayLength];
overflow = new long[100 + arrayLength * 10 / 100];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public static Cuckoo16 construct(long[] keys) {

/**
 * Sizes and allocates the bucket array from {@code capacity} and obtains a
 * hash seed from {@code Hash.randomSeed()}.
 *
 * @param capacity the entry count the bucket count is derived from
 */
public Cuckoo16(int capacity) {
// bucketCount needs to be even for bucket2 to work
// ceil(capacity / ENTRIES_PER_BUCKET) is cast to int and then rounded down to an
// even number by the integer "/ 2 * 2"; this yields 0 when the ceiling is 0 or 1.
bucketCount = (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2;
// Same computation, clamped to at least 1 so the array below is never empty.
// NOTE(review): the two assignments look like the old and new line of a diff hunk.
// NOTE(review): clamping to 1 gives an odd bucket count, which appears to conflict
// with the evenness requirement stated above — confirm whether 2 was intended.
bucketCount = Math.max(1, (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2);
// The backing array holds bucketCount longs.
this.data = new long[bucketCount];
this.seed = Hash.randomSeed();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public static Cuckoo8 construct(long[] keys) {

/**
 * Sizes and allocates the bucket array from {@code capacity} and obtains a
 * hash seed from {@code Hash.randomSeed()}.
 *
 * @param capacity the entry count the bucket count is derived from
 */
public Cuckoo8(int capacity) {
// bucketCount needs to be even for bucket2 to work
// ceil(capacity / ENTRIES_PER_BUCKET) is cast to int and then rounded down to an
// even number by the integer "/ 2 * 2"; this yields 0 when the ceiling is 0 or 1.
bucketCount = (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2;
// Same computation, clamped to at least 1 so the array below is never empty.
// NOTE(review): the two assignments look like the old and new line of a diff hunk.
// NOTE(review): clamping to 1 gives an odd bucket count, which appears to conflict
// with the evenness requirement stated above — confirm whether 2 was intended.
bucketCount = Math.max(1, (int) Math.ceil((double) capacity / ENTRIES_PER_BUCKET) / 2 * 2);
// The backing array holds bucketCount ints.
this.data = new int[bucketCount];
this.seed = Hash.randomSeed();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public static CuckooPlus16 construct(long[] keys) {
/**
 * Sizes and allocates the backing array from {@code capacity} and obtains a
 * hash seed from {@code Hash.randomSeed()}.
 *
 * @param capacity the entry count the bucket count is derived from
 */
public CuckooPlus16(int capacity) {
// bucketCount needs to be even for bucket2 to work
// capacity is rounded down to an even number by the integer "/ 2 * 2"
// (Math.ceil of an int-valued double leaves the value unchanged).
bucketCount = (int) Math.ceil((double) capacity) / 2 * 2;
// Array with one slot more than bucketCount.
this.data = new short[bucketCount + 1];
// Array with two slots more than bucketCount; this later assignment replaces the one above.
// NOTE(review): the two allocations look like the old and new line of a diff hunk;
// presumably the extra slot keeps index bucketCount + 1 addressable — confirm against the lookup code.
this.data = new short[bucketCount + 2];
this.seed = Hash.randomSeed();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public static CuckooPlus8 construct(long[] keys) {
/**
 * Sizes and allocates the backing array from {@code capacity} and obtains a
 * hash seed from {@code Hash.randomSeed()}.
 *
 * @param capacity the entry count the bucket count is derived from
 */
public CuckooPlus8(int capacity) {
// bucketCount needs to be even for bucket2 to work
// capacity is rounded down to an even number by the integer "/ 2 * 2"
// (Math.ceil of an int-valued double leaves the value unchanged).
bucketCount = (int) Math.ceil((double) capacity) / 2 * 2;
// Array with one slot more than bucketCount.
this.data = new byte[bucketCount + 1];
// Array with two slots more than bucketCount; this later assignment replaces the one above.
// NOTE(review): the two allocations look like the old and new line of a diff hunk;
// presumably the extra slot keeps index bucketCount + 1 addressable — confirm against the lookup code.
this.data = new byte[bucketCount + 2];
this.seed = Hash.randomSeed();
}

Expand Down
77 changes: 77 additions & 0 deletions fastfilter/src/test/java/org/fastfilter/RegressionTests.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package org.fastfilter;

import org.fastfilter.utils.Hash;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import static org.fastfilter.FilterType.*;
import static org.junit.Assert.assertTrue;

@RunWith(Parameterized.class)
public class RegressionTests {


/**
 * Supplies the rows for the parameterized runner.
 *
 * <p>Each row is {filter type, hash seed, keys, bits per key}, in the same
 * order as the constructor parameters. The display name uses the filter type,
 * the seed and the bits per key.
 *
 * @return one row per recorded regression case
 */
@Parameterized.Parameters(name = "{0}/seed={1}/{3} bits per key")
public static Object[][] regressionCases() {
return new Object[][]{
{BLOCKED_BLOOM, 872153271794238865L, new long[]{1, 2, 3}, 8},
{SUCCINCT_COUNTING_BLOCKED_BLOOM_RANKED, -401700599714690558L, new long[]{1, 2, 3}, 8},
{SUCCINCT_COUNTING_BLOCKED_BLOOM, 6049486880293779298L, new long[]{1, 2, 3}, 8},
{SUCCINCT_COUNTING_BLOCKED_BLOOM, 353772444652436712L, new long[]{5828366214313827392L, -8467365400393984494L, -424469057572555653L}, 8},
// Author's note: this case is actually impossible to reproduce because of the volatile seed.
// NOTE(review): "this case" presumably refers to the XOR_SIMPLE row that follows — confirm.
{XOR_SIMPLE, 6831634639270950343L, new long[]{1, 2, 3}, 8},
{CUCKOO_8, 6335419348330489927L, new long[]{1, 2, 3}, 8},
{CUCKOO_16, -9087718164446355442L, new long[]{1, 2, 3}, 8},
{CUCKOO_PLUS_8, -4031187722136552688L, new long[]{2173645522219008926L, 589862361776609381L, -1776331367981897399L, -7505626095864333717L, 6968992741301426055L, -3110009760358584538L,
4126573288832158972L, -7561361506777543806L, -5363365907738450196L, 4406554949060325754L, 6610203208080690753L, 3455015316204788042L, 7863420196911575708L, 1875128261287193281L,
6163360156169844663L, -24248169001003216L, -62326545792238735L, 5810209567031734221L, -2543215903193150719L, 8066741310405890113L, -1700763885488699715L, 331022494986758365L,
6921011948518481376L, -4135401271689018905L, -3648707841443156724L, 8304743068009082509L, -6681730404693737112L, 1427756985322103926L, 7726889622988885916L, 4123575358133211499L,
4537462330215573723L, 9078573934276235401L, 32187183317483562L, -1841847540329070596L, -8420216857639877248L, -8421265231581213825L, -8233517952154774510L, -4678911007264536715L,
-8526674353687284449L, -27365118851637401L, -254145228777582712L, 2965855027055207977L, -3466341725845433998L, 7006973965168506949L, -3585814173337365788L, 7264252236018528601L,
4058857911179366207L, 561654263008010300L, 2389635521107751132L, 7314182055688934933L, 5884448457819665732L, -7686492008813074402L, 298658331691777464L, -5830719925234073017L,
-6985871982812486035L, -4355730107235544811L, -6914420638144647786L, 7092124037956934799L, 5352744066168866120L, 4081227363605418964L, 2175125725804301191L, -5792740580295507772L,
-6183692349471335223L, -1221949547344177675L, -8340921677695714065L, 6519388252075884491L, -4726807568999917298L, 2930512993631049657L, -7721504975700326069L, -8479276039617916927L,
-2112370952694584366L, -9059529185598491289L, -6189590607337131826L, -5949793064086556159L, 1557391959671056410L, 4107630139293131578L, 4738411557430294180L, -3606951019798437215L,
-1742301458061239008L, -7389522306890543715L, 3726370125210336256L, -2051912870295294004L, -7639673055712206584L, -2767802468218389090L, 3131241789318669061L, -8316329307438505860L,
-4007166641668927959L, -6102930542977036947L, 7088919565484666773L, -3593550123383986925L, 6613817918373076399L, -7596314495989542882L, -5059595045899697395L, -547306193171270722L,
8660029473572898552L, -7731225535097214079L, 2058313776967259523L, 2964665398310080884L, 6291785408569188246L, -329774438524923459L, -5664134174314856593L, -5756681006397171776L, 6223635625117218437L}, 8},
{MPHF, 5400005265475528641L, new long[]{1773227589100607582L, 1401008621823229258L, 901259869510331588L, 1197333276475942193L, 1651119322544330030L, 986112488938952069L,
1675726966169519337L, 1888976485651830901L, 1912475806632315628L, 74149177065144196L, 942187212974983392L, 4215890488646823727L, 3694125823111201993L, 3793738020275325587L,
2995933316126352930L, 4017238031310632606L, 3798301062142417109L, 4113831042388378630L, 2707645218409175553L, 3919094501360474098L, 4252303149040498185L, 4199952774063362014L,
3327107703856825600L, 3964961892107416731L, 3966935050689896802L, 5921581983460164542L, 5314808407468600915L, 4696106051339789101L, 6634550099558541650L, 6382215924765560390L,
5154426188333895839L, 6466726512887879802L, 4836037707257613543L, 5608288809216362089L, 6793579614382201757L, 6709676086154795823L, 5972763369063718749L, 4765003610184494484L,
5635899990946803784L, 5349364953307177057L, 6264947502670452080L, 6912802837350428240L, 5429101923532929753L, 5668285853203792528L, 6563481559119688471L, 6317103420640399795L,
8937635149702679081L, 8062485652179232600L, 8942552659025336850L, 8508924203915110088L, 8938353353354172574L, 7907183519152868142L, 8654059200278009367L, 9151769575477085925L,
8494748655862745947L, 8180511740959930009L, 8244780136171765059L, 9165671267726030534L, 8022333815153416350L, -7348602598025993307L, -7137527130402610919L, -8864995500791741494L,
-7906426467332813681L, -7343692788430814188L, -9007903685362026026L, -9178084101442809748L, -7526812997805935236L, -7640655228186765204L, -6001026700792546473L, -6870431948453764034L,
-5271447769651360857L, -5591560689279781023L, -5868299437269234751L, -6226415928272647338L, -5431159857161381398L, -6370987534222793305L, -3043487285958836631L, -4301361355076290527L,
-3682760495848399784L, -3038236626480548566L, -3895662199162059335L, -3192071612777396897L, -2729235696166508115L, -3087500698602513665L, -4156274151845244416L, -3309406490623888358L,
-2528282539021436624L, -1633985981412420612L, -360913997783076114L, -111396594598251164L, -1339842643116805785L, -1403112313973786426L, -856792793066744400L, -392622225906607155L,
-863763710126232180L, -400874713595065720L, -373641626604004087L, -1951676159570020905L, -1774490078013273270L, -468961924964997308L, -1210600430103212706L, -384877607682781339L, -1945436007627906978L}, 8},
{COUNTING_BLOOM, 6360526788365209414L, new long[]{-4535795219140351433L, 4882771549875911188L, -6502814355560814028L}, 16},
{GCS2, -2130647756636796307L, new long[]{1, 2, 3}, 8}
};
}

/** Filter implementation used to build the filter under test. */
private final FilterType type;
/** Value handed to {@code Hash.setSeed} before the filter is constructed. */
private final long seed;
/** Keys the filter is built from and then queried with. */
private final long[] keys;
/** Bits-per-key setting forwarded to the filter construction. */
private final int bitsPerKey;

/**
 * Stores one parameter row for the test run.
 *
 * @param type       filter implementation to construct
 * @param seed       hash seed installed before construction
 * @param keys       keys to insert and look up
 * @param bitsPerKey bits-per-key setting for the filter
 */
public RegressionTests(FilterType type, long seed, long[] keys, int bitsPerKey) {
    this.bitsPerKey = bitsPerKey;
    this.keys = keys;
    this.seed = seed;
    this.type = type;
}

/**
 * Installs the recorded seed, builds the filter from the recorded keys and
 * checks that every one of those keys is reported as possibly contained.
 *
 * <p>The failure message names the key that was not found; the filter type
 * and seed are already part of the parameterized test name.
 */
@Test
public void regressionTest() {
    Hash.setSeed(seed);
    Filter filter = type.construct(keys, bitsPerKey);
    for (long key : keys) {
        assertTrue("inserted key not found: " + key, filter.mayContain(key));
    }
}
}
38 changes: 38 additions & 0 deletions fastfilter/src/test/java/org/fastfilter/SimpleFuzzer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package org.fastfilter;

import org.fastfilter.utils.Hash;

import java.util.Arrays;
import java.util.EnumSet;
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.LongStream;

import static junit.framework.TestCase.assertTrue;
import static org.fastfilter.FilterType.*;

/**
 * Stand-alone randomized driver: for several bits-per-key settings and key-set
 * sizes it builds every {@link FilterType} under many random hash seeds and
 * checks that each inserted key is reported by {@code mayContain}.
 */
public class SimpleFuzzer {

    /**
     * Runs the fuzzing loops until a failure occurs or all combinations are done.
     *
     * <p>On a missing key an {@link AssertionError} is thrown whose message is
     * {@code seed/type/keys}. On any {@link Exception} the same reproduction
     * line is printed to standard output before the exception is rethrown.
     *
     * @param args ignored
     */
    public static void main(String... args) {
        long seed = 0;
        for (int bitsPerKey = 8; bitsPerKey < 32; bitsPerKey += 8) {
            // The key-set size grows by a random step in [0, 10000) each round.
            for (int keyLength = 3; keyLength < 1_000_000; keyLength += ThreadLocalRandom.current().nextInt(10000)) {
                long[] keys = LongStream.range(0, keyLength).map(i -> ThreadLocalRandom.current().nextLong()).toArray();
                for (FilterType type : FilterType.values()) {
                    try {
                        for (int i = 0; i < 1_000_000; ++i) {
                            seed = ThreadLocalRandom.current().nextLong();
                            Hash.setSeed(seed);
                            Filter filter = type.construct(keys, bitsPerKey);
                            for (long key : keys) {
                                // Build the (potentially huge) reproduction message only on
                                // failure; formatting the whole key array for every key
                                // would cost quadratic time in the number of keys.
                                if (!filter.mayContain(key)) {
                                    throw new AssertionError(seed + "/" + type + "/" + Arrays.toString(keys));
                                }
                            }
                        }
                    } catch (Exception e) {
                        System.out.println(seed + "/" + type + "/" + Arrays.toString(keys));
                        throw e;
                    }
                }
            }
        }
    }
}
4 changes: 2 additions & 2 deletions fastfilter/src/test/java/org/fastfilter/TestAllFilters.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ public static void main(String... args) {
for (int size = 1_000_000; size <= 10_000_000; size *= 10) {
System.out.println("size " + size);
for (int test = 0; test < 10; test++) {
// test(FilterType.BLOOM, size, test, true);
// test(FilterType.BLOCKED_BLOOM, size, test, true);
test(FilterType.BLOOM, size, test, true);
test(FilterType.BLOCKED_BLOOM, size, test, true);
test(FilterType.COUNTING_BLOOM, size, test, true);
test(FilterType.SUCCINCT_COUNTING_BLOOM, size, test, true);
test(FilterType.SUCCINCT_COUNTING_BLOOM_RANKED, size, test, true);
Expand Down