Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Sep 26, 2023
1 parent c4254c5 commit 8e84f87
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 54 deletions.
133 changes: 104 additions & 29 deletions dependencies/xor_singleheader/include/binaryfusefilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,22 @@
// highly unlikely
#endif

// qsort comparator for uint64_t keys.
// Note: the naive `a - b` idiom is wrong here: the 64-bit difference is
// truncated to int, so large keys compare incorrectly (e.g. 1 vs 2^63+1).
// Compare explicitly instead.
static int binary_fuse_cmpfunc(const void * a, const void * b) {
  const uint64_t lhs = *(const uint64_t*)a;
  const uint64_t rhs = *(const uint64_t*)b;
  return (lhs < rhs) ? -1 : (lhs > rhs) ? 1 : 0;
}

// Sorts `keys` in place and compacts away duplicates.
// Returns the number of distinct keys now occupying keys[0..result-1].
static size_t binary_fuse_sort_and_remove_dup(uint64_t* keys, size_t length) {
  if (length == 0) { return 0; }
  qsort(keys, length, sizeof(uint64_t), binary_fuse_cmpfunc);
  // keys[0] is always kept; start writing at index 1 so the first key
  // is never overwritten (starting j at 0 would drop keys[0]).
  size_t j = 1;
  for (size_t i = 1; i < length; i++) {
    if (keys[i] != keys[i-1]) {
      keys[j] = keys[i];
      j++;
    }
  }
  return j;
}

/**
* We start with a few utilities.
***/
Expand Down Expand Up @@ -60,13 +76,73 @@ typedef struct binary_fuse8_s {
uint8_t *Fingerprints;
} binary_fuse8_t;

#ifdef _MSC_VER
// Windows programmers who target 32-bit platform may need help:
static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) { return __umulh(a, b); }
#else
// #ifdefs adapted from:
// https://stackoverflow.com/a/50958815
#ifdef __SIZEOF_INT128__ // compilers supporting __uint128, e.g., gcc, clang
static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
return ((__uint128_t)a * b) >> 64;
}
#elif defined(_M_X64) || defined(_M_ARM64) // MSVC x64 / ARM64
// BUGFIX: the macro is `_M_ARM64` (MSVC's predefined ARM64 macro);
// `_MARM64` does not exist, so this branch could never be selected.
// __umulh returns the high 64 bits of the full 128-bit product.
static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
  return __umulh(a, b);
}
#elif defined(_M_IA64) // also MSVC
// _umul128 computes the full 128-bit product; we keep only the high half.
static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
  unsigned __int64 hi;
  (void) _umul128(a, b, &hi);
  return hi;
}
#else // portable implementation using uint64_t
// Portable high-half 64x64 -> 128 multiply using 32-bit limbs
// (schoolbook multiplication; adapted from
// https://stackoverflow.com/a/51587262).
static inline uint64_t binary_fuse_mulhi(uint64_t a, uint64_t b) {
  // Split each operand into low/high 32-bit limbs.
  const uint64_t a_lo = a & 0xFFFFFFFF;
  const uint64_t a_hi = a >> 32;
  const uint64_t b_lo = b & 0xFFFFFFFF;
  const uint64_t b_hi = b >> 32;

  // The four partial products of the schoolbook method.
  const uint64_t lo_lo = a_lo * b_lo;
  const uint64_t hi_lo = a_hi * b_lo;
  const uint64_t lo_hi = a_lo * b_hi;
  const uint64_t hi_hi = a_hi * b_hi;

  // Middle column: one 64-bit partial product plus two 32-bit addends.
  // This cannot overflow: the maximum is
  //   (2^32-1)*(2^32-1) + (2^32-1) + (2^32-1) = 2^64 - 1.
  const uint64_t cross = hi_lo + (lo_lo >> 32) + (lo_hi & 0xFFFFFFFF);

  // High 64 bits of the 128-bit product.
  // (The low half would be (cross << 32) | (lo_lo & 0xFFFFFFFF),
  // but callers only need the high half.)
  return hi_hi + (cross >> 32) + (lo_hi >> 32);
}
#endif

typedef struct binary_hashes_s {
Expand Down Expand Up @@ -151,7 +227,7 @@ static inline bool binary_fuse8_allocate(uint32_t size,
filter->SegmentLength = 262144;
}
filter->SegmentLengthMask = filter->SegmentLength - 1;
double sizeFactor = binary_fuse_calculate_size_factor(arity, size);
double sizeFactor = size <= 1 ? 0 : binary_fuse_calculate_size_factor(arity, size);
uint32_t capacity = size <= 1 ? 0 : (uint32_t)(round((double)size * sizeFactor));
uint32_t initSegmentCount =
(capacity + filter->SegmentLength - 1) / filter->SegmentLength -
Expand Down Expand Up @@ -197,7 +273,7 @@ static inline uint8_t binary_fuse_mod3(uint8_t x) {
// The caller is responsible for calling binary_fuse8_allocate(size,filter)
// before. For best performance, the caller should ensure that there are not too
// many duplicated keys.
static inline bool binary_fuse8_populate(const uint64_t *keys, uint32_t size,
static inline bool binary_fuse8_populate(uint64_t *keys, uint32_t size,
binary_fuse8_t *filter) {
uint64_t rng_counter = 0x726b2b9d438b9d4d;
filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
Expand Down Expand Up @@ -230,17 +306,15 @@ static inline bool binary_fuse8_populate(const uint64_t *keys, uint32_t size,
for (int loop = 0; true; ++loop) {
if (loop + 1 > XOR_MAX_ITERATIONS) {
// The probability of this happening is lower than the
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system),
// but if it happens, we just fill the fingerprint with ones which
// will flag all possible keys as 'possible', ensuring a correct result.
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system)
memset(filter->Fingerprints, ~0, filter->ArrayLength);
free(alone);
free(t2count);
free(reverseH);
free(t2hash);
free(reverseOrder);
free(startPos);
return true;
return false;
}

for (uint32_t i = 0; i < block; i++) {
Expand Down Expand Up @@ -295,9 +369,9 @@ static inline bool binary_fuse8_populate(const uint64_t *keys, uint32_t size,
error = (t2count[h2] < 4) ? 1 : error;
}
if(error) {
memset(reverseOrder, 0, sizeof(uint64_t[size]));
memset(t2count, 0, sizeof(uint8_t[capacity]));
memset(t2hash, 0, sizeof(uint64_t[capacity]));
memset(reverseOrder, 0, sizeof(uint64_t) * size);
memset(t2count, 0, sizeof(uint8_t) * capacity);
memset(t2hash, 0, sizeof(uint64_t) * capacity);
filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
continue;
}
Expand Down Expand Up @@ -345,10 +419,12 @@ static inline bool binary_fuse8_populate(const uint64_t *keys, uint32_t size,
// success
size = stacksize;
break;
} else if(duplicates > 0) {
size = binary_fuse_sort_and_remove_dup(keys, size);
}
memset(reverseOrder, 0, sizeof(uint64_t[size]));
memset(t2count, 0, sizeof(uint8_t[capacity]));
memset(t2hash, 0, sizeof(uint64_t[capacity]));
memset(reverseOrder, 0, sizeof(uint64_t) * size);
memset(t2count, 0, sizeof(uint8_t) * capacity);
memset(t2hash, 0, sizeof(uint64_t) * capacity);
filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
}

Expand Down Expand Up @@ -439,7 +515,7 @@ static inline bool binary_fuse16_allocate(uint32_t size,
}
filter->SegmentLengthMask = filter->SegmentLength - 1;
double sizeFactor = size <= 1 ? 0 : binary_fuse_calculate_size_factor(arity, size);
uint32_t capacity = (uint32_t)(round((double)size * sizeFactor));
uint32_t capacity = size <= 1 ? 0 : (uint32_t)(round((double)size * sizeFactor));
uint32_t initSegmentCount =
(capacity + filter->SegmentLength - 1) / filter->SegmentLength -
(arity - 1);
Expand Down Expand Up @@ -481,7 +557,7 @@ static inline void binary_fuse16_free(binary_fuse16_t *filter) {
// The caller is responsable for calling binary_fuse8_allocate(size,filter)
// before. For best performance, the caller should ensure that there are not too
// many duplicated keys.
static inline bool binary_fuse16_populate(const uint64_t *keys, uint32_t size,
static inline bool binary_fuse16_populate(uint64_t *keys, uint32_t size,
binary_fuse16_t *filter) {
uint64_t rng_counter = 0x726b2b9d438b9d4d;
filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
Expand Down Expand Up @@ -514,17 +590,14 @@ static inline bool binary_fuse16_populate(const uint64_t *keys, uint32_t size,
for (int loop = 0; true; ++loop) {
if (loop + 1 > XOR_MAX_ITERATIONS) {
// The probability of this happening is lower than the
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system),
// but if it happens, we just fill the fingerprint with ones which
// will flag all possible keys as 'possible', ensuring a correct result.
memset(filter->Fingerprints, ~0, filter->ArrayLength * sizeof(uint16_t));
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system).
free(alone);
free(t2count);
free(reverseH);
free(t2hash);
free(reverseOrder);
free(startPos);
return true;
return false;
}

for (uint32_t i = 0; i < block; i++) {
Expand Down Expand Up @@ -579,9 +652,9 @@ static inline bool binary_fuse16_populate(const uint64_t *keys, uint32_t size,
error = (t2count[h2] < 4) ? 1 : error;
}
if(error) {
memset(reverseOrder, 0, sizeof(uint64_t[size]));
memset(t2count, 0, sizeof(uint8_t[capacity]));
memset(t2hash, 0, sizeof(uint64_t[capacity]));
memset(reverseOrder, 0, sizeof(uint64_t) * size);
memset(t2count, 0, sizeof(uint8_t) * capacity);
memset(t2hash, 0, sizeof(uint64_t) * capacity);
filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
continue;
}
Expand Down Expand Up @@ -629,10 +702,12 @@ static inline bool binary_fuse16_populate(const uint64_t *keys, uint32_t size,
// success
size = stacksize;
break;
} else if(duplicates > 0) {
size = binary_fuse_sort_and_remove_dup(keys, size);
}
memset(reverseOrder, 0, sizeof(uint64_t[size]));
memset(t2count, 0, sizeof(uint8_t[capacity]));
memset(t2hash, 0, sizeof(uint64_t[capacity]));
memset(reverseOrder, 0, sizeof(uint64_t) * size);
memset(t2count, 0, sizeof(uint8_t) * capacity);
memset(t2hash, 0, sizeof(uint64_t) * capacity);
filter->Seed = binary_fuse_rng_splitmix64(&rng_counter);
}

Expand Down
70 changes: 45 additions & 25 deletions dependencies/xor_singleheader/include/xorfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,30 @@
#include <stdlib.h>
#include <string.h>

#ifndef XOR_SORT_ITERATIONS
#define XOR_SORT_ITERATIONS 10 // after 10 iterations, we sort and remove duplicates
#endif

#ifndef XOR_MAX_ITERATIONS
#define XOR_MAX_ITERATIONS 100 // probability of success should always be > 0.5 so 100 iterations is highly unlikely
#endif


// qsort comparator for uint64_t keys.
// Note: the naive `a - b` idiom is wrong here: the 64-bit difference is
// truncated to int, so large keys compare incorrectly. Compare explicitly.
static int xor_cmpfunc(const void * a, const void * b) {
  const uint64_t lhs = *(const uint64_t*)a;
  const uint64_t rhs = *(const uint64_t*)b;
  return (lhs < rhs) ? -1 : (lhs > rhs) ? 1 : 0;
}

// Sorts `keys` in place and compacts away duplicates.
// Returns the number of distinct keys now occupying keys[0..result-1].
static size_t xor_sort_and_remove_dup(uint64_t* keys, size_t length) {
  if (length == 0) { return 0; }
  qsort(keys, length, sizeof(uint64_t), xor_cmpfunc);
  // keys[0] is always kept; start writing at index 1 so the first key
  // is never overwritten (starting j at 0 would drop keys[0]).
  size_t j = 1;
  for (size_t i = 1; i < length; i++) {
    if (keys[i] != keys[i-1]) {
      keys[j] = keys[i];
      j++;
    }
  }
  return j;
}
/**
* We assume that you have a large set of 64-bit integers
* and you want a data structure to do membership tests using
Expand Down Expand Up @@ -421,10 +441,10 @@ static inline uint32_t xor_flushone_decrement_buffer(xor_setbuffer_t *buffer,

// Construct the filter, returns true on success, false on failure.
// The algorithm fails when there is insufficient memory.
// The caller is responsable for calling binary_fuse8_allocate(size,filter)
// The caller is responsable for calling xor8_allocate(size,filter)
// before. For best performance, the caller should ensure that there are not too
// many duplicated keys.
static inline bool xor8_buffered_populate(const uint64_t *keys, uint32_t size, xor8_t *filter) {
static inline bool xor8_buffered_populate(uint64_t *keys, uint32_t size, xor8_t *filter) {
if(size == 0) { return false; }
uint64_t rng_counter = 1;
filter->seed = xor_rng_splitmix64(&rng_counter);
Expand Down Expand Up @@ -470,12 +490,12 @@ static inline bool xor8_buffered_populate(const uint64_t *keys, uint32_t size, x

while (true) {
iterations ++;
if(iterations == XOR_SORT_ITERATIONS) {
size = xor_sort_and_remove_dup(keys, size);
}
if(iterations > XOR_MAX_ITERATIONS) {
// The probability of this happening is lower than the
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system),
// but if it happens, we just fill the fingerprint with ones which
// will flag all possible keys as 'possible', ensuring a correct result.
memset(filter->fingerprints, ~0, 3 * filter->blockLength);
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system).
xor_free_buffer(&buffer0);
xor_free_buffer(&buffer1);
xor_free_buffer(&buffer2);
Expand Down Expand Up @@ -632,10 +652,10 @@ static inline bool xor8_buffered_populate(const uint64_t *keys, uint32_t size, x

// Construct the filter, returns true on success, false on failure.
// The algorithm fails when there is insufficient memory.
// The caller is responsable for calling binary_fuse8_allocate(size,filter)
// The caller is responsable for calling xor8_allocate(size,filter)
// before. For best performance, the caller should ensure that there are not too
// many duplicated keys.
static inline bool xor8_populate(const uint64_t *keys, uint32_t size, xor8_t *filter) {
static inline bool xor8_populate(uint64_t *keys, uint32_t size, xor8_t *filter) {
if(size == 0) { return false; }
uint64_t rng_counter = 1;
filter->seed = xor_rng_splitmix64(&rng_counter);
Expand Down Expand Up @@ -668,12 +688,12 @@ static inline bool xor8_populate(const uint64_t *keys, uint32_t size, xor8_t *fi

while (true) {
iterations ++;
if(iterations == XOR_SORT_ITERATIONS) {
size = xor_sort_and_remove_dup(keys, size);
}
if(iterations > XOR_MAX_ITERATIONS) {
// The probability of this happening is lower than the
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system),
// but if it happens, we just fill the fingerprint with ones which
// will flag all possible keys as 'possible', ensuring a correct result.
memset(filter->fingerprints, ~0, 3 * filter->blockLength);
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system).
free(sets);
free(Q);
free(stack);
Expand Down Expand Up @@ -839,10 +859,10 @@ static inline bool xor8_populate(const uint64_t *keys, uint32_t size, xor8_t *fi

// Construct the filter, returns true on success, false on failure.
// The algorithm fails when there is insufficient memory.
// The caller is responsable for calling binary_fuse8_allocate(size,filter)
// The caller is responsable for calling xor16_allocate(size,filter)
// before. For best performance, the caller should ensure that there are not too
// many duplicated keys.
static inline bool xor16_buffered_populate(const uint64_t *keys, uint32_t size, xor16_t *filter) {
static inline bool xor16_buffered_populate(uint64_t *keys, uint32_t size, xor16_t *filter) {
if(size == 0) { return false; }
uint64_t rng_counter = 1;
filter->seed = xor_rng_splitmix64(&rng_counter);
Expand Down Expand Up @@ -888,12 +908,12 @@ static inline bool xor16_buffered_populate(const uint64_t *keys, uint32_t size,

while (true) {
iterations ++;
if(iterations == XOR_SORT_ITERATIONS) {
size = xor_sort_and_remove_dup(keys, size);
}
if(iterations > XOR_MAX_ITERATIONS) {
// The probability of this happening is lower than the
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system),
// but if it happens, we just fill the fingerprint with ones which
// will flag all possible keys as 'possible', ensuring a correct result.
memset(filter->fingerprints, ~0, 3 * filter->blockLength * sizeof(uint16_t));
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system).
xor_free_buffer(&buffer0);
xor_free_buffer(&buffer1);
xor_free_buffer(&buffer2);
Expand Down Expand Up @@ -1053,10 +1073,10 @@ static inline bool xor16_buffered_populate(const uint64_t *keys, uint32_t size,

// Construct the filter, returns true on success, false on failure.
// The algorithm fails when there is insufficient memory.
// The caller is responsable for calling binary_fuse8_allocate(size,filter)
// The caller is responsable for calling xor16_allocate(size,filter)
// before. For best performance, the caller should ensure that there are not too
// many duplicated keys.
static inline bool xor16_populate(const uint64_t *keys, uint32_t size, xor16_t *filter) {
static inline bool xor16_populate(uint64_t *keys, uint32_t size, xor16_t *filter) {
if(size == 0) { return false; }
uint64_t rng_counter = 1;
filter->seed = xor_rng_splitmix64(&rng_counter);
Expand Down Expand Up @@ -1090,16 +1110,16 @@ static inline bool xor16_populate(const uint64_t *keys, uint32_t size, xor16_t *

while (true) {
iterations ++;
if(iterations == XOR_SORT_ITERATIONS) {
size = xor_sort_and_remove_dup(keys, size);
}
if(iterations > XOR_MAX_ITERATIONS) {
// The probability of this happening is lower than the
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system),
// but if it happens, we just fill the fingerprint with ones which
// will flag all possible keys as 'possible', ensuring a correct result.
memset(filter->fingerprints, ~0, 3 * filter->blockLength * sizeof(uint16_t));
// the cosmic-ray probability (i.e., a cosmic ray corrupts your system).
free(sets);
free(Q);
free(stack);
return true;
return false;
}

memset(sets, 0, sizeof(xor_xorset_t) * arrayLength);
Expand Down

0 comments on commit 8e84f87

Please sign in to comment.