## Bloom Filter
---

---
#### Off the Shelf:
---

In [2]:
:dep bloomfilter

In [3]:
use bloomfilter::Bloom;

In [4]:
// Off-the-shelf filter over `str` keys, built with an item count of 1_000_000
// and a target false-positive rate of 0.001.
let mut bf = Bloom::<str>::new_for_fp_rate(1_000_000, 0.001);

In [5]:
bf.set("test1");
bf.set("test3");

In [6]:
// Probe test0..test9 and pair each index with the membership answer.
(0..10usize).map(|i| (i, bf.check(&format!("test{i}")))).collect::<Vec<_>>()

[(0, false), (1, true), (2, false), (3, true), (4, false), (5, false), (6, false), (7, false), (8, false), (9, false)]

---
#### Implementation:
---

In [7]:
:dep bit-vec
:dep xxhash-rust = {features=["xxh3"]}

In [8]:
use bit_vec::BitVec;
use xxhash_rust::xxh3::xxh3_64;

In [9]:
/// A hand-rolled Bloom filter: a fixed-size bit array plus a number of hash
/// functions applied to every key.
///
/// `T` only tags the key type the filter is meant for; no `T` value is stored.
#[derive(Debug)]
struct BloomFilter<T> {
    /// Backing bit array; one bit per slot.
    bv: BitVec,
    /// Number of slots; used as the modulus when mapping a hash to a bit index.
    num_bits: usize,
    /// How many hash functions are applied to each key.
    num_hashes: u8,
    /// Zero-sized marker recording the key type `T`.
    key_type: std::marker::PhantomData<T>,
}

In [10]:
impl BloomFilter<&[u8]> {
    /// Creates an empty filter with `num_bits` slots, all cleared, that applies
    /// `num_hashes` hash functions to every key.
    ///
    /// With `num_hashes == 0` nothing is ever set and [`check`](Self::check)
    /// answers `true` for every key.
    ///
    /// # Panics
    ///
    /// Panics if `num_bits` is zero: bit indices are computed modulo
    /// `num_bits`, so an empty filter could not hash anything.
    fn new(num_bits: usize, num_hashes: u8) -> Self {
        assert!(num_bits > 0, "BloomFilter requires at least one bit");
        Self {
            bv: BitVec::from_elem(num_bits, false),
            num_bits,
            num_hashes,
            key_type: std::marker::PhantomData,
        }
    }

    /// Maps `key` to a bit index in `0..num_bits` for hash function number
    /// `hash_number`.
    ///
    /// The individual hash functions are derived from one XXH3 hash by
    /// appending `hash_number` as a single trailing byte to the key.
    fn hash(&self, key: &[u8], hash_number: u8) -> usize {
        let mut salted = Vec::with_capacity(key.len() + 1);
        salted.extend_from_slice(key);
        salted.push(hash_number);
        // The remainder is strictly below `num_bits`, so it always fits in `usize`.
        (xxh3_64(&salted) % (self.num_bits as u64)) as usize
    }

    /// Records `key` by setting the bit chosen by each hash function.
    fn insert(&mut self, key: &[u8]) {
        for hash_number in 0..self.num_hashes {
            let index = self.hash(key, hash_number);
            self.bv.set(index, true);
        }
    }

    /// Returns `false` if `key` was definitely never inserted, and `true` if
    /// every bit chosen for `key` is set (the key was inserted, or other keys
    /// happen to cover the same bits).
    ///
    /// Only reads the filter, so a shared borrow is sufficient.
    fn check(&self, key: &[u8]) -> bool {
        (0..self.num_hashes).all(|hash_number| {
            let index = self.hash(key, hash_number);
            self.bv.get(index) == Some(true)
        })
    }
}

In [11]:
// Deliberately tiny filter (64 bits, 3 hashes) so the bit pattern is easy to read.
let mut q: BloomFilter<&[u8]> = BloomFilter::new(64, 3);

In [12]:
q.insert(b"test_key_1");
q.insert(b"test_key_3");

In [13]:
// Probe test_key_0..test_key_9 and pair each index with the membership answer.
(0..10usize).map(|i| (i, q.check(format!("test_key_{i}").as_bytes()))).collect::<Vec<_>>()

[(0, false), (1, true), (2, false), (3, true), (4, false), (5, false), (6, false), (7, false), (8, false), (9, false)]

In [14]:
// Show the filter's Debug output; two inserted keys with 3 hashes each can set
// at most 6 of the 64 bits.
q

BloomFilter { bv: 0000010110000000000000000000000000000001000000000000010000000000, num_bits: 64, num_hashes: 3, key_type: PhantomData<&[u8]> }