In [16]:
!pip install bitarray



In [17]:
import hashlib
from bitarray import bitarray

In [19]:


class BloomFilter:
    """Probabilistic set membership backed by a fixed-size bit array.

    Every item is mapped to ``hash_functions`` bit positions: position ``i``
    is the SHA-256 digest of ``f"{item}{i}"`` reduced modulo ``size``.
    ``add`` sets those bits; ``contains`` reports True only when all of them
    are set. Because different items may set the same bits, ``contains`` can
    return a false positive, but never a false negative for an added item.

    Items are hashed through their string formatting, so values with the
    same ``str()`` form (e.g. ``1`` and ``"1"``) are indistinguishable.
    """

    def __init__(self, size, hash_functions):
        """Create an empty filter.

        Args:
            size: Number of bits in the underlying bit array; must be > 0.
            hash_functions: Number of bit positions derived per item;
                must be > 0.

        Raises:
            ValueError: If ``size`` or ``hash_functions`` is not positive.
                A zero size would make every ``add`` fail on the modulo,
                and zero hash functions would make ``contains`` vacuously
                True for every item.
        """
        if size <= 0:
            raise ValueError("size must be a positive integer")
        if hash_functions <= 0:
            raise ValueError("hash_functions must be a positive integer")
        self.size = size
        self.hash_functions = hash_functions
        self.bit_array = bitarray(size)
        self.bit_array.setall(0)

    def _indexes(self, item):
        """Yield the bit positions for ``item``, one per hash function."""
        for i in range(self.hash_functions):
            digest = hashlib.sha256(f"{item}{i}".encode()).digest()
            # Big-endian interpretation of the raw digest equals
            # int(hexdigest, 16), so positions match the hex-based scheme.
            yield int.from_bytes(digest, "big") % self.size

    def add(self, item):
        """Record ``item`` by setting each of its bit positions to 1."""
        for index in self._indexes(item):
            self.bit_array[index] = 1

    def contains(self, item):
        """Return True if ``item`` may have been added, False if it was not.

        Stops at the first unset bit, since one unset bit proves absence.
        """
        return all(self.bit_array[index] for index in self._indexes(item))

    def __contains__(self, item):
        """Support ``item in bloom_filter`` as a synonym for ``contains``."""
        return self.contains(item)

if __name__ == "__main__":
    # Demo configuration: a 1000-bit filter probed at 5 positions per item.
    size = 1000
    hash_functions = 5
    bloom_filter = BloomFilter(size, hash_functions)

    # Populate the filter with a small vocabulary.
    items_to_add = [
        'abound', 'abounds', 'abundance', 'abundant', 'accessible',
        'bloom', 'blossom', 'bolster', 'bonny', 'bonus', 'bonuses',
        'coherent', 'cohesive', 'colorful', 'comely', 'comfort',
        'gems', 'generosity', 'generous', 'generously', 'genial',
    ]
    for item in items_to_add:
        bloom_filter.add(item)

    # Probe with words that were never added followed by words that were.
    items_to_check = [
        'bluff', 'cheater', 'hate', 'war', 'humanity',
        'racism', 'hurt', 'nuke', 'gloomy', 'facebook',
        'twitter', 'blossom', 'bolster', 'bonny', 'bonus', 'bonuses',
        'coherent', 'cohesive', 'colorful',
    ]
    for item in items_to_check:
        message = (
            f"{item} is in the set."
            if bloom_filter.contains(item)
            else f"{item} is not in the set."
        )
        print(message)


bluff is not in the set.
cheater is not in the set.
hate is not in the set.
war is not in the set.
humanity is not in the set.
racism is not in the set.
hurt is not in the set.
nuke is not in the set.
gloomy is not in the set.
facebook is not in the set.
twitter is not in the set.
blossom is in the set.
bolster is in the set.
bonny is in the set.
bonus is in the set.
bonuses is in the set.
coherent is in the set.
cohesive is in the set.
colorful is in the set.


In [20]:
import math

def calculate_bit_array_size(expected_elements, false_positive_probability):
    """Return the bit-array size m for a Bloom filter.

    Uses m = -n * ln(p) / (ln 2)^2, truncated to an integer and clamped to
    at least 1 so the result is always a usable array size.

    Args:
        expected_elements: Number of items n the filter should hold; > 0.
        false_positive_probability: Target false-positive rate p, strictly
            between 0 and 1.

    Returns:
        The number of bits as a positive int.

    Raises:
        ValueError: If ``expected_elements`` is not positive or
            ``false_positive_probability`` is outside the open interval (0, 1).
    """
    if expected_elements <= 0:
        raise ValueError("expected_elements must be positive")
    if not 0 < false_positive_probability < 1:
        raise ValueError("false_positive_probability must be in (0, 1)")
    m = -1 * (expected_elements * math.log(false_positive_probability)) / (math.log(2) ** 2)
    # Truncation can reach 0 for tiny n with a lax p; a 0-bit filter is unusable.
    return max(1, int(m))

def calculate_num_hash_functions(bit_array_size, expected_elements):
    """Return the number of hash functions k for a Bloom filter.

    Uses k = (m / n) * ln 2, truncated to an integer and clamped to at
    least 1: with zero hash functions every membership query would
    trivially succeed.

    Args:
        bit_array_size: Number of bits m in the filter; > 0.
        expected_elements: Number of items n the filter should hold; > 0.

    Returns:
        The number of hash functions as a positive int.

    Raises:
        ValueError: If either argument is not positive.
    """
    if bit_array_size <= 0:
        raise ValueError("bit_array_size must be positive")
    if expected_elements <= 0:
        raise ValueError("expected_elements must be positive")
    k = (bit_array_size / expected_elements) * math.log(2)
    # m/n below 1/ln(2) truncates to 0; always use at least one hash.
    return max(1, int(k))

# Target workload: 1000 expected items at a 1% false-positive rate.
expected_elements, false_positive_probability = 1000, 0.01

# Derive the bit count first; the hash-function count depends on it.
bit_array_size = calculate_bit_array_size(expected_elements, false_positive_probability)
num_hash_functions = calculate_num_hash_functions(bit_array_size, expected_elements)

print(
    f"Bit Array Size (m): {bit_array_size}",
    f"Number of Hash Functions (k): {num_hash_functions}",
    sep="\n",
)


Bit Array Size (m): 9585
Number of Hash Functions (k): 6
