In [1]:
pip install mmh3

Collecting mmh3
  Downloading mmh3-3.0.0-cp37-cp37m-manylinux2010_x86_64.whl (50 kB)
[?25l[K     |██████▍                         | 10 kB 24.6 MB/s eta 0:00:01[K     |████████████▉                   | 20 kB 30.6 MB/s eta 0:00:01[K     |███████████████████▎            | 30 kB 31.8 MB/s eta 0:00:01[K     |█████████████████████████▊      | 40 kB 35.4 MB/s eta 0:00:01[K     |████████████████████████████████| 50 kB 6.0 MB/s 
[?25hInstalling collected packages: mmh3
Successfully installed mmh3-3.0.0


In [2]:
pip install bitarray

Collecting bitarray
  Downloading bitarray-2.3.4.tar.gz (88 kB)
[?25l[K     |███▊                            | 10 kB 22.6 MB/s eta 0:00:01[K     |███████▍                        | 20 kB 29.2 MB/s eta 0:00:01[K     |███████████▏                    | 30 kB 29.8 MB/s eta 0:00:01[K     |██████████████▉                 | 40 kB 25.2 MB/s eta 0:00:01[K     |██████████████████▋             | 51 kB 26.1 MB/s eta 0:00:01[K     |██████████████████████▎         | 61 kB 27.8 MB/s eta 0:00:01[K     |██████████████████████████      | 71 kB 24.5 MB/s eta 0:00:01[K     |█████████████████████████████▊  | 81 kB 25.9 MB/s eta 0:00:01[K     |████████████████████████████████| 88 kB 5.8 MB/s 
[?25hBuilding wheels for collected packages: bitarray
  Building wheel for bitarray (setup.py) ... [?25l[?25hdone
  Created wheel for bitarray: filename=bitarray-2.3.4-cp37-cp37m-linux_x86_64.whl size=171944 sha256=111bee4d90c5cdb43c14727f6d4abef722c5b0bac24d38e6bcb2ca05f5ec8baa
  Stored in directo

##BloomFilter

In [3]:
import math
import mmh3
from bitarray import bitarray


class BloomFilter(object):

	'''
	Class for Bloom filter, using murmur3 hash function
	'''

	def __init__(self, items_count, fp_prob):
		'''
		items_count : int
			Number of items expected to be stored in bloom filter
		fp_prob : float
			False Positive probability in decimal
		'''
		# False possible probability in decimal
		self.fp_prob = fp_prob

		# Size of bit array to use
		self.size = self.get_size(items_count, fp_prob)

		# number of hash functions to use
		self.hash_count = self.get_hash_count(self.size, items_count)

		# Bit array of given size
		self.bit_array = bitarray(self.size)

		# initialize all bits as 0
		self.bit_array.setall(0)

	def add(self, item):
		'''
		Add an item in the filter
		'''
		digests = []
		for i in range(self.hash_count):

			# create digest for given item.
			# i work as seed to mmh3.hash() function
			# With different seed, digest created is different
			digest = mmh3.hash(item, i) % self.size
			digests.append(digest)

			# set the bit True in bit_array
			self.bit_array[digest] = True

	def check(self, item):
		'''
		Check for existence of an item in filter
		'''
		for i in range(self.hash_count):
			digest = mmh3.hash(item, i) % self.size
			if self.bit_array[digest] == False:
				'''
				 if any of bit is False then,its not present
				 in filter
				 else there is probability that it exist
				'''
				return False
		return True

	@classmethod
	def get_size(self, n, p):
		'''
		Return the size of bit array(m) to used using
		following formula
		m = -(n * lg(p)) / (lg(2)^2)
		n : int
			number of items expected to be stored in filter
		p : float
			False Positive probability in decimal
		'''
		m = -(n * math.log(p))/(math.log(2)**2)
		return int(m)

	@classmethod
	def get_hash_count(self, m, n):
		'''
		Return the hash function(k) to be used using
		following formula
		k = (m/n) * lg(2)

		m : int
			size of bit array
		n : int
			number of items expected to be stored in filter
		'''
		k = (m/n) * math.log(2)
		return int(k)


Is Vacinated or not

In [6]:
from random import shuffle


n = 45 #no of items to add
p = 0.01 #false positive probability

bl = BloomFilter(n,p)
print("Size of bit array: ",bl.size)
print("False positive Probability: ", bl.fp_prob)
print("Number of hash functions: ", bl.hash_count)

# words to be added
Vaccinated = ['Aditi','Musunur','Advitiya ','Sujeet','Alagesan','Poduri','Amrish ','Ilyas',	
                'Aprativirya ','Seshan','Asvathama ','Ponnada','Avantas ','Ghosal','Avidosa','Vaisakhi',
                'Barsati ','Sandipa','Debasis ','Sundhararajan','Devasru ','Subramanyan',
                'Dharmadhrt',' Ramila','Dhritiman ','Salim','Gopa ','Trilochana','Hardeep','Suksma',
                'Jayadev ','Mitali','Jitendra ','Choudhary','Kalyanavata','Veerender',
                'Naveen',' Tikaram','Vijai ','Sritharan']

# word not added
Not_Vaccinated = ['Abdullah','Tamara', 'Abdul','Maya','Jai','Tara', 'Rohan', 'Ana'
                   'Ajay', 'Aisha', 'Ram','Alisha','Sanjay', 'Anya', 'Ravi', 'Lila','Arman'
									  ,'Amit','Fatima','Sandeep',' Anika',' Vijay','Anita','Rahul','Trisha',
										'Ira',' Aryan',' Ibrahim',' Anjali',' Ashwin',' Jasmin',' Kiran',' Priya','Krish',
										'Asha','Arjun','Isha','Rajesh',
										'Riya','Dev',' Mira','Deepak','Shyla','Arun','Mara','Anand']

for i in Vaccinated:
	bl.add(i)

shuffle(Vaccinated)
shuffle(Not_Vaccinated)

ToBeTested = Vaccinated[:36] + Not_Vaccinated[:9]
shuffle(ToBeTested)

for i in ToBeTested:
	if bl.check(i):
		if i in Vaccinated:
			print(i," is probably Vaccinated")
		else:
			print(i," is NOT Vaccinated but is found to be Vaccinated Falsely(False Positive)")
	else:
		print(i," is definitely not Vaccinated!")


Size of bit array:  431
False positive Probability:  0.01
Number of hash functions:  6
 Ramila  is probably Vaccinated
Ponnada  is probably Vaccinated
Aditi  is probably Vaccinated
Ghosal  is probably Vaccinated
Dharmadhrt  is probably Vaccinated
Barsati   is probably Vaccinated
Advitiya   is probably Vaccinated
 Mira  is definitely not Vaccinated!
Jitendra   is probably Vaccinated
Subramanyan  is probably Vaccinated
Vijai   is probably Vaccinated
Dhritiman   is probably Vaccinated
Avantas   is probably Vaccinated
Amrish   is probably Vaccinated
Vaisakhi  is probably Vaccinated
Asvathama   is probably Vaccinated
Alagesan  is probably Vaccinated
Mitali  is probably Vaccinated
 Anika  is definitely not Vaccinated!
Devasru   is probably Vaccinated
Riya  is definitely not Vaccinated!
Sandipa  is probably Vaccinated
Hardeep  is probably Vaccinated
Choudhary  is probably Vaccinated
 Tikaram  is probably Vaccinated
Poduri  is probably Vaccinated
Ilyas  is probably Vaccinated
Rajesh  is defini