# Set Membership

The cell below defines two **abstract classes**: the first represents a set and basic insert/search operations on it. You will need to impement this API four times, to implement (1) sequential search, (2) binary search tree, (3) balanced search tree, and (4) bloom filter. The second defines the synthetic data generator you will need to implement as part of your experimental framework. <br><br>**Do NOT modify the next cell** - use the dedicated cells further below for your implementation instead. <br>

In [2]:
# DO NOT MODIFY THIS CELL

from abc import ABC, abstractmethod  

# abstract class to represent a set and its insert/search operations
class AbstractSet(ABC):
    
    # constructor
    @abstractmethod
    def __init__(self):
        pass           
        
    # inserts "element" in the set
    # returns "True" after successful insertion, "False" if the element is already in the set
    # element : str
    # inserted : bool
    @abstractmethod
    def insertElement(self, element):     
        inserted = False
        return inserted   
    
    # checks whether "element" is in the set
    # returns "True" if it is, "False" otherwise
    # element : str
    # found : bool
    @abstractmethod
    def searchElement(self, element):
        found = False
        return found    
    
    
    
# abstract class to represent a synthetic data generator
class AbstractTestDataGenerator(ABC):
    
    # constructor
    @abstractmethod
    def __init__(self):
        pass           
        
    # creates and returns a list of length "size" of strings
    # size : int
    # data : list<str>
    @abstractmethod
    def generateData(self, size):     
        data = [""]*size
        return data   


Use the cell below to define any auxiliary data structure and python function you may need. Leave the implementation of the main API to the next code cells instead.

In [2]:
# ADD AUXILIARY DATA STRUCTURE DEFINITIONS AND HELPER CODE HERE

# Bloom filter helper code:

# BST helper code:
class NodeBST():
    def __init__(self, value):
        self.value = value
        self.right_node=None
        self.left_node  = None
      
    def insert(self, value):
        if value > self.value and self.right_node:
            self.right_node.insert(value)
        elif value < self.value and self.left_node:
            self.left_node.insert(value)
        elif value > self.value:
            new_node  = NodeBST(value)
            self.right_node = new_node
            return True
        elif value < self.value:
            new_node  = NodeBST(value)
            self.left_node = new_node
            return True
        return False
      
    def search(self, value):
        if value > self.value and self.right_node:
            return self.right_node.search(value)
        elif value < self.value and self.left_node:
            return self.left_node.search(value)
        
        elif (value == self.value):
            return True
        else:
            return False
    
    def print_tree(self):
        
        print(self.value)
        
        if self.left_node:
            self.left_node.print_tree()
            
        if self.right_node:
            self.right_node.print_tree()

In [122]:
class Node:
    def __init__(self, data=None, next=None):
        self.data = data
        self.next = next

class SequentialSearchSet(AbstractSet):
    def __init__(self):
        self.head = None

    def insertElement(self, element):
        node = Node(element, self.head)
        self.head = node
    
    #def insertElement(self, element):
        #if self.head is None:
            #self.head = Node(element, None)
            #return
            
        
        #node = self.head
        #while node.next is not None:
            #node = node.next
        
        #node.next = Node(element, None)


    def searchElement(self, element):
        search_node = self.head
        while search_node is not None:
            if search_node.data == element:
                return True
            search_node = search_node.next
        return False
    
    def print(self):
        if self.head is None:
            print("Empty linked list")
            return

        node = self.head # Iterator will start at the beginning of the linked list
        linked_list_string = '' # Linked list string to print out the list of elements
        while node is not None:
            linked_list_string += str(node.data) + '-->'
            node = node.next # Following the linked list and iterating through elements one by one

        print(linked_list_string)


In [123]:
linked_list = SequentialSearchSet()
linked_list.insertElement(5)
linked_list.insertElement(2)
linked_list.print()
linked_list.searchElement(5)

2-->5-->


True

Use the cell below to implement the requested API by means of **sequential search**.

In [17]:
class SequentialSearchSet(AbstractSet):
    
    def __init__(self):
        self.words = []
                
    def insertElement(self, element):
        inserted = False
        self.words.append(element)
        inserted = True
      
        return inserted
    

    def searchElement(self, element):
        
        found = False
        for value in self.words:
            if value == element:
                found = True
        
        return found

In [125]:
def test_search():
    sequential_search = SequentialSearchSet("test-search.txt")
    search_element = "hello"

    if sequential_search.searchElement(search_element):
        print(f"{search_element} is in the file")
    else:
        print(f"{search_element} is not in the file")
        

In [126]:
def mobydick():
    sequential_search = SequentialSearchSet("test1-mobydick.txt")
    search_element = "prairies"

    if sequential_search.searchElement(search_element):
        print(f"{search_element} is in the file")
    else:
        print(f"{search_element} is not in the file")

In [127]:
def dickens():
    sequential_search = SequentialSearchSet("test3-dickens.txt")
    search_element = "winter is coming"

    if sequential_search.searchElement(search_element):
        print(f"{search_element} is in the file")
    else:
        print(f"{search_element} is not in the file")

In [128]:
def warpeace():
    sequential_search = SequentialSearchSet("test2-warpeace.txt")
    search_element = "war"

    if sequential_search.searchElement(search_element):
        print(f"{search_element} is in the file")
    else:
        print(f"{search_element} is not in the file")

In [129]:
import timeit

time_search = timeit.timeit(test_search, number=10)
print("Time taken: ", time_search/10, "seconds")

FileNotFoundError: [Errno 2] No such file or directory: 'test-search.txt'

In [None]:
time_mobydick = timeit.timeit(mobydick, number=10)
print("Time taken: ", time_mobydick/10, "seconds")

FileNotFoundError: [Errno 2] No such file or directory: 'test1-mobydick.txt'

In [None]:
time_warpeace = timeit.timeit(warpeace, number=10)
print("Time taken: ", time_warpeace/10, "seconds")

In [None]:
time_dickens = timeit.timeit(dickens, number=100)
print("Time taken: ", time_dickens/100, "seconds")

Use the cell below to implement the requested API by means of **binary search tree**.

In [3]:
class BinarySearchTreeSet(AbstractSet):
    
    def __init__(self):
        self.root = NodeBST("0")
        
    def insertElement(self, element):
        inserted = False
        inserted = self.root.insert(element)
        return inserted

    def searchElement(self, element):     
        found = False
        found = self.root.search(element)
        return found    

Use the cell below to implement the requested API by means of **balanced search tree**.

In [4]:
# Time complexity = O(logN)
# Space complexity = O(n)
class BalancedNode:
    def __init__(self, value):
        self.left = None
        self.parent = None
        self.right = None
        self.value = value
        self.colour = "R"


class BalancedSearchTreeSet(AbstractSet):
    def __init__(self):
        self.empty = BalancedNode("xyz")
        self.empty.colour = "B"
        self.empty.left = None
        self.empty.right = None
        self.root = self.empty
        

    def insertElement(self, element):
        inserted = False
        node = BalancedNode(element)
        node.parent = None
        node.value = element
        node.colour = "R"
        node.left = self.empty
        node.right = self.empty

        tempval = None
        rt = self.root

        while rt != self.empty:
            tempval = rt
            if node.value < rt.value:
                rt = rt.left
            else:
                rt = rt.right

        node.parent = tempval

        if tempval == None:
            self.root = node
        elif node.value < tempval.value:
            tempval.left = node
        else:
            tempval.right = node

        if node.parent == None:
            node.colour = "B"
            inserted = True
            return inserted

        if node.parent.parent == None:
            inserted = True
            return inserted

        self.insertcontinue(node)
        return inserted

    def search(self, node, value):
        found = False
        if node == self.empty:
            return found
        elif value == node.value:
            found = True
            return found
        if value < node.value:
            return self.search(node.left, value)
        return self.search(node.right, value)

    def leftrotate(self, rotateval):
        rchild = rotateval.right
        rotateval.right = rchild.left
        if rchild.left != self.empty:
            rchild.left.parent = rotateval
        rchild.parent = rotateval.parent
        if rotateval.parent == None:
            self.root = rchild
        elif rotateval == rotateval.parent.left:
            rotateval.parent.left = rchild
        else:
            rotateval.parent.right = rchild
        rchild.left = rotateval
        rotateval.parent = rchild

    def rightrotate(self, rotateval):
        lchild = rotateval.left
        rotateval.left = lchild.right
        if lchild.right != self.empty:
            lchild.right.parent = rotateval
        lchild.parent = rotateval.parent
        if rotateval.parent == None:
            self.root = lchild
        elif rotateval == rotateval.parent.right:
            rotateval.parent.right = lchild
        else:
            rotateval.parent.left = lchild
        lchild.right = rotateval
        rotateval.parent = lchild

    def colourswap(self, a, l):
        if a.colour == "R":
            a.colour = "B"
            l.parent.colour = "B"
            l.parent.parent.colour = "R"
            l = l.parent.parent

    def insertcontinue(self, leaf):
        while leaf.parent.colour == "R":
            if leaf.parent == leaf.parent.parent.right:
                aunt = leaf.parent.parent.left
                if aunt.colour == "R":
                    self.colourswap(aunt, leaf)
                    leaf = leaf.parent.parent
                else:
                    if leaf == leaf.parent.left:
                        leaf = leaf.parent
                        self.rightrotate(leaf)
                    leaf.parent.colour = "B"
                    leaf.parent.parent.colour = "R"
                    self.leftrotate(leaf.parent.parent)
            else:
                aunt = leaf.parent.parent.right
                if aunt.colour == "R":
                    self.colourswap(aunt, leaf)
                    leaf = leaf.parent.parent
                else:
                    if leaf == leaf.parent.right:
                        leaf = leaf.parent
                        self.leftrotate(leaf)
                    leaf.parent.colour = "B"
                    leaf.parent.parent.colour = "R"
                    self.rightrotate(leaf.parent.parent)
            if leaf == self.root:
                break
            self.root.colour = "B"

            # intended order of values : (3,1,5,7,6,8,9,10)

    def searchElement(self, element):
        return self.search(self.root, element)

    def printTree(self, node, last):
        if node != self.empty:
            if last:
                print("R: ", end=" ")

            else:
                print("L: ", end=" ")
            s_color = "RED" if node.colour == "R" else "BLACK"
            print(str(node.value) + "(" + s_color + ")")
            self.printTree(node.left, False)
            self.printTree(node.right, True)

    # Function to call print
    def display(self):
        self.printTree(self.root, True)

# if __name__ == "__main__":
#         tree = BalancedSearchTreeSet()
#         tree.insertElement("spaces")
#         tree.insertElement("main")
#         tree.insertElement("store")
#         tree.insertElement("python")
#         tree.insertElement("display")
#         tree.insertElement("navigate")
#         tree.insertElement("window")
#         tree.insertElement("packages")

#         tree.display()

#         a = tree.searchElement("hello")
#         b = tree.searchElement("window")
#         print(a)
#         print(b)

# # if __name__ == "__main__":
# #     tree = BalancedSearchTreeSet()
# #     file = open("./test3-dickens.txt", "r")
# #     test_file = open("./test-search.txt", "r")
# #     for line in file:
# #         for word in line.split():
# #             tree.insert(word)
# #     for word in test_file:
# #         word = word.strip()
# #         if not tree.searchtree(word):

# #             print(word, " is not present in the tree")

Use the cell below to implement the requested API by means of **bloom filter**.

In [5]:
class BloomFilterSet(AbstractSet):
    
    def __init__(self):
        # ADD YOUR CODE HERE
        self.size = 9161520
        self.hash_count = 6
        self.bit_array = [0] * self.size
        self.hash_functions = self.generate_hash_functions()
    
    def generate_hash_functions(self): # Code to generate a list of unique hash functions
            hash_fuctions = []
            for i in range(1,self.hash_count+1):
                hash_fuctions.append(self.generate_hash_function(i))
            return hash_fuctions
    
    def generate_hash_function(self,seed): # Code to generate a unique hash function based on an inputed seed value
        def hash_function(value):
            return hash(value + str(seed)) % self.size
        return hash_function
        
    def insertElement(self, element):
        inserted = False
        # ADD YOUR CODE HERE
        for hash_function in self.hash_functions:
            self.bit_array[hash_function(element)] = 1
        inserted = True
        
        return inserted

    def searchElement(self, element):     
        found = False
        # ADD YOUR CODE HERE
        for hash_function in self.hash_functions:
            if self.bit_array[hash_function(element)] == 0:
                return found
        found = True
        return found    

Use the cell below to implement the **synthetic data generator** as part of your experimental framework.

In [19]:
import string
import random
from collections import deque

class TestDataGenerator(AbstractTestDataGenerator):
    
    def __init__(self, punctuation=False, length=7, lower=True, upper=True, digits=True):
        self.length = length
        self.lower = lower
        self.upper = upper
        self.digits = digits
        self.punctuation = punctuation

    def getCharacters(self):
        character = ''
        if self.lower:
            character += string.ascii_lowercase
        if self.upper:
            character += string.ascii_uppercase
        if self.digits:
            character += string.digits
        if self.punctuation:
            character += string.punctuation

        return character

        
    def generateData(self, amount):     
        data = []
        characters = self.getCharacters()

        for i in range(amount):
            data.append(''.join(random.choices(characters, k=random.randint(3,self.length))))
        
        return data
    
    def generateSortedBinary(self, size):     # this is the worst case scenario for BST and balanced
        return sorted(self.generateData(size))

    def generateBestCaseBinary(self, size):

        class BSTBuilder:
            def __init__(self, value):
                self.value = value
                self.left = None
                self.right = None

        def build_balanced_bst(strings):
            if not strings:
                return None
            
            mid = len(strings) // 2
            root = BSTBuilder(strings[mid])
            root.left = build_balanced_bst(strings[:mid])
            root.right = build_balanced_bst(strings[mid+1:])
            return root

        def generate_balanced_bst_data():
            result = []
            root = build_balanced_bst(self.generateSortedBinary(size))

            queue = deque([root])
            current_level = 1
            next_level = 0
            while queue:
                node = queue.popleft()
                print(node.value, end=' ')
                result.append(node.value)
                current_level -= 1
                
                if node.left:
                    queue.append(node.left)
                    next_level += 1
                if node.right:
                    queue.append(node.right)
                    next_level += 1
                
                if current_level == 0:
                    print('\n')
                    current_level = next_level
                    next_level = 0
            return result

        return generate_balanced_bst_data()


    

test = TestDataGenerator(False, 7, True, False)
print(test.generateData(10))
print(test.generateSortedBinary(5))
print(test.generateBestCaseBinary(15))



['89g6nau', 'fxfgu0o', '2g5azx1', 'kc05hi0', 'mq2ttxg', 'vq1m7', 'dz9v3h', 'p1eze7', '2636s7', 'plnn']
['2rbk0r', 'gi6k77', 'o1y', 'txa', 'wmq4']
g317dv 

bznwi l5up5m 

8iq dp3o jpcro qv0a179 

462oqho b2g5 dd8 eefu1gv ggn3 jt41 oqa4ab9 v3z0f 

['g317dv', 'bznwi', 'l5up5m', '8iq', 'dp3o', 'jpcro', 'qv0a179', '462oqho', 'b2g5', 'dd8', 'eefu1gv', 'ggn3', 'jt41', 'oqa4ab9', 'v3z0f']
['g317dv', 'bznwi', 'l5up5m', '8iq', 'dp3o', 'jpcro', 'qv0a179', '462oqho', 'b2g5', 'dd8', 'eefu1gv', 'ggn3', 'jt41', 'oqa4ab9', 'v3z0f']


Use the cells below for the python code needed to **fully evaluate your implementations**, first on real data and subsequently on synthetic data (i.e., read data from test files / generate synthetic one, instantiate each of the 4 set implementations in turn, then thorouhgly experiment with insert/search operations and measure their performance).

In [18]:
import timeit

# ADD YOUR TEST CODE HERE TO WORK ON REAL DATA
iterations= 1

def bloomfilter_insert(bloomfilter,file):
    for line in file:
        for word in line.split():
            bloomfilter.insertElement(word)

def bloomfilter_search(bf):
    for word in test_search:
        word = word.strip()
        if (bf.searchElement(word)):
            # print("found word: " + word)
            pass
        else:
            pass

def balanced_tree_insert(balanced_tree,file):
    for line in file:
        for word in line.split():
            balanced_tree.insertElement(word)
def balanced_tree_search(balanced_tree):
    for word in test_search:
        word = word.strip()
        if (balanced_tree.searchElement(word)):
            # print("found word: " + word)
            pass
        else:
            pass  

def bst_insert(BinarySearchTree,file):
    for line in file:
        for word in line.split():
            BinarySearchTree.insertElement(word)

def bst_search(binarySearch):
    for word in test_search:
        word = word.strip()
        if (binarySearch.searchElement(word)):
            # print("found word: " + word)
            pass
        else:
            pass

def sequential_search_insert(sequential_search,file):
    for line in file:
        for word in line.split():
            sequential_search.insertElement(word) 

def sequential_search_search(sequential_search):
    for word in test_search:
        word = word.strip()
        if (sequential_search.searchElement(word)):
            # print("found word: " + word)
            pass
        else:
            pass
        
def sequential_search_test_1():#Sequential search test
    sequential_search = SequentialSearchSet()
    sequential_search_insert_time_taken = timeit.timeit(lambda: sequential_search_insert(sequential_search, test_file_1), number=iterations)
    print("Sequential search insert time taken for file 1" + ":", sequential_search_insert_time_taken/iterations)
    sequential_search_search_time_taken = timeit.timeit(lambda: sequential_search_search(sequential_search), number=iterations)
    print("Sequential search search time taken for file 1" + ":", sequential_search_search_time_taken/iterations)

def sequential_search_test_2():
    sequential_search = SequentialSearchSet()
    sequential_search_insert_time_taken = timeit.timeit(lambda: sequential_search_insert(sequential_search, test_file_2), number=iterations)
    print("Sequential search insert time taken for file 2" + ":", sequential_search_insert_time_taken/iterations)
    sequential_search_search_time_taken = timeit.timeit(lambda: sequential_search_search(sequential_search), number=iterations)
    print("Sequential search search time taken for file 2" + ":", sequential_search_search_time_taken/iterations)

def sequential_search_test_3():
    sequential_search = SequentialSearchSet()
    sequential_search_insert_time_taken = timeit.timeit(lambda: sequential_search_insert(sequential_search, test_file_3), number=iterations)
    print("Sequential search insert time taken for file 3" + ":", sequential_search_insert_time_taken/iterations)
    sequential_search_search_time_taken = timeit.timeit(lambda: sequential_search_search(sequential_search), number=iterations)
    print("Sequential search search time taken for file 3" + ":", sequential_search_search_time_taken/iterations)


def bf_test_1():  #Bloom filter test
    bloomfilter = BloomFilterSet()
    bf_insert_time_taken = timeit.timeit(lambda: bloomfilter_insert(bloomfilter, test_file_1), number=iterations)
    print("Bloom filter insert time taken for file 1" + ":", bf_insert_time_taken/iterations)
    bf_search_time_taken = timeit.timeit(lambda: bloomfilter_search(bloomfilter), number=iterations)
    print("Bloom filter search time taken for file 1" + ":", bf_search_time_taken/iterations)

def bf_test_2():
    bloomfilter = BloomFilterSet()
    bf_insert_time_taken = timeit.timeit(lambda: bloomfilter_insert(bloomfilter, test_file_2), number=iterations)
    print("Bloom filter insert time taken for file 2" + ":", bf_insert_time_taken/iterations)
    bf_search_time_taken = timeit.timeit(lambda: bloomfilter_search(bloomfilter), number=iterations)
    print("Bloom filter search time taken for file 2" + ":", bf_search_time_taken/iterations)

def bf_test_3():
    bloomfilter = BloomFilterSet()
    bf_insert_time_taken = timeit.timeit(lambda: bloomfilter_insert(bloomfilter, test_file_3), number=iterations)
    print("Bloom filter insert time taken for file 3" + ":", bf_insert_time_taken/iterations)
    bf_search_time_taken = timeit.timeit(lambda: bloomfilter_search(bloomfilter), number=iterations)
    print("Bloom filter search time taken for file 3" + ":", bf_search_time_taken/iterations)



def bst_test_1(): #Binary search tree test
    BSTree = BinarySearchTreeSet()
    time_for_insert = timeit.timeit(lambda: bst_insert(BSTree, test_file_1), number=iterations)
    print("Binary tree insert time for for file 1: ", time_for_insert/iterations)
    time_for_search = timeit.timeit(lambda: bst_search(BSTree), number=iterations)
    print("Binary tree search time taken for file 1: ", time_for_search/iterations)
    
def bst_test_2():
    BSTree = BinarySearchTreeSet()
    time_for_insert = timeit.timeit(lambda: bst_insert(BSTree, test_file_2), number=iterations)
    print("Binary tree insert time for for file 2: ", time_for_insert/iterations)
    time_for_search = timeit.timeit(lambda: bst_search(BSTree), number=iterations)
    print("Binary tree search time taken for file 2: ", time_for_search/iterations)
    
def bst_test_3():
    BSTree = BinarySearchTreeSet()
    time_for_insert = timeit.timeit(lambda: bst_insert(BSTree, test_file_3), number=iterations)
    print("Binary tree insert time for for file 3: ", time_for_insert/iterations)
    time_for_search = timeit.timeit(lambda: bst_search(BSTree), number=iterations)
    print("Binary tree search time taken for file 3 ", time_for_search/iterations)



def balanced_tree_test_1(): #Balanced tree test
    tree = BalancedSearchTreeSet()
    bt_insert_time_taken = timeit.timeit(lambda: balanced_tree_insert(tree, test_file_1), number=iterations)
    print("Balanced tree insert time taken for file 1" + ":", bt_insert_time_taken/iterations)
    bt_search_time_taken = timeit.timeit(lambda: balanced_tree_search(tree), number=iterations)
    print("Balanced tree search time taken for file 1" + ":", bt_search_time_taken/iterations)

def balanced_tree_test_2():
    tree = BalancedSearchTreeSet()
    balance_bt_insert_time_taken = timeit.timeit(lambda: balanced_tree_insert(tree, test_file_2), number=iterations)
    print("Balanced tree insert time taken for file 2" + ":", balance_bt_insert_time_taken/iterations)
    balance_bt_search_time_taken = timeit.timeit(lambda: balanced_tree_search(tree), number=iterations)
    print("Balanced tree search time taken for file 2" + ":", balance_bt_search_time_taken/iterations)

def balanced_tree_test_3():
    tree = BalancedSearchTreeSet()
    balance_bt_insert_time_taken = timeit.timeit(lambda: balanced_tree_insert(tree, test_file_3), number=iterations)
    print("Balanced tree insert time taken for file 3" + ":", balance_bt_insert_time_taken/iterations)
    balance_bt_search_time_taken = timeit.timeit(lambda: balanced_tree_search(tree), number=iterations)
    print("Balanced tree search time taken for file 3" + ":", balance_bt_search_time_taken/iterations)

test_file_1 = open("./testfiles/test1-mobydick.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
sequential_search_test_1()
print("--------------------------------------------------")
test_file_1.close()
test_search.close()
test_file_2 = open("./testfiles/test2-warpeace.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
sequential_search_test_2()
print("--------------------------------------------------")
test_file_2.close()
test_search.close()
test_file_3 = open("./testfiles/test3-dickens.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
sequential_search_test_3()
print("--------------------------------------------------")
test_file_3.close()
test_search.close()    
    

test_file_1 = open("./testfiles/test1-mobydick.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
bf_test_1()
print("--------------------------------------------------")
test_file_1.close()
test_search.close()
test_file_2 = open("./testfiles/test2-warpeace.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
bf_test_2()
print("--------------------------------------------------")
test_file_2.close()
test_search.close()
test_file_3 = open("./testfiles/test3-dickens.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
bf_test_3()
test_file_3.close()
test_search.close()
print("--------------------------------------------------")

test_file_1 = open("./testfiles/test1-mobydick.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
bst_test_1()
print("--------------------------------------------------")
test_file_1.close()
test_search.close()
test_file_2 = open("./testfiles/test2-warpeace.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
bst_test_2()
print("--------------------------------------------------")
test_file_2.close()
test_search.close()
test_file_3 = open("./testfiles/test3-dickens.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
bst_test_3()
print("--------------------------------------------------")
test_file_3.close()
test_search.close()


test_file_1 = open("./testfiles/test1-mobydick.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
balanced_tree_test_1()
print("--------------------------------------------------")
test_file_1.close()
test_search.close()
test_file_2 = open("./testfiles/test2-warpeace.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
balanced_tree_test_2()
print("--------------------------------------------------")
test_file_2.close()
test_search.close()
test_file_3 = open("./testfiles/test3-dickens.txt", "r")
test_search = open("./testfiles/test-search.txt", "r")
balanced_tree_test_3()
print("--------------------------------------------------")
test_file_3.close()
test_search.close()
   
    






Sequential search insert time taken for file 1: 0.024424750008620322
Sequential search search time taken for file 1: 1.890895374934189
--------------------------------------------------
Sequential search insert time taken for file 2: 0.06325350003316998
Sequential search search time taken for file 2: 5.441687832935713
--------------------------------------------------
Sequential search insert time taken for file 3: 0.5859553749905899
Sequential search search time taken for file 3: 44.10061591700651
--------------------------------------------------
Bloom filter insert time taken for file 1: 0.3065829579718411
Bloom filter search time taken for file 1: 0.0010708750924095511
--------------------------------------------------
Bloom filter insert time taken for file 2: 0.7981658329954371
Bloom filter search time taken for file 2: 0.0012041250010952353
--------------------------------------------------
Bloom filter insert time taken for file 3: 7.525427374988794
Bloom filter search time tak

In [15]:
import timeit
import random

iterations = 1
test_search = open("./testfiles/test-search.txt", "r")

def BST_insert(BinarySearchTree,file):
    for line in file:
        for word in line.split():
            BinarySearchTree.insertElement(word)

def BST_search(binarySearch):
    for word in test_search:
        word = word.strip()
        if (binarySearch.searchElement(word)):
            # print("found word: " + word)
            pass
        else:
            pass

def bst_test_1():
    BSTree = BinarySearchTreeSet()
    time_for_insert = timeit.timeit(lambda: BST_insert(BSTree, test_file_1), number=iterations)
    print("Time taken for inserting for test 1: ", time_for_insert/iterations)
    time_for_search = timeit.timeit(lambda: BST_search(BSTree), number=iterations)
    print("Time taken for searching for test 1: ", time_for_search/iterations)
    
def bst_test_2():
    BSTree = BinarySearchTreeSet()
    time_for_insert = timeit.timeit(lambda: BST_insert(BSTree, test_file_2), number=iterations)
    print("Time taken for inserting for test 2: ", time_for_insert/iterations)
    time_for_search = timeit.timeit(lambda: BST_search(BSTree), number=iterations)
    print("Time taken for searching for test 2: ", time_for_search/iterations)
    
def bst_test_3():
    BSTree = BinarySearchTreeSet()
    time_for_insert = timeit.timeit(lambda: BST_insert(BSTree, test_file_3), number=iterations)
    print("Time taken for inserting for test 3: ", time_for_insert/iterations)
    time_for_search = timeit.timeit(lambda: BST_search(BSTree), number=iterations)
    print("Time taken for searching for test 3: ", time_for_search/iterations)


with open("./testfiles/test-search.txt", "r") as test_search:
    with open("./testfiles/test1-mobydick.txt", "r") as test_file_1:
        bst_test_1()

with open("./testfiles/test-search.txt", "r") as test_search:
    with open("./testfiles/test2-warpeace.txt", "r") as test_file_2:
        bst_test_2()

with open("./testfiles/test-search.txt", "r") as test_search:
    with open("./testfiles/test3-dickens.txt", "r") as test_file_3:
        bst_test_3()

test_search.close()


Time taken for inserting for test 1:  0.349019916029647
Time taken for searching for test 1:  0.0010932079749181867
Time taken for inserting for test 2:  0.9382864999352023
Time taken for searching for test 2:  0.0010407500667497516
Time taken for inserting for test 3:  8.19743462500628
Time taken for searching for test 3:  0.0011738750617951155
