# Searching algorithms in Python by __[Artyom Iudin](https://github.com/Tomas542/DSaA)__

<div class="alert alert-block alert-info">
<b>Chapter navigation</b> is broken on git repos. Download .ipynb to use it.
</div>

# Chapters
0. [Preparations](#preps)
      1. [Import](#import)
      2. [In bounds func](#in_bounds)
1. [Linear search](#linear)
2. [Binary search](#binary)
3. [Binary search tree](#binary_tree)
4. [Fibonacci search](#fibonacci)
5. [Interpolation search](#interpolation)
6. [Bloom filter](#bloom)
7. [Hash-based search](#hash_common)
      1. [Without rehash](#hash_no)
      2. [With simple rehash](#hash_simple)
      3. [With random rehash](#hash_random)
      4. [With chain rule rehash](#hash_chain)

# Preparations <a class="anchor" id="preps"></a>

### Import <a class="anchor" id="import"></a>

Import of <b>random</b> module for random rehash.

In [1]:
import random

### In bounds func <a class="anchor" id="in_bounds"></a>

When we have a sorted structure, we can first check whether the target lies within its bounds (between the first and the last element).

In [None]:
def in_bounds(array: list[int], target: int) -> bool:
    """Tell whether ``target`` lies between the first and last element.

    Only the two end elements are inspected, so the answer is meaningful
    as a range check only when ``array`` is sorted in ascending order.

    Args:
        array: list of integers (expected to be sorted ascending).
        target: value to test.

    Returns:
        True when ``array[0] <= target <= array[-1]``; False otherwise,
        including when ``array`` is empty.
    """
    # an empty array has no bounds, so nothing can lie inside them
    if not array:
        return False

    return array[0] <= target <= array[-1]

# Linear search <a class="anchor" id="linear"></a>

Checks every element of the array (or another structure) one by one. Mostly useful when the array is not sorted.

Best case Scenario : O(1) (if it is first element)

Worst case Scenario : O(n) 

In [None]:
def linear_search(array: list[int], target: int) -> int:
    """Scan ``array`` from the start and return the index of ``target``.

    Args:
        array: list of integers; it does not need to be sorted.
        target: value to look for.

    Returns:
        Index of the first element equal to ``target``, or -1 when no
        element matches (same "not found" value as the other searches
        in this file).
    """
    for index, item in enumerate(array):
        # we found it
        if item == target:
            return index

    # didn't find
    return -1

# Binary search <a class="anchor" id="binary"></a>

Divides the array in half repeatedly until the element is found.

Worst case Scenario : O(log n) 

In [None]:
def binary_search(array: list[int], target: int) -> int:
    """Find ``target`` in a sorted array by repeatedly halving the range.

    Args:
        array: list of integers sorted in ascending order.
        target: value to look for.

    Returns:
        Index of an element equal to ``target``, or -1 when the array is
        empty or does not contain the target.
    """
    # an empty array cannot contain the target and has no bounds to check
    if not array:
        return -1

    # cheap early exit: a target outside [first, last] cannot be present
    if not in_bounds(array, target):
        return -1

    # initializing 2 variables for our boundaries
    low = 0
    high = len(array) - 1

    # search until the boundaries cross
    while low <= high:
        # middle element that we will check
        mid = (low + high) // 2  # '>> 1' would do the same as '// 2'

        # target is lower: move the high bound
        if array[mid] > target:
            high = mid - 1

        # target is higher: move the low bound
        elif array[mid] < target:
            low = mid + 1

        # neither lower nor higher: we found it
        else:
            return mid

    # didn't find
    return -1

# Binary search tree <a class="anchor" id="binary_tree"></a>

Building binary tree data structure to search in nodes.

Best case Scenario : O(log n) - balanced

Worst case Scenario : O(n) - unbalanced

In [None]:
class BSTNode:
    """Node of an (unbalanced) binary search tree holding unique integers.

    Smaller values are stored in the left subtree and larger values in the
    right subtree. A node whose ``value`` is ``None`` represents an empty
    tree; this is the state of ``BSTNode()`` before the first ``add``.
    """

    def __init__(self, value:int = None):
        # value stored in the node (None means "empty node")
        self.value = value
        # subtrees with lower (left) and higher (right) values
        self.left = None
        self.right = None

    def __contains__(self, value:int) -> bool:
        """Return True when ``value`` is stored in this subtree."""
        current = self

        # walk down until we run out of nodes (or hit an empty root)
        while current is not None and current.value is not None:
            # we found value
            if current.value == value:
                return True

            # go deeper: left for smaller values, right for larger ones
            current = current.left if current.value > value else current.right

        return False

    def add(self, value:int) -> None:
        """Insert ``value`` into this subtree; duplicates are ignored."""
        # if we got empty node (compare with None so that 0 is a valid value)
        if self.value is None:
            self.value = value
            return

        current = self

        # stop as soon as we meet the number itself: already have it
        while current.value != value:
            if current.value < value:
                # right node is empty: attach the new value here
                if current.right is None:
                    current.right = BSTNode(value)
                    return

                # right node isn't empty: move to it
                current = current.right

            else:
                # left node is empty: attach the new value here
                if current.left is None:
                    current.left = BSTNode(value)
                    return

                # left node isn't empty: move to it
                current = current.left

    def get_min(self) -> int:
        """Return the smallest value of this subtree (None for an empty root)."""
        current = self

        # moving to the left nodes till the end
        while current.left is not None:
            current = current.left

        return current.value

    def get_max(self) -> int:
        """Return the largest value of this subtree (None for an empty root)."""
        current = self

        # moving to the right nodes till the end
        while current.right is not None:
            current = current.right

        return current.value

    def delete(self, value:int):
        """Remove ``value`` from this subtree and return the new subtree root.

        The caller must re-assign the result (``root = root.delete(x)``):
        the returned root may be a different node, or ``None`` when the
        last node of the subtree was removed. A value that is not in the
        tree leaves the tree unchanged.
        """
        # empty tree: nothing to delete
        if self.value is None:
            return self

        # searching on the right
        if self.value < value:
            # we can move to the right (it is not empty)
            if self.right is not None:
                self.right = self.right.delete(value)

            # otherwise the value is not in the tree
            return self

        # searching on the left
        if self.value > value:
            # we can move to the left (it is not empty)
            if self.left is not None:
                self.left = self.left.delete(value)

            # otherwise the value is not in the tree
            return self

        # we found the value and there is nothing on the right:
        # the left subtree (possibly None) takes this node's place
        if self.right is None:
            return self.left

        # nothing on the left: the right subtree takes this node's place
        if self.left is None:
            return self.right

        # two children: the smallest value of the right subtree is the
        # next value after the deleted one, so it replaces it
        successor = self.right.get_min()
        self.value = successor
        # remove the value we have just moved up
        self.right = self.right.delete(successor)
        return self

    # sorted values of the tree
    def inorder(self, values:list[int] = None) -> list[int]:
        """Append values in left-node-right order to ``values`` and return it.

        A new list is created when ``values`` is not given.
        """
        # a fresh list per call; a shared default would leak between calls
        if values is None:
            values = []

        if self.left is not None:
            self.left.inorder(values)

        if self.value is not None:
            values.append(self.value)

        if self.right is not None:
            self.right.inorder(values)

        return values

    # one of two orders to check structure of the tree
    def preorder(self, values:list[int] = None) -> list[int]:
        """Append values in node-left-right order to ``values`` and return it.

        A new list is created when ``values`` is not given.
        """
        if values is None:
            values = []

        if self.value is not None:
            values.append(self.value)

        if self.left is not None:
            self.left.preorder(values)

        if self.right is not None:
            self.right.preorder(values)

        return values

    # one of two orders to check structure of the tree
    def postorder(self, values:list[int] = None) -> list[int]:
        """Append values in left-right-node order to ``values`` and return it.

        A new list is created when ``values`` is not given.
        """
        if values is None:
            values = []

        if self.left is not None:
            self.left.postorder(values)

        if self.right is not None:
            self.right.postorder(values)

        if self.value is not None:
            values.append(self.value)

        return values

# Fibonacci search <a class="anchor" id="fibonacci"></a>

Like binary search, but Fibonacci numbers are used as the indexes that bound the search range. Only useful if the target is known to lie within one third of the array. The sub-search can be any search algorithm; binary search is used here.

Average case Scenario: O(n/3) * we know target in 1/3 of array

Worst case Scenario: O(n)

In [None]:
def fibonacci_search(array:list[int], target:int) -> int:
    """Bracket ``target`` between two Fibonacci indexes, then sub-search.

    Consecutive Fibonacci numbers are used as the low/high borders of a
    window that is moved to the right while the element at the high border
    is not greater than the target. The window is then searched with
    ``binary_search``.

    Args:
        array: list of integers sorted in ascending order.
        target: value to look for.

    Returns:
        Index of an element equal to ``target``, or -1 when the array is
        empty or does not contain the target.
    """
    # an empty array cannot contain the target and has no bounds to check
    if not array:
        return -1

    if not in_bounds(array, target):
        return -1

    last = len(array) - 1

    # initialize 2 fibonacci numbers. fib1 contains low border, fib2 - high
    fib1 = 0
    fib2 = 1

    # searching for borders; 'fib2 < last' keeps the probe inside the array
    # (the target may be the last element, or the array may be very short)
    while fib2 < last and array[fib2] <= target:
        # calculating new borders
        fib1, fib2 = fib2, fib1 + fib2

    # the high border may have jumped past the end of the array
    high = min(fib2, last)

    # searching in the subarray with our borders. Could be another searching algorithm
    bs_ind = binary_search(array[fib1:high + 1], target)

    # we didn't find our value
    if bs_ind == -1:
        return -1

    # translate the index inside the window back to an index in the array
    return fib1 + bs_ind

# Interpolation search <a class="anchor" id="interpolation"></a>

Calculating approximate position of target. Looks similar to binary search.

Best and Average cases Scenario: O(log log n)

Worst case Scenario: O(n)

In [None]:
def interpolation_search(array:list[int], target: int) -> int:
    """Find ``target`` in a sorted array by estimating its position.

    The probe index is interpolated linearly from the values at the
    current low and high bounds. The loop guard itself verifies that the
    target lies between those two values, so no separate bounds pre-check
    is needed and an empty array simply skips the loop.

    Args:
        array: list of integers sorted in ascending order.
        target: value to look for.

    Returns:
        Index of an element equal to ``target``, or -1 when it is absent.
    """
    # initialize 2 bounds
    low = 0
    high = len(array) - 1

    # keep going only while the target can still be inside [low, high]
    while low <= high and array[low] <= target <= array[high]:
        value_span = array[high] - array[low]

        # all values in the range are equal, and the guard above already
        # proved they equal the target; also avoids division by zero
        if value_span == 0:
            return low

        # estimate the position of the target (integer arithmetic only)
        middle = low + (high - low) * (target - array[low]) // value_span

        # going up cause target is higher
        if array[middle] < target:
            low = middle + 1

        # going down cause target is lower
        elif array[middle] > target:
            high = middle - 1

        # found it
        else:
            return middle

    # didn't find
    return -1

# Bloom filter <a class="anchor" id="bloom"></a>

A probabilistic analogue of a hash map used for membership tests. It can give false-positive answers.

Time complexity: O(k)

In [None]:
class BloomFilter:
    """Bloom filter that keeps its bit array inside a single Python int.

    Each hash function is called as ``hf(value, size)`` and its result is
    used as the position of the bit to set or test.
    """

    def __init__(self, size:int = 1000, hash_func = None):
        # container of values: bit i set means "some value hashed to i"
        self.bits = 0
        self.size = size

        # fall back to a single hash function built on the builtin hash()
        if hash_func is None:
            hash_func = [lambda e, size: hash(e) % size]

        self.hash_func = hash_func
        # number of hash functions
        self.k = len(self.hash_func)

    def add(self, value: int):
        """Set the bit chosen by every hash function for ``value``."""
        for hasher in self.hash_func:
            position = hasher(value, self.size)
            self.bits |= (1 << position)

    def __contains__(self, value:int) -> bool:
        """Return False if any bit for ``value`` is unset, otherwise True."""
        return all(
            self.bits & (1 << hasher(value, self.size))
            for hasher in self.hash_func
        )

# Hash-based search <a class="anchor" id="hash_common"></a>

This search method is based on calculating hash of key and adding it to hash table

Best and Average cases Scenario: O(1)

Worst case Scenario: O(n)

In [None]:
class HashTable:
    """Common base of the hash tables below: stores the size, computes hashes."""

    def __init__(self, MAX:int = 10) -> None:
        # size of the array
        self.MAX = MAX

    def get_hash(self, key:str) -> int:
        """Return the sum of the code points of ``key`` modulo the table size."""
        # ord() of every letter, added up, then folded into [0, MAX)
        return sum(map(ord, key)) % self.MAX

### Without rehash <a class="anchor" id="hash_no"></a>

In this variant we simply overwrite the value in the cell if it is not empty.

In [None]:
class HashTableSearch(HashTable):
    """Hash table without collision handling.

    Every key owns exactly the cell given by ``get_hash``; storing a key
    whose hash collides with another key overwrites that other key.
    """

    def __init__(self, MAX:int = 10) -> None:
        super().__init__(MAX)
        self.array = [None for _ in range(self.MAX)]

    def __setitem__(self, key:str, value:int) -> None:
        """Store ``value`` under ``key``, replacing whatever the cell held."""
        # adding new value into our hash_map
        self.array[self.get_hash(key)] = (key, value)

        # checking our hash_map for space: count only the occupied cells
        occupied = sum(cell is not None for cell in self.array)
        if occupied >= 0.7 * self.MAX:
            self.resize()

    def add(self, key:str, value:int) -> None:
        """Method-call spelling of ``table[key] = value``."""
        self[key] = value

    def __getitem__(self, key:str) -> int:
        """Return the value stored under ``key``.

        Raises:
            KeyError: the cell is empty or holds a different key (the key
                was never stored, or was overwritten by a colliding key).
        """
        cell = self.array[self.get_hash(key)]
        if cell is None or cell[0] != key:
            raise KeyError(key)

        return cell[1]

    def get(self, key:str) -> int:
        """Method-call spelling of ``table[key]`` (raises KeyError as well)."""
        return self[key]

    def resize(self) -> None:
        """Double the table and move every stored pair to its new cell."""
        # creating new map 2 times larger
        self.MAX *= 2
        new_arr = [None] * self.MAX

        for cell in self.array:
            if cell is not None:
                # the hash depends on MAX, so every key is hashed again
                new_arr[self.get_hash(cell[0])] = cell

        self.array = new_arr

### With simple rehash <a class="anchor" id="hash_simple"></a>

On a collision we simply go to the next free cell.

In [None]:
class HashTableSimple(HashTable):
    """Hash table that resolves collisions by moving to the next free cell.

    Probing starts at the cell given by ``get_hash`` and advances one cell
    at a time, wrapping around at the end of the array.
    """

    def __init__(self, MAX:int = 10) -> None:
        super().__init__(MAX)
        self.array = [None for _ in range(self.MAX)]
        self._length = len(self.array)

    def _find_slot(self, key:str) -> int:
        """Return the index holding ``key``, or of the first free cell on
        its probe path; -1 when every cell is taken by other keys."""
        size = len(self.array)
        home = self.get_hash(key)

        for step in range(size):
            index = (home + step) % size
            cell = self.array[index]
            if cell is None or cell[0] == key:
                return index

        return -1

    def __setitem__(self, key:str, value:int) -> None:
        """Store ``value`` under ``key``, probing forward on a collision."""
        index = self._find_slot(key)

        # defensive: a completely full table has to grow before inserting
        if index == -1:
            self.resize()
            index = self._find_slot(key)

        self.array[index] = (key, value)

        # checking our hash_map for space: count only the occupied cells
        occupied = sum(cell is not None for cell in self.array)
        if occupied >= 0.7 * self.MAX:
            self.resize()

    def __getitem__(self, key:str) -> int:
        """Return the value stored under ``key``.

        Returns ``float('inf')`` when the key is not in the table.
        """
        index = self._find_slot(key)

        # an empty cell on the probe path means the key was never stored
        if index == -1 or self.array[index] is None:
            # we have nothing
            return float('inf')

        return self.array[index][1]

    def resize(self) -> None:
        """Double the table and re-insert every stored pair."""
        old_cells = self.array

        # creating new map 2 times larger
        self.MAX *= 2
        self.array = [None] * self.MAX
        self._length = len(self.array)

        for cell in old_cells:
            if cell is not None:
                # probe again: keys may collide in the new array as well
                self.array[self._find_slot(cell[0])] = cell

### With random rehash <a class="anchor" id="hash_random"></a>

On a collision we try random cells until we find a free one.

<div class="alert alert-block alert-danger">
Beware of looking up keys that are not in the hash table.
</div>

In [14]:
class HashTableRandom(HashTable):
    """Hash table that resolves collisions by probing cells in random order.

    The probe order of a key is its hash cell followed by a pseudo-random
    permutation of all cells. The permutation comes from a private
    generator seeded with the key itself, so an insert and a later lookup
    of the same key visit the cells in the same order, and the module-wide
    ``random`` state is left untouched.
    """

    def __init__(self, MAX:int = 10) -> None:
        super().__init__(MAX)
        self.array = [None for _ in range(self.MAX)]
        self._length = len(self.array)

    def _probe_order(self, key:str):
        """Yield cell indexes for ``key``: hash cell first, then random ones."""
        yield self.get_hash(key)

        # seeding with the key makes the order reproducible for that key;
        # a permutation (not repeated draws) guarantees the probing ends
        order = list(range(len(self.array)))
        random.Random(key).shuffle(order)
        yield from order

    def _find_slot(self, key:str) -> int:
        """Return the index holding ``key``, or of the first free cell on
        its probe path; -1 when every cell is taken by other keys."""
        for index in self._probe_order(key):
            cell = self.array[index]
            if cell is None or cell[0] == key:
                return index

        return -1

    def __setitem__(self, key:str, value:int) -> None:
        """Store ``value`` under ``key``, probing randomly on a collision."""
        index = self._find_slot(key)

        # defensive: a completely full table has to grow before inserting
        if index == -1:
            self.resize()
            index = self._find_slot(key)

        self.array[index] = (key, value)

        # checking our hash_map for space: count only the occupied cells
        occupied = sum(cell is not None for cell in self.array)
        if occupied >= 0.7 * self.MAX:
            self.resize()

    def __getitem__(self, key:str) -> int:
        """Return the value stored under ``key``.

        Raises:
            KeyError: the key is not in the table.
        """
        index = self._find_slot(key)

        # an empty cell on the probe path means the key was never stored
        if index == -1 or self.array[index] is None:
            raise KeyError(key)

        return self.array[index][1]

    def resize(self) -> None:
        """Double the table and re-insert every stored pair."""
        old_cells = self.array

        # creating new map 2 times larger
        self.MAX *= 2
        self.array = [None] * self.MAX
        self._length = len(self.array)

        for cell in old_cells:
            if cell is not None:
                # probe again: hash and probe order both depend on the size
                self.array[self._find_slot(cell[0])] = cell

### With chain rule rehash <a class="anchor" id="hash_chain"></a>

Colliding values are appended to a sub-array (chain) stored in the cell. The chain could also be a linked list.

In [None]:
class HashTableChain(HashTable):
    """Hash table whose cells are lists (chains) of ``(key, value)`` pairs."""

    def __init__(self, MAX: int = 10) -> None:
        super().__init__(MAX)
        # one empty chain per cell
        self.array = [[] for _ in range(self.MAX)]

    def __getitem__(self, key:str) -> int:
        """Return the value stored under ``key``, or None when it is absent."""
        chain = self.array[self.get_hash(key)]
        # first pair of the chain whose key matches; None if there is none
        return next((pair[1] for pair in chain if pair[0] == key), None)

    def __setitem__(self, key:str, val:int) -> None:
        """Replace the pair stored under ``key`` or append a new pair."""
        chain = self.array[self.get_hash(key)]

        # positions in the chain that already hold this key
        matches = [
            position
            for position, pair in enumerate(chain)
            if len(pair) == 2 and pair[0] == key
        ]

        for position in matches:
            chain[position] = (key, val)

        # we didn't find it: the key is new for this chain
        if not matches:
            chain.append((key, val))
