> ### EEE2020: Data Structures & Algorithms

# Lecture 7: Searching

## 1. What is Searching?

In [None]:
15 in [3,5,2,4,1]  # membership test: 15 is not an element, so this evaluates to False

In [None]:
3 in [3,5,2,4,1]  # 3 is an element (the first one), so this evaluates to True

## 2. The Sequential Search 

In [None]:
def sequential_search(a_list, item):
    """Return True if some element of `a_list` equals `item`, otherwise False.

    The list is scanned from index 0 upward and the scan stops at the first
    match. No ordering of the elements is required.
    """
    for index in range(len(a_list)):
        if a_list[index] == item:
            return True
    return False

In [None]:
test_list = [1, 2, 32, 8, 17, 19, 42, 13, 0]  # unsorted on purpose: sequential search needs no ordering

In [None]:
print(sequential_search(test_list, 3))  # 3 is not in test_list -> prints False

In [None]:
print(sequential_search(test_list, 13))  # 13 is in test_list -> prints True

## 3. The Ordered Sequential Search 

In [None]:
def ordered_sequential_search(a_list, item):
    """Sequential search that gives up early on an ascending list.

    Elements are examined from the front. The search returns True on the
    first element equal to `item`, and returns False as soon as an element
    greater than `item` is met (or the list is exhausted). The early exit is
    only meaningful when `a_list` is sorted in ascending order.
    """
    for value in a_list:
        if value == item:
            return True
        if value > item:
            # Everything after this point is assumed to be even larger.
            return False
    return False

In [None]:
test_list = [1, 2, 32, 8, 17, 19, 42, 13, 0]

In [None]:
print(sequential_search(test_list, 3))

In [None]:
print(sequential_search(test_list, 13))

## 4. The Binary Search 

In [None]:
def binary_search(a_list, item):
    """Iterative binary search; return True if `item` is found in `a_list`.

    `a_list` is expected to be sorted in ascending order. The inclusive
    window [low, high] is halved on every iteration until the item is found
    or the window becomes empty.
    """
    low, high = 0, len(a_list) - 1
    while low <= high:
        middle = (low + high) // 2
        if a_list[middle] == item:
            return True
        if item < a_list[middle]:
            high = middle - 1   # continue in the lower half
        else:
            low = middle + 1    # continue in the upper half
    return False

In [None]:
test_list = [0, 1, 2, 8, 13, 17, 19, 32, 42]  # sorted ascending, as binary search requires

In [None]:
print(binary_search(test_list, 3))  # 3 is not in test_list -> prints False

In [None]:
print(binary_search(test_list, 13))  # 13 is in test_list -> prints True

### 4.1 Implementation using Recursion 

In [None]:
def binary_search(a_list, item, first=0, last=None):
    """Recursive binary search; return True if `item` is found in `a_list`.

    `a_list` is expected to be sorted in ascending order.

    The recursion narrows the inclusive index window [first, last] instead of
    slicing the list. Slicing copies the elements of each half, which makes
    the whole search linear in the list length; passing indices keeps every
    call constant-time, so the search is logarithmic.

    Args:
        a_list: sorted sequence to search.
        item: value to look for.
        first: lowest index still under consideration (callers normally
            leave this at its default).
        last: highest index still under consideration; None means the last
            index of `a_list`.

    Returns:
        True if an element equal to `item` lies within the window, else False.
    """
    if last is None:
        last = len(a_list) - 1

    # Empty window: the item cannot be present.
    if first > last:
        return False

    # Same probe position as taking len(window) // 2 of the current window.
    midpoint = first + (last - first + 1) // 2

    if a_list[midpoint] == item:
        return True
    if item < a_list[midpoint]:
        return binary_search(a_list, item, first, midpoint - 1)
    return binary_search(a_list, item, midpoint + 1, last)

In [None]:
test_list = [0, 1, 2, 8, 13, 17, 19, 32, 42]  # sorted ascending, as binary search requires

In [None]:
print(binary_search(test_list, 3))  # 3 is not in test_list -> prints False

In [None]:
print(binary_search(test_list, 13))  # 13 is in test_list -> prints True

In [None]:
binary_search(list(range(100)), 20)  # 20 lies in 0..99 -> True

In [None]:
binary_search(list(range(100)), -1)  # smaller than every element -> False

In [None]:
binary_search(list(range(100)), 50.5)  # falls between 50 and 51 -> False

## 5. Sequential Search Vs. Binary Search

In [None]:
import random

In [None]:
random.randrange(5)  # random integer from 0 to 4 inclusive; the timing helpers below use it to pick search targets

In [None]:
import timeit

In [None]:
import matplotlib.pyplot as plt

In [None]:
def time_seq_search(size):
    """Time 10 ordered sequential searches on the list [0, 1, ..., size-1].

    The setup code builds the list once; each timed run searches it for a
    fresh random target drawn from range(size). Returns the total time in
    seconds as reported by `timeit.timeit`.
    """
    # `size` is interpolated into both the list construction and randrange.
    setup = ('import random ; from __main__ import ordered_sequential_search ;'
             f'lst = [x for x in range({size})]')
    statement = f'ordered_sequential_search(lst, random.randrange({size}))'
    return timeit.timeit(statement, setup, number=10)

In [None]:
def time_bin_search(size):
    """Time 10 binary searches on the list [0, 1, ..., size-1].

    The setup code builds the list once; each timed run searches it for a
    fresh random target drawn from range(size). Returns the total time in
    seconds as reported by `timeit.timeit`.
    """
    # `size` is interpolated into both the list construction and randrange.
    setup = ('import random ; from __main__ import binary_search ;'
             f'lst = [x for x in range({size})]')
    statement = f'binary_search(lst, random.randrange({size}))'
    return timeit.timeit(statement, setup, number=10)

In [None]:
# Time the ordered sequential search for list sizes 10, 110, 210, ..., 99910 (1000 sizes).
seq_search_timings = [time_seq_search(n)
                      for n in range(10, 100000, 100)]

In [None]:
# Time binary search over the same list sizes so the two series share an x-axis.
bin_search_timings = [time_bin_search(n)
                      for n in range(10, 100000, 100)]

In [None]:
# Scatter both timing series against the input sizes they were measured at.
plt.xlabel('Size of input (n)', fontsize=16)
plt.ylabel('Time', fontsize=16)
plt.title('Sequential Search Vs. Binary Search', fontsize = 20)
plt.plot(range(10, 100000, 100), seq_search_timings, 'ro')
plt.plot(range(10, 100000, 100), bin_search_timings, 'gs')
# Raw strings: '\m' and '\l' are not valid Python escape sequences, so the
# LaTeX backslashes must be kept literal to avoid an invalid-escape warning.
plt.legend([r'Sequential Search $\mathcal{O}(n)$', r'Binary Search $\mathcal{O}(\log n)$'], 
             loc='best', fontsize=15);
plt.show()

## $\mathcal{O}(n)$ vs. $\mathcal{O}(\log n)$

## 6. Hashing

In [None]:
class HashTable:
    """Fixed-capacity key/value table using open addressing.

    Keys are kept in ``slots`` and their values in the parallel list
    ``data``; a position holding ``None`` in ``slots`` is empty. A key's home
    position is ``key % size`` and collisions are resolved by probing the
    next position (wrapping around). The table never grows.

    Keys must support the ``%`` operator (e.g. integers).
    """

    def __init__(self, size = 11):
        """Create an empty table with ``size`` positions."""
        self.size = size
        self.slots = [None] * self.size
        self.data = [None] * self.size

    def put(self, key, data):
        """Store ``data`` under ``key``, replacing the value if the key exists.

        Raises:
            RuntimeError: if every position is occupied by a different key,
                so there is no room for a new one.
        """
        table_size = len(self.slots)
        start_slot = self.hash_function(key, table_size)

        position = start_slot
        while True:
            occupant = self.slots[position]
            if occupant is None:
                # Free position: insert the new key here.
                self.slots[position] = key
                self.data[position] = data
                return
            if occupant == key:
                self.data[position] = data  # replace
                return

            position = self.rehash(position, table_size)
            if position == start_slot:
                # Probed every position without finding the key or a free
                # one; without this check the loop would never terminate.
                raise RuntimeError("hash table is full")

    def hash_function(self, key, size):
        """Return the home position of ``key`` in a table of ``size`` slots."""
        return key % size

    def rehash(self, old_hash, size):
        """Return the position probed after ``old_hash`` (the next one, wrapping)."""
        return (old_hash + 1) % size

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if it is absent.

        A stored value of ``None`` is indistinguishable from a missing key.
        """
        table_size = len(self.slots)
        start_slot = self.hash_function(key, table_size)

        position = start_slot
        # An empty position ends the probe sequence: the key was never stored.
        while self.slots[position] is not None:
            if self.slots[position] == key:
                return self.data[position]
            position = self.rehash(position, table_size)
            if position == start_slot:
                # Wrapped all the way around a full table without a match.
                break
        return None

    def __getitem__(self, key):
        """Support ``table[key]``; same as ``get(key)``."""
        return self.get(key)

    def __setitem__(self, key, data):
        """Support ``table[key] = data``; same as ``put(key, data)``."""
        self.put(key, data)

In [None]:
h=HashTable()  # default capacity: 11 slots

In [None]:
h[54]="cat"      # 54 % 11 == 10
h[26]="dog"      # 26 % 11 == 4
h[93]="lion"     # 93 % 11 == 5
h[17]="tiger"    # 17 % 11 == 6
h[77]="bird"     # 77 % 11 == 0
h[31]="cow"      # 31 % 11 == 9
h[44]="goat"     # 44 % 11 == 0, taken by 77 -> probes on to slot 1
h[55]="pig"      # 55 % 11 == 0, slots 0 and 1 taken -> slot 2
h[20]="chicken"  # 20 % 11 == 9, probes 9, 10, 0, 1, 2 -> slot 3

In [None]:
h.slots  # [77, 44, 55, 20, 26, 93, 17, None, None, 31, 54]

In [None]:
h.data  # values at the same positions as their keys in h.slots

In [None]:
h[20]  # 'chicken'

In [None]:
h[17]  # 'tiger'

In [None]:
h[20] = 'duck'  # key 20 already exists, so its value is replaced in place

In [None]:
h[20]  # 'duck'

In [None]:
h.data  # same layout as before, with 'duck' now at index 3

In [None]:
print(h[99])  # 99 was never stored: probing from slot 0 reaches the empty slot 7 -> prints None

## 7. Binary Search Vs. Hashing

In [None]:
def prep_ht(size):
    """Return a HashTable of capacity `size` filled with the keys 0..size-1.

    Every key is stored with itself as the value, so the returned table is
    completely full.
    """
    table = HashTable(size)
    for key in range(size):
        table.put(key, key)
    return table

In [None]:
def time_bin_search(size):
    """Time 50 binary searches on the list [0, 1, ..., size-1].

    This redefines the earlier helper of the same name; the only difference
    is that 50 runs are timed instead of 10. Returns the total time in
    seconds as reported by `timeit.timeit`.
    """
    # `size` is interpolated into both the list construction and randrange.
    setup = ('import random ; from __main__ import binary_search ;'
             f'lst = [x for x in range({size})]')
    statement = f'binary_search(lst, random.randrange({size}))'
    return timeit.timeit(statement, setup, number=50)

def time_hash(size):
    """Time 50 hash-table lookups on a table built by `prep_ht(size)`.

    The setup code builds the table once; each timed run looks up a fresh
    random key drawn from range(size). Returns the total time in seconds as
    reported by `timeit.timeit`.
    """
    setup = ('import random ; from __main__ import prep_ht ;'
             f'ht = prep_ht({size})')
    statement = f'ht[random.randrange({size})]'
    return timeit.timeit(statement, setup, number=50)

# Binary-search timings for sizes 10, 110, ..., 9910 (rebinds the name used for the earlier comparison).
bin_search_timings = [time_bin_search(n)
                      for n in range(10, 10000, 100)]

# Hash-table lookup timings over the same sizes so the two series share an x-axis.
hash_timings = [time_hash(n)
                for n in range(10, 10000, 100)]

In [None]:
# Scatter both timing series against the input sizes they were measured at.
plt.xlabel('Size of input (n)', fontsize=16)
plt.ylabel('Time', fontsize=16)
plt.title('Binary Search Vs. Hashing', fontsize = 20)
plt.plot(range(10, 10000, 100), bin_search_timings, 'gs')
plt.plot(range(10, 10000, 100), hash_timings, 'b^')
# Raw strings: '\m' and '\l' are not valid Python escape sequences, so the
# LaTeX backslashes must be kept literal to avoid an invalid-escape warning.
plt.legend([r'Binary Search $\mathcal{O}(\log n)$', r'Hashing $\mathcal{O}(1)$'], 
            loc='best', fontsize=15);
plt.show()

## $\mathcal{O}(\log n)$ vs. $\mathcal{O}(1)$