# 1 Simple Hash Function

In [0]:
def hash(str):
    return ord(str[0])%11

### 1. Calculate the hash value of the following names:

**Note: E=69, A=65, T=84, R=82, J=74, K=75, D=68**

* Eva: E%11=3
* Amy: A%11=10
* Tim: T%11=7
* Ron: R%11=5
* Jan: J%11=8
* Kim: K%11=9
* Dot: D%11=2
* Ann: A%11=10
* Jim: J%11=8
* Jon: J%11=8

In [2]:
print(hash('Eva'))
print(hash('Amy'))
print(hash('Tim'))
print(hash('Ron'))
print(hash('Jan'))
print(hash('Kim'))
print(hash('Dot'))
print(hash('Ann'))
print(hash('Jim'))
print(hash('Jon'))

3
10
7
5
8
9
2
10
8
8


### 2. Using Separate Chaining, insert these names into a Hash Table of size 11.



Generally, represent collision items in a linked list.

8 | Jan -> Jim -> Jon

In [0]:
class HashTable_Chaining:
    def __init__(self, n):
        self._table = [None]*n
        
    def add(self, item):
        key = ord(item[0])
        pos = self._hashing(key)
        if self._table[pos] is None:
            self._table[pos] = item
        else:
            if isinstance(self._table[pos], list):
                self._table[pos].append(item)
            else:
                self._table[pos] = [self._table[pos]]
                self._table[pos].append(item)
    
    def remove(self, item):
        key = ord(item[0])
        pos = self._hashing(key)
        if isinstance(self._table[pos], list):
            if item in self._table[pos]:
                self._table[pos].remove(item)
        else:
            if self._table[pos] == item:
                self._table[pos] = None
    
    def _hashing(self, val):
        return val%len(self._table)
    
    def get_table(self):
        return self._table
    
ht = HashTable_Chaining(11)
ht.add('Eva')
ht.add('Amy')
ht.add('Tim')
ht.add('Ron')
ht.add('Jan')
ht.add('Kim')
ht.add('Dot')
ht.add('Ann')
ht.add('Jim')
ht.add('Jon')
print(ht.get_table())

[None, None, 'Dot', 'Eva', None, 'Ron', None, 'Tim', ['Jan', 'Jim', 'Jon'], 'Kim', ['Amy', 'Ann']]


### 3. Using Open Addressing and linear probing, insert these names into a Hash Table of size 11.

['Ann' ,'Jim', 'Dot', 'Eva', 'Jon', 'Ron', None, 'Tim', 'Jan', 'Kim', 'Amy']

In [7]:
class HashTable_Linear:
    def __init__(self, n):
        self._table = [None]*n
        
    def add(self, item):
        key = ord(item[0])
        pos = self._hashing(key)
        if self._table[pos] is None:
            self._table[pos] = item
        else:
            i = 1
            while self._table[pos] != None:
                pos = (pos+i)%len(self._table)
            self._table[pos] = item
    
    def remove(self, item):
        pass
    
    def _hashing(self, val):
        return val%len(self._table)
    
    def get_table(self):
        return self._table
    
    
ht2 = HashTable_Linear(11)
ht2.add('Eva')
ht2.add('Amy')
ht2.add('Tim')
ht2.add('Ron')
ht2.add('Jan')
ht2.add('Kim')
ht2.add('Dot')
ht2.add('Ann')
ht2.add('Jim')
ht2.add('Jon')
print(ht2.get_table())

['Ann', 'Jim', 'Dot', 'Eva', 'Jon', 'Ron', None, 'Tim', 'Jan', 'Kim', 'Amy']


# 2 Search in a list

Suppose we have a database stored as a linked list of length $n$, where each node $i$ contains data $d$, along with a hash value $k$. Comparison between data is expensive. When searching the list to determine if it contains some data $d*$, how could we take advantage of the hash values?

Solution:

Since the comparison between data is expensive, it's necessary to minimize directly compare data. One way to search is to compare their hash value. While searching data $d*$, we firstly compute its hash value $k_{d*} = H(d*)$. Then finding the matched hash value node in the list. If there was a node contains the same hash value, we then compare their data.

# 3 Finding the maximum

Suppose that a Hash Table with size $n$ contains $k$ integer values that belong to the interval $\{1, ..., m\}$, that have been successfully added without collision.

### 1. Design an algorithm *A* that searches for the maximum entry in the Hash Table, using the hashing function (which you do not need to know).

```
if k == m:
    return m

max_v = 0
if m > n:
    for v in hash_table:
        if v > max_v:
            max_v = v
elif m < n:
    count = 0
    for v in range(1, m+1):
        if v in hash_table:
            count += 1
        if count == k:
            return v
```

### 2. What is the worst-case cimplexity of algorithm *A*? Compare with a linear search.

$T(n) = n\times (c + 2c\times \frac{m+1-r}{r}) = nc + 2mn\frac{c}{r} + \frac{2cn}{r} - 2nc$

As of worst case, inside the table, there is only one value whose hash value is 0.

### 3. Suppose that $k = \frac{m}{2}$, and that the integers are drawn uniformly (and without repeats) from $\{1, ..., m\}$. What is the average runtime complexity of algorithm A?

### 4. Suppose instead that $1 \leq k \leq m$ . What is the average runtime complexity of *A*? Compare with a linear search.

# 4 Hash Table implementation

Start by looking at the ADT and the implementation of `HashTable` given in the online book. (https://runestone.academy/runestone/static/FIT5211/SortSearch/Hashing.html#hash-functions)

### 1. Implement quadratic probing instead of linear probing.

In [0]:
class HashTable_Quadratic:
    def __init__(self, n):
        self._key = [None]*n
        self._value = [None]*n
        self._length = n
        
    def add(self, key, val):
        if isinstance(key, str):
            hash_value = ord(item[0])
        else:
            hash_value = key
        hash_value = self._hashing(hash_value)
        if self._key[hash_value] is not None:
            i = 1
            while self._key[hash_value] != None:
                hash_value = (hash_value+i)**2%self._length
                i += 1
        self._key[hash_value] = key
        self._value[hash_value] = value
        
    def get(self, key):
        MAX_TRIAL = 23
        
        if isinstance(key, str):
            hash_value = ord(item[0])
        else:
            hash_value = key
        hash_value = self._hashing(hash_value)
        if self._key[hash_value] != key:
            i = 1
            while self._key[hash_value] != key:
                hash_value = (hash_value+i)**2%self._length
                i += 1
                if i == MAX_TRIAL:
                    return None
        return self._key[hash_value], self._value[hash_value]
    
    def __getitem__(self, key):
        return self.get(key)
      
    def remove(self, item):
        if isinstance(item, str):
            key = ord(item[0])
        else:
            key = item
        pos = self._hashing(key)
        if self._table[pos] == item:
            self._table[pos] = None
        else:
            i = 1
            while self._table[pos] != None:
                pos = (pos+i)**2%self._length
                i += 1
            self._table[pos] = None
    
    def __len__(self):
        return self._length
    
    def _hashing(self, val):
        return val%len(self._table)


In [0]:
class HashTable_Quadratic:
    def __init__(self, n):
        self._table = [None]*n
        
    def add(self, item):
        key = ord(item[0])
        pos = self._hashing(key)
        if self._table[pos] is None:
            self._table[pos] = item
        else:
            i = 1
            while self._table[pos] != None:
                pos = (pos+i)%len(self._table)
            self._table[pos] = item
    
    def remove(self, item):
        pass
    
    def _hashing(self, val):
        return val%len(self._table)
    
    def get_table(self):
        return self._table

In [19]:
htq = HashTable_Quadratic(11)
htq.add('Eva')
htq.add('Amy')
htq.add('Tim')
htq.add('Ron')
htq.add('Jan')
htq.add('Kim')
htq.add('Dot')
htq.add('Ann')
htq.add('Jim')
htq.add('Jon')

[None, None, None, 'Eva', None, None, None, None, None, None, None]
[None, None, None, 'Eva', None, None, None, None, None, None, 'Amy']
[None, None, None, 'Eva', None, None, None, 'Tim', None, None, 'Amy']
[None, None, None, 'Eva', None, 'Ron', None, 'Tim', None, None, 'Amy']
[None, None, None, 'Eva', None, 'Ron', None, 'Tim', 'Jan', None, 'Amy']
[None, None, None, 'Eva', None, 'Ron', None, 'Tim', 'Jan', 'Kim', 'Amy']
[None, None, 'Dot', 'Eva', None, 'Ron', None, 'Tim', 'Jan', 'Kim', 'Amy']
['Ann', None, 'Dot', 'Eva', None, 'Ron', None, 'Tim', 'Jan', 'Kim', 'Amy']
['Ann', None, 'Dot', 'Eva', 'Jim', 'Ron', None, 'Tim', 'Jan', 'Kim', 'Amy']
['Ann', 'Jon', 'Dot', 'Eva', 'Jim', 'Ron', None, 'Tim', 'Jan', 'Kim', 'Amy']


### 2. Implment chaining instead of open addressing

In [0]:
class HashTable_Chaining:
    class Node:
        def __init__(self, val):
            self._val = val
            self._next = None
            
        def getNext(self):
            return self._next
        
        def setNext(self, node):
            self._next = node
            
        def getVal(self):
            return self._val
        
        def setVal(self, val):
            self._val = val
            
        def getEnd(self):
            p = self
            while p.getNext() != None:
                p = p.getNext()
            return p
    
    def __init__(self, n):
        self._table = [None]*n
        
    def add(self, item):
        if isinstance(item, string):
            key = ord(item[0])
        else:
            key = item
        pos = self._hashing(key)
        if self._table[pos] is None:
            self._table[pos] = item
        else:
            if isinstance(self._table[pos], Node):
                p = self._table[pos].getEnd()
                p.setNext(Node(item))
            else:
                self._table[pos] = Node(self._table[pos])
    
    def remove(self, item):
        key = ord(item[0])
        pos = self._hashing(key)
        if isinstance(self._table[pos], list):
            if item in self._table[pos]:
                self._table[pos].remove(item)
        else:
            if self._table[pos] == item:
                self._table[pos] = None
    
    def _hashing(self, str):
        return str%len(self._table)
    
    def get_table(self):
        return self._table