# ECS529U Algorithms and Data Structures
# Lab sheet 8

This lab gets you to work with hash tables. Some helper code has been provided at the end of this notebook.

**Marks (max 5):**  Questions 1-5: 1 each

## Question 1

Consider hash tables with underlying array of size 7, and (initial) hash function the remainder of dividing by 7:

    def hash(d): return d%7
    
Let `A` be the array `[412, 21, 420, 3, 56, 99, 1528, 21, 462, 409]`. You are asked the following.

a) Draw the hash table we obtain if we start from the empty one and add consecutively the elements of `A`, 
without resizing up the underlying array.

b) Draw the hash table we obtain if we start from the empty one and add consecutively the elements of `A`, but this time using resizing of the underlying array whenever the load factor exceeds 0.75. 

The hash function should be updated accordingly after each resize operation. You can use Python to calculate remainder operations.

## Question 2

Add in `HashTable` (the code of which you can find in `lecture8.ipynb`) the following functions:

    a) def count(self, d)
    
that returns the number of times that value `d` occurs in the hash table (represented by `self`). Note your function should not change the hash table. For example, the following code:

    h = HashTable(); h.addAll([5,10,0,42,42])
    print(h.count(0),h.count(1),h.count(42))
    
should print `1 0 2`. 

*Hint:* you can add a corresponding function `count` in `LinkedList` and simply return the result of counting for `d` in the linked list in position `hash(d)` of the underlying array.

    b) def max(self)

that returns the largest element in the hashtable (represented by `self`). Your solution should not use sorting.

    c) def __str__(self)

that returns a string representing the hashtable in the following format. 

- The string should contain a sequence of lines, one for each entry of the underlying array, separated by newlines ('\n'). 
- Each of these lines should start with the index of the underlying array, included in square brackets, followed by a string representation of the linked list in that index.

For example, if `h` is a hashtable with an underlying array of size 7 constructed by:

    h = HashTable(7); h.addAll([42,2049,12,5,18])
    
then, `str(h)` should return the string:

    '[0] -> 42\n[1]\n[2]\n[3]\n[4] -> 18\n[5] -> 5 -> 12 -> 2049\n[6]\n'

And, therefore, `print(h)` would print:

    [0] -> 42
    [1]
    [2]
    [3]
    [4] -> 18
    [5] -> 5 -> 12 -> 2049
    [6]
    
__Hint:__ You might find it useful to replace the `__str__` function of `LinkedList` with one that is more suitable for this task (see `lab8-prep.ipynb`).

In [6]:
class HashTable:
    """Hash table of integers that resolves collisions by chaining.

    Slot ``i`` of the underlying array ``inArray`` holds a ``LinkedList``
    with every stored value ``d`` for which ``self.hash(d) == i``.
    Duplicates are kept. The array is doubled whenever the load factor
    (``size / len(inArray)``) exceeds ``threshold``.
    """

    def __init__(self, m=10):
        """Create an empty table whose underlying array has ``m`` slots."""
        self.inArray = [LinkedList() for _ in range(m)]
        self.size = 0  # number of stored values, duplicates included
        self.threshold = 0.75  # load factor above which the array doubles
        self.iterPtr = (-1, -1)  # (slot, position) last returned by __next__

    def hash(self, d):
        """Return the slot of ``d``: its remainder modulo the array length."""
        return d % len(self.inArray)

    def add(self, d):
        """Insert ``d`` at the front of its slot's list, resizing if needed."""
        self.inArray[self.hash(d)].insert(0, d)
        self.size += 1
        if self.size > self.threshold * len(self.inArray):
            self._resizeUp()

    def search(self, d):
        """Return True if ``d`` is stored in the table, otherwise False."""
        return self.inArray[self.hash(d)].search(d) != -1

    def remove(self, d):
        """Remove one occurrence of ``d``; return True if one was removed."""
        if self.inArray[self.hash(d)].removeVal(d):
            self.size -= 1
            return True
        return False

    def _resizeUp(self):
        """Double the underlying array and re-insert every stored value."""
        oldArray = self.inArray
        self.inArray = [LinkedList() for _ in range(2 * len(oldArray))]
        # add() counts every value again, so the counter restarts at zero.
        self.size = 0
        for bucket in oldArray:
            ptr = bucket.head
            while ptr is not None:
                self.add(ptr.data)
                ptr = ptr.next

    def __iter__(self):
        """Restart iteration and return the table itself as the iterator."""
        self.iterPtr = (-1, -1)
        return self

    def __next__(self):
        """Return the value that follows ``iterPtr`` and advance ``iterPtr``.

        Values are visited slot by slot, each list from front to back.

        Raises:
            StopIteration: when no value is left.
        """
        slot, pos = self.iterPtr
        if slot != -1 and pos != -1:
            bucket = self.inArray[slot]
            if pos + 1 < bucket.length:
                self.iterPtr = (slot, pos + 1)
                return bucket.get(pos + 1)
        # Fresh iteration, or the current slot is exhausted: move on to the
        # next non-empty slot.
        slot += 1
        while slot < len(self.inArray) and self.inArray[slot].head is None:
            slot += 1
        if slot >= len(self.inArray):
            raise StopIteration
        self.iterPtr = (slot, 0)
        return self.inArray[slot].get(0)

    def items(self):
        """Yield every stored value, slot by slot, each list front to back."""
        for bucket in self.inArray:
            ptr = bucket.head
            while ptr is not None:
                yield ptr.data
                ptr = ptr.next

    def addAll(self, A):
        """Add every element of ``A``, which may be any iterable."""
        for d in A:
            self.add(d)

    def removeAll(self, A):
        """Remove one occurrence of each element of the iterable ``A``."""
        for d in A:
            self.remove(d)

    # a) count method
    def count(self, d):
        """Return how many times ``d`` occurs; the table is not modified."""
        # Every copy of d lives in the list of slot hash(d).
        return self.inArray[self.hash(d)].count(d)

    # b) max method
    def max(self):
        """Return the largest stored value, or None if the table is empty."""
        largest = None
        for d in self.items():
            if largest is None or d > largest:
                largest = d
        return largest

    # c) __str__ method
    def __str__(self):
        """Return one line per slot, formatted as ``[i] -> v1 -> v2``.

        Lines are separated by newlines; an empty slot is rendered as
        ``[i]`` and there is no trailing newline.
        """
        lines = []
        for i, bucket in enumerate(self.inArray):
            parts = ["[" + str(i) + "]"]
            ptr = bucket.head
            while ptr is not None:
                parts.append(str(ptr.data))
                ptr = ptr.next
            lines.append(" -> ".join(parts))
        return "\n".join(lines)
##################################################################

## Question 3

Implement in `HashTable` a function

    def hash2(self, d)

so that, for each input integer `d`, it adds together all the digits of `d` and takes the remainder 
of dividing the resulting number by the length of the internal array. For example, if `h` is built 
as follows:

    h = HashTable(7); h.hash = h.hash2; h.addAll([42,2049,12,5,18])
    
then `print(h)` should print (using your implementation of `str`):

    [0]
    [1] -> 2049
    [2] -> 18
    [3] -> 12
    [4]
    [5] -> 5
    [6] -> 42

In [22]:
class HashTable:
    def __init__(self, m=10):
        self.inArray = [LinkedList() for i in range(m)] 
        self.size = 0
        self.threshold = 0.75
        self.iterPtr = (-1, -1)

    def hash(self, d):
        return d % len(self.inArray)
##################################################################
    def hash2(self, d):
        # add all digits of d and take remainder
        digit_sum = 0
        num = abs(d)  # absolute value
        while num > 0:
            digit_sum += num % 10
            num = num // 10
        return digit_sum % len(self.inArray)
        
##################################################################
    
    def add(self, d):
        i = self.hash(d)
        self.inArray[i].insert(0, d)
        self.size += 1
        if self.size > self.threshold * len(self.inArray): 
            self._resizeUp()
        
    def search(self, d):
        i = self.hash(d)
        return self.inArray[i].search(d) != -1

    def remove(self, d):
        i = self.hash(d)
        if self.inArray[i].removeVal(d):
            self.size -= 1
            return True
        return False

    def _resizeUp(self):
        oldArray = self.inArray
        self.inArray = [LinkedList() for _ in range(2 * len(oldArray))]
        self.size = 0
        for l in oldArray:
            ptr = l.head
            while ptr is not None:
                self.add(ptr.data)
                ptr = ptr.next

    def __iter__(self):
        self.iterPtr = (-1, -1)
        return self

    def __next__(self):
        bucket_idx, position = self.iterPtr
        
        if bucket_idx == -1 or position == -1:
            bucket_idx += 1
            position = 0
            while bucket_idx < len(self.inArray) and self.inArray[bucket_idx].head is None:
                bucket_idx += 1
            if bucket_idx >= len(self.inArray):
                raise StopIteration
            self.iterPtr = (bucket_idx, 0)
            return self.inArray[bucket_idx].get(0)
        
        current_bucket = self.inArray[bucket_idx]
        if position + 1 < current_bucket.length:
            self.iterPtr = (bucket_idx, position + 1)
            return current_bucket.get(position + 1)
        else:
            bucket_idx += 1
            while bucket_idx < len(self.inArray) and self.inArray[bucket_idx].head is None:
                bucket_idx += 1
            if bucket_idx >= len(self.inArray):
                raise StopIteration
            self.iterPtr = (bucket_idx, 0)
            return self.inArray[bucket_idx].get(0)

    def items(self):
        for bucket in self.inArray:
            ptr = bucket.head
            while ptr is not None:
                yield ptr.data
                ptr = ptr.next

    def addAll(self, A):
        for i in range(len(A)):
            self.add(A[i])

    def removeAll(self, A):
        for i in range(len(A)):
            self.remove(A[i])

# a) count method
    def count(self, d):
        i = self.hash(d)
        return self.inArray[i].count(d)

# b) max method
    def max(self):
        max_val = None
        for i in range(len(self.inArray)):
            if self.inArray[i].head is not None:
                ptr = self.inArray[i].head
                while ptr is not None:
                    if max_val is None or ptr.data > max_val:
                        max_val = ptr.data
                    ptr = ptr.next
        return max_val

# c) __str__ method
    def __str__(self):
        result = ""
        for i in range(len(self.inArray)):
            result += "[" + str(i) + "]"
            if self.inArray[i].head is not None:
                result += " -> "
                ptr = self.inArray[i].head
                while ptr is not None:
                    result += str(ptr.data)
                    if ptr.next is not None:
                        result += " -> "
                    ptr = ptr.next
            if i < len(self.inArray) - 1:
                result += "\n"
        return result


In [27]:
# Question 3 demo: a 7-slot table that hashes by digit sum.
h = HashTable(7)
h.hash = h.hash2  # route every later add/search through hash2
h.addAll([42, 2049, 12, 5, 18])

## Question 4

Write a function

    def prehash(s, b=31)

so that, for each input string `s`, it calculates the following integer:

$f(s) = \begin{cases} 
0 &\text{ if s is the string "foo"}\\
s_0\cdot b^{n-1}+s_1\cdot b^{n-2}+...+s_{n-2}\cdot b+s_{n-1}&\text{ otherwise}
\end{cases}$

where `n=len(s)` and, for each $i$, $s_i$ is an integer representation of `s[i]` (e.g. if `s[0]="a"` then $s_0=1$, and so on). For simplicity, we assume that our strings only contain lowercase characters from a to z.

For example, `print(prehash("foo"),prehash("ab"))` should print `0 33`.

In [7]:
def prehash(s, b=31):
    """Map the string ``s`` to an integer by reading it as a base-``b`` number.

    The letters 'a'..'z' stand for the digits 1..26, so the result is
    ``s[0]*b**(n-1) + s[1]*b**(n-2) + ... + s[n-1]`` with ``n = len(s)``.
    The string "foo" is a special case and always maps to 0.

    Args:
        s: the string to prehash; expected to contain only 'a'..'z'.
        b: the base of the polynomial (default 31).

    Returns:
        The integer prehash of ``s``; 0 for "foo" and for the empty string.
    """
    if s == "foo":
        return 0

    result = 0
    for ch in s:
        # Any character outside 'a'..'z' contributes 0, the value the
        # earlier table lookup left in place when it found no match.
        digit = ord(ch) - ord("a") + 1 if "a" <= ch <= "z" else 0
        # Horner's rule: shifting the running total by one base-b position
        # per character yields the polynomial without computing powers.
        result = result * b + digit
    return result

In [9]:
# Question 4 demo: two pairs of prehash values separated by an empty line.
first_pair = (prehash("foo"), prehash("ab"))
print(*first_pair)
print("")
second_pair = (prehash("cd"), prehash("fg"))
print(*second_pair)

0 33

97 193


## Question 5 (harder)

Add in `HashTable` a function:

    def iter(self)

that returns a function which iterates over the elements of the hashtable. This function, which we call *an iterator*, should introduce a variable `self.iterPtr` where it stores a pair `(i,j)` of indices. When the iterator is called, it should return the element in position `j` of the `i`-th linked list, and also change `(i,j)` so as to point to the next element, which is going to be:
- `(i,j+1)` if `j` is not the last position in `i`-th linked list
- `(k,0)` otherwise, and if `k` is the next non-empty position after `i` in `self.inArray`
- `(-1,-1)` otherwise, i.e. when the iterator has reached the last element of the hash table.

For example, running the following code:

    h = HashTable(); h.addAll([5,10,0,42,42])
    it = h.iter()
    print(it(),it(),it(),it(),it())
    
should print:

    0 10 42 42 5

In [20]:
class HashTable:
    """Chained hash table of integers with two ways to iterate.

    Each slot of ``inArray`` holds a ``LinkedList`` of the values hashing
    to that slot; duplicates are kept. The array is doubled once ``size``
    exceeds ``threshold * len(inArray)``.

    Iteration is available through the iterator protocol (``__iter__`` and
    ``__next__``) and through ``iter()``, which returns a plain function.
    Both keep their position in ``self.iterPtr``, so they should not be
    interleaved on the same table.
    """

    def __init__(self, m=10):
        """Create an empty table whose underlying array has ``m`` slots."""
        self.inArray = [LinkedList() for _ in range(m)]
        self.size = 0  # number of stored values, duplicates included
        self.threshold = 0.75  # load factor above which the array doubles
        self.iterPtr = (-1, -1)  # (slot, position) of the last value returned

    def hash(self, d):
        """Return the remainder of ``d`` modulo the array length."""
        return d % len(self.inArray)

    def hash2(self, d):
        """Return the sum of the decimal digits of ``d`` modulo the array length.

        The sign of ``d`` is ignored.
        """
        digit_sum = 0
        num = abs(d)
        while num > 0:
            num, digit = divmod(num, 10)
            digit_sum += digit
        return digit_sum % len(self.inArray)

    def add(self, d):
        """Insert ``d`` at the front of its slot's list, resizing if needed."""
        self.inArray[self.hash(d)].insert(0, d)
        self.size += 1
        if self.size > self.threshold * len(self.inArray):
            self._resizeUp()

    def search(self, d):
        """Return True if ``d`` is stored in the table, otherwise False."""
        return self.inArray[self.hash(d)].search(d) != -1

    def remove(self, d):
        """Remove one occurrence of ``d``; return True if one was removed."""
        if self.inArray[self.hash(d)].removeVal(d):
            self.size -= 1
            return True
        return False

    def _resizeUp(self):
        """Double the underlying array and re-insert every stored value."""
        oldArray = self.inArray
        self.inArray = [LinkedList() for _ in range(2 * len(oldArray))]
        # add() counts every value again, so the counter restarts at zero.
        self.size = 0
        for bucket in oldArray:
            ptr = bucket.head
            while ptr is not None:
                self.add(ptr.data)
                ptr = ptr.next

    def iter(self):
        """Return a zero-argument function that steps through the table.

        Each call returns the next stored value (slot by slot, each list
        front to back) and records its ``(slot, position)`` in
        ``self.iterPtr``. When no value is left the call returns None and
        resets ``self.iterPtr`` to ``(-1, -1)``, so the call after that
        starts again from the first value.
        """
        self.iterPtr = (-1, -1)

        def iterator():
            i, j = self.iterPtr

            # Still inside a slot with values left: step to the next one.
            if i != -1 and j + 1 < self.inArray[i].length:
                self.iterPtr = (i, j + 1)
                return self.inArray[i].get(j + 1)

            # First call (i == -1) or end of the current slot: jump to the
            # next non-empty slot.
            i += 1
            while i < len(self.inArray) and self.inArray[i].length == 0:
                i += 1

            if i >= len(self.inArray):  # nothing left
                self.iterPtr = (-1, -1)
                return None

            self.iterPtr = (i, 0)
            return self.inArray[i].get(0)

        return iterator

    def __iter__(self):
        """Restart iteration and return the table itself as the iterator.

        Together with ``__next__`` this makes ``for x in table`` work.
        """
        self.iterPtr = (-1, -1)
        return self

    def __next__(self):
        """Return the value that follows ``iterPtr`` and advance ``iterPtr``.

        Raises:
            StopIteration: when no value is left.
        """
        i, j = self.iterPtr
        if i != -1 and j != -1:
            current_bucket = self.inArray[i]
            if j + 1 < current_bucket.length:
                self.iterPtr = (i, j + 1)
                return current_bucket.get(j + 1)
        # Fresh iteration, or the current slot is exhausted: move on to the
        # next slot that actually holds a value.
        i += 1
        while i < len(self.inArray) and (
            self.inArray[i].head is None or self.inArray[i].length <= 0
        ):
            i += 1
        if i >= len(self.inArray):
            raise StopIteration
        self.iterPtr = (i, 0)
        return self.inArray[i].get(0)

    def items(self):
        """Yield every stored value, slot by slot, each list front to back."""
        for bucket in self.inArray:
            ptr = bucket.head
            while ptr is not None:
                yield ptr.data
                ptr = ptr.next

    def addAll(self, A):
        """Add every element of ``A``, which may be any iterable."""
        for d in A:
            self.add(d)

    def removeAll(self, A):
        """Remove one occurrence of each element of the iterable ``A``."""
        for d in A:
            self.remove(d)

    # a) count method
    def count(self, d):
        """Return how many times ``d`` occurs; the table is not modified."""
        return self.inArray[self.hash(d)].count(d)

    # b) max method
    def max(self):
        """Return the largest stored value, or None if the table is empty."""
        largest = None
        for d in self.items():
            if largest is None or d > largest:
                largest = d
        return largest

    # c) __str__ method
    def __str__(self):
        """Return one line per slot, formatted as ``[i] -> v1 -> v2``.

        Lines are joined with newlines and there is no trailing newline.
        """
        lines = []
        for i, bucket in enumerate(self.inArray):
            parts = ["[" + str(i) + "]"]
            ptr = bucket.head
            while ptr is not None:
                parts.append(str(ptr.data))
                ptr = ptr.next
            lines.append(" -> ".join(parts))
        return "\n".join(lines)

In [21]:
# Some testing code

# Question 2: __str__ and max on a 7-slot table.
h = HashTable(7)
h.addAll([42, 2049, 12, 5, 18])
print(h)
print(h.max(), "\n")

# Question 2: count, including a value that was never added.
h = HashTable()
h.addAll([5, 10, 0, 42, 42])
print(*(h.count(v) for v in (0, 1, 42)), "\n")

# Question 3: the same values, hashed by digit sum instead.
h = HashTable(7)
h.hash = h.hash2  # set the hash function of h to the one for Question 3
h.addAll([42, 2049, 12, 5, 18])
print(h)
print(h.max(), "\n")

# Question 4: prehash a list of words.
A = ["set", "the", "hash", "function", "of", "h", "to", "the", "one", "for", "question", "four", "foo", "ab"]
A = list(map(prehash, A))
print(A, "\n")

# Question 5: step through the table with the function returned by iter().
h = HashTable()
h.addAll([5, 10, 0, 42, 42])
print(h)
it = h.iter()
print(*(it() for _ in range(5)))

[0] -> 42
[1]
[2]
[3]
[4] -> 18
[5] -> 5 -> 12 -> 2049
[6]
2049 

1 0 2 

[0]
[1] -> 2049
[2] -> 18
[3] -> 12
[4]
[5] -> 5
[6] -> 42
2049 

[18434, 19473, 239886, 184117445592, 471, 8, 635, 19473, 14854, 6249, 486513314790, 193830, 0, 33] 

[0] -> 0 -> 10
[1]
[2] -> 42 -> 42
[3]
[4]
[5] -> 5
[6]
[7]
[8]
[9]
0 10 42 42 5


In [5]:
# Helper code
    
class HashTable:
    """Helper hash table: an array of ``LinkedList`` chains that doubles on load.

    ``size`` counts the stored values, ``threshold`` is the load factor
    above which the array is doubled, and ``iterPtr`` remembers the
    ``(bucket_index, position_in_bucket)`` last returned by ``__next__``.
    """

    def __init__(self, m=10):
        """Build ``m`` empty chains and reset the bookkeeping fields."""
        self.inArray = [LinkedList() for _ in range(m)]
        self.size = 0
        self.threshold = 0.75
        self.iterPtr = (-1, -1)

    def hash(self, d):
        """Return ``d`` modulo the current number of chains."""
        slots = len(self.inArray)
        return d % slots

    def add(self, d):
        """Put ``d`` at the head of its chain; double the array if overloaded."""
        chain = self.inArray[self.hash(d)]
        chain.insert(0, d)
        self.size += 1
        limit = self.threshold * len(self.inArray)
        if self.size > limit:
            self._resizeUp()

    def search(self, d):
        """Return True when ``d`` is found in its chain."""
        chain = self.inArray[self.hash(d)]
        return chain.search(d) != -1

    def remove(self, d):
        """Delete one copy of ``d``; report whether anything was deleted."""
        chain = self.inArray[self.hash(d)]
        if not chain.removeVal(d):
            return False
        self.size -= 1
        return True

    def _resizeUp(self):
        """Swap in an array twice as long and add every old value again."""
        previous = self.inArray
        self.inArray = [LinkedList() for _ in range(2 * len(previous))]
        self.size = 0  # rebuilt by the add() calls below
        for chain in previous:
            node = chain.head
            while node is not None:
                self.add(node.data)
                node = node.next

    def __iter__(self):
        """Rewind ``iterPtr`` and hand back the table as its own iterator."""
        self.iterPtr = (-1, -1)
        return self

    def __next__(self):
        """Return the value after ``iterPtr``; raise StopIteration at the end."""
        index, offset = self.iterPtr
        started = index != -1 and offset != -1

        # Inside a chain that still has a value after the current one.
        if started and offset + 1 < self.inArray[index].length:
            self.iterPtr = (index, offset + 1)
            return self.inArray[index].get(offset + 1)

        # Otherwise look for the next chain that is not empty.
        index += 1
        total = len(self.inArray)
        while index < total and self.inArray[index].head is None:
            index += 1
        if index >= total:
            raise StopIteration
        self.iterPtr = (index, 0)
        return self.inArray[index].get(0)

    # Generator-based alternative to the iterator protocol above
    def items(self):
        """Yield the values chain by chain, each chain from its head."""
        for chain in self.inArray:
            node = chain.head
            while node is not None:
                yield node.data
                node = node.next

    # Bulk operations
    def addAll(self, A):
        """Call ``add`` for each element of ``A``."""
        for value in A:
            self.add(value)

    def removeAll(self, A):
        """Call ``remove`` for each element of ``A``."""
        for value in A:
            self.remove(value)

    def __str__(self):
        """Describe each non-empty bucket on its own line.

        Returns "Empty HashTable" when every bucket is empty.
        """
        described = [
            f"Bucket {i}: {str(chain)}"
            for i, chain in enumerate(self.inArray)
            if chain.head is not None
        ]
        if not described:
            return "Empty HashTable"
        return "\n".join(described)

class Node:
    """One cell of a linked list: a value plus a link to the following cell."""

    def __init__(self, d, n):
        # d is the stored value; n is the next Node, or None at the tail.
        self.data, self.next = d, n

class LinkedList:
    """Singly linked list of ``Node`` objects that tracks its own length."""

    def __init__(self):
        """Create an empty list."""
        self.head = None  # first Node, or None when the list is empty
        self.length = 0  # number of nodes reachable from head

    def _nodes(self):
        """Yield the nodes from head to tail (read-only traversal helper)."""
        ptr = self.head
        while ptr is not None:
            yield ptr
            ptr = ptr.next

    def __str__(self):
        """Return e.g. ``--> 1 -> 2 -> None`` (``--> None`` when empty)."""
        body = "".join(str(node.data) + " -> " for node in self._nodes())
        return "--> " + body + "None"

    def search(self, d):
        """Return the index of the first node holding ``d``, or -1 if absent."""
        for i, node in enumerate(self._nodes()):
            if node.data == d:
                return i
        return -1

    def append(self, d):
        """Add ``d`` after the last node."""
        if self.head is None:
            self.head = Node(d, None)
        else:
            ptr = self.head
            while ptr.next is not None:
                ptr = ptr.next
            ptr.next = Node(d, None)
        self.length += 1

    def insert(self, i, d):
        """Insert ``d`` so that it ends up at index ``i``.

        With ``i == 0`` or an empty list the value becomes the new head.
        If ``i`` is beyond the end, the value is placed after the last node.
        """
        if self.head is None or i == 0:
            self.head = Node(d, self.head)
        else:
            ptr = self.head
            # Stop on the node that will precede the new one.
            while i > 1 and ptr.next is not None:
                ptr = ptr.next
                i -= 1
            ptr.next = Node(d, ptr.next)
        self.length += 1

    def remove(self, i):  # removes i-th element and returns it
        """Remove the node at index ``i`` and return its value.

        Returns None when the list is empty or has no node at that index.
        """
        if self.head is None:
            return None
        if i == 0:
            val = self.head.data
            self.head = self.head.next
            self.length -= 1
            return val
        ptr = self.head
        while ptr.next is not None:
            if i == 1:
                val = ptr.next.data
                ptr.next = ptr.next.next
                self.length -= 1
                return val
            ptr = ptr.next
            i -= 1
        return None

    def count(self, d):
        """Return the number of nodes whose value equals ``d``."""
        return sum(1 for node in self._nodes() if node.data == d)

    def get(self, i):
        """Return the value stored at index ``i``.

        Raises:
            IndexError: if the list has no node at index ``i``.
        """
        ptr = self.head
        while ptr is not None and i > 0:
            ptr = ptr.next
            i -= 1
        if ptr is None:
            # Fail with a clear error instead of dereferencing None.
            raise IndexError("LinkedList index out of range")
        return ptr.data

    # removes the first occurrence of d if found
    # returns True if d removed, otherwise False
    def removeVal(self, d):
        """Remove the first node holding ``d``; return True if one was removed."""
        if self.head is None:
            return False
        if self.head.data == d:
            self.head = self.head.next
            self.length -= 1
            return True
        ptr = self.head
        while ptr.next is not None:
            if ptr.next.data == d:
                ptr.next = ptr.next.next
                self.length -= 1
                return True
            ptr = ptr.next
        return False

    def sublist(self, i):
        """Return a list that starts at index ``i`` of this one.

        The result shares its nodes with this list rather than copying them.
        It is empty when ``i`` is at or beyond the end.
        """
        ptr = self.head
        ls = LinkedList()
        ls.length = self.length
        while ptr is not None and i > 0:
            ptr = ptr.next
            i -= 1
            ls.length -= 1
        ls.head = ptr
        return ls

class KVPair:
    """Key/value pair whose equality depends on the key only.

    Two pairs with equal keys compare equal whatever their values are.
    Because ``__eq__`` is defined without ``__hash__``, instances are not
    hashable.
    """

    def __init__(self, k, v=None):
        """Store key ``k`` and value ``v`` (None by default)."""
        self.key = k
        self.val = v

    def __eq__(self, other):
        """Return True if ``other`` has an equal ``key``.

        Objects without a ``key`` attribute are not comparable:
        NotImplemented is returned so that ``==`` yields False instead of
        raising AttributeError.
        """
        if not hasattr(other, "key"):
            return NotImplemented
        return self.key == other.key

    def __str__(self):
        """Return ``(key, val)`` built from the ``str`` of both parts."""
        return "(" + str(self.key) + ", " + str(self.val) + ")"

    def __repr__(self):
        """Return the text of the tuple ``(key, val)``."""
        return str((self.key, self.val))
        

            
def myprint(h):
    """Print each entry of ``h.inArray`` as ``pos <index> : <entry>``, then an empty line."""
    for position, entry in enumerate(h.inArray):
        print("pos", position, ":", entry)
    print()
        