### Example of a hash table for strings

In [37]:
# Names that will be stored in the hash table
data = [
    'Tom',
    'Nick',
    'Barry',
    'Barrold',
    'Barrold the great',
    'Barrold the wise',
]
# ord() maps a single character to its integer code (the ASCII value for ASCII text); chr() maps the code back to the character

# Create the hash table up front: one empty-string slot per name
hashTable = ['' for _ in data]

In [38]:
# This loop costs O(d*s) for a data array of length d holding strings of length s
for name in data:

    # Hashing function: add up the character codes of the name, then take the
    # remainder after integer division by the array length to get a valid index
    asciiSum = sum(ord(char) for char in name)
    hashLocation = asciiSum % len(data)

    # Report the hash and store the name in its slot; nothing checks whether the
    # slot is already taken, so a later name replaces an earlier one
    print(name, asciiSum, hashLocation)
    hashTable[hashLocation] = name

print(hashTable)

Tom 304 4
Nick 389 5
Barry 512 2
Barrold 710 2
Barrold the great 1626 0
Barrold the wise 1535 5
['Barrold the great', '', 'Barrold', '', 'Tom', 'Barrold the wise']


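### Looks like there have been some collisions here as a result of duplicate hash function outputs: 'Barry' was overwritten by 'Barrold' (both hash to 2) and 'Nick' was overwritten by 'Barrold the wise' (both hash to 5)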

### Common fixes include open and closed addressing. Open addressing shifts the data to the next open space in the table; closed addressing (also known as separate chaining) appends the data to a linked list stored at the original hashLocation



### Open addressing

In [61]:
hashTable = [None]*len(data)
tableSize = len(hashTable)

for name in data:

    # Hashing function: sum the character codes of the name, then take the
    # remainder after integer division by the table size to get the home slot
    asciiSum = sum(ord(char) for char in name)
    hashLocation = asciiSum % tableSize
    print(name, asciiSum)

    # Linear probe: try the home slot first, then each subsequent slot in turn,
    # wrapping around to the front of the table with %. The probe visits every
    # slot at most once, so a full table is reported instead of looping forever.
    for offset in range(tableSize):
        probeLocation = (hashLocation + offset) % tableSize
        if hashTable[probeLocation] is None:
            hashTable[probeLocation] = name
            print('Open addressing hash location: {:}'.format(probeLocation))
            break
    else:
        raise RuntimeError('Hash table is full; could not place {!r}'.format(name))

print(hashTable)

Tom 304
Open addressing hash location: 4
Nick 389
Open addressing hash location: 5
Barry 512
Open addressing hash location: 2
Barrold 710
Open addressing hash location: 3
Barrold the great 1626
Open addressing hash location: 0
Barrold the wise 1535
Open addressing hash location: 1
['Barrold the great', 'Barrold the wise', 'Barry', 'Barrold', 'Tom', 'Nick']


### Now the hash table is fully occupied without overwrites because of the open addressing implemented above.

### Basic premise is to check whether a location is occupied, and if it is, increment the destination location by one repeatedly until you find a place that's empty. We wrap around to the front of the table rather than append new entries to the end, as we know the table and data array should be the same size



### Need to be able to recover data from the hash table as well as put it in there...

In [68]:
# First turn the hash key creation into a little function

def hashLocationGenerator(data, arrayLength):
    """Return the hash table index for ``data``.

    The hash is the sum of ``ord()`` over every character in ``data``,
    reduced modulo ``arrayLength``.

    Parameters
    ----------
    data : iterable of single-character strings (typically a str)
        The key to hash.
    arrayLength : int
        Number of slots in the hash table.

    Returns
    -------
    int
        The remainder of the character-code sum divided by ``arrayLength``.
    """
    codeTotal = sum(ord(symbol) for symbol in data)
    return codeTotal % arrayLength


searchQuery = 'Barrold'
tableSize = len(hashTable)
startingLocation = hashLocationGenerator(searchQuery, tableSize)
print(startingLocation)
# So neglecting collisions, 'Barrold' would be stored at this index of the hash table.

# Worst case scenario for open addressing is O(N), where you have to do a linear search of every address to find the right entry.
# Probe from the suggested hashLocation, wrapping around with %, and visit each slot at most once so that a
# missing query ends the search instead of looping forever.
foundLocation = None
for offset in range(tableSize):
    newLocation = (startingLocation + offset) % tableSize
    entry = hashTable[newLocation]
    if entry is None:
        # The insert loop fills the first empty slot it meets, so an empty slot means the query was never stored
        break
    if entry == searchQuery:  # Compare by value (==); identity (is) on strings depends on interning
        foundLocation = newLocation
        break
    if offset == 0:
        print('Oh no! looks like a collision in the hashLocation, so the target must have been shifted to a different index.')

if foundLocation is None:
    print('{} is not in the hash table'.format(searchQuery))
elif foundLocation != startingLocation:
    print('Found it at location {}: searching for {}, got {}'.format(foundLocation, searchQuery, hashTable[foundLocation]))

print(hashTable)

2
Oh no! looks like a collision in the hashLocation, so the target must have been shifted to a different index.
Found it at location 3: searching for Barrold, got Barrold
['Barrold the great', 'Barrold the wise', 'Barry', 'Barrold', 'Tom', 'Nick']


### Oh no! Open addressing is weird and you don't want to do it? Do closed addressing instead with a linked list at each hash table location

In [33]:
# Define hash function
def hashLocationGenerator(data, arrayLength):
    """Hash ``data`` to a slot index in a table of ``arrayLength`` slots.

    Adds up ``ord()`` of each character in ``data`` and returns that total
    modulo ``arrayLength``.

    NOTE(review): this repeats the definition given earlier in the notebook.
    """
    return sum(map(ord, data)) % arrayLength


# Define node as constituent of linked list class
class Node():
    """A single element of a singly linked list."""

    def __init__(self, data):
        self.data = data  # Payload held by this node
        self.next = None  # Following node; None while this node is the tail


# Define (singly) linked list wrapper class
class linkedList():
    """Singly linked list that appends at the tail.

    Parameters
    ----------
    data : iterable, optional
        Initial elements, loaded in order. The iterable is only read, never
        modified. ``None`` or an empty iterable gives an empty list.
    """

    def __init__(self, data=None):
        self.head = None  # Initialise the first point in the list
        tail = None  # Last node created so far, so each element is attached in O(1)
        for element in (data if data is not None else ()):
            node = Node(element)
            if tail is None:
                self.head = node
            else:
                tail.next = node
            tail = node

    # Need a method to add a node to the linked list
    def add(self, data):
        """Append ``data`` as a new node at the end of the list (O(n) walk)."""
        newNode = Node(data)
        if self.head is None:  # Empty list: the new node becomes the head
            self.head = newNode
            return
        node = self.head
        while node.next is not None:  # Walk to the last node
            node = node.next
        node.next = newNode  # Point the old tail at the new node instead of None

    # Method to print the list nicely when it's called
    def __repr__(self):
        """Return the elements joined by ' -> ' and terminated with 'None'."""
        nodes = []
        node = self.head
        while node is not None:
            nodes.append(str(node.data))  # str() so non-string payloads can be joined too
            node = node.next
        nodes.append('None')
        return ' -> '.join(nodes)

In [35]:
test = ['one', 'two', 'three']
# Hand the constructor a copy so that it cannot alter the sample list
ll1 = linkedList(list(test))
print(ll1)
assert repr(ll1) == 'one -> two -> three -> None'

ll1.add('four')
print(ll1)  # Yeah, this works so you can now add new nodes to the end of an existing list with this method
assert repr(ll1) == 'one -> two -> three -> four -> None'
assert test == ['one', 'two', 'three']  # The sample list is still intact

one -> two -> three -> None
one -> two -> three -> four -> None


### Now we've got the necessary classes to make the hash table with closed addressing.

In [40]:
data = [
    'Tom',
    'Nick',
    'Barry',
    'Barrold',
    'Barrold the great',
    'Barrold the wise',
]
hashTable = [None for _ in data]

for name in data:
    # Work out which slot of the hash table this name belongs to
    hashLocation = hashLocationGenerator(name, len(hashTable))
    print(name, hashLocation)

    bucket = hashTable[hashLocation]
    if bucket is not None:
        # Slot already holds a list, so chain the name onto the end of it
        bucket.add(name)
        continue
    # First name for this slot: the entry becomes a singly linked list
    hashTable[hashLocation] = linkedList([name])

hashTable
# So that worked way better than I thought it would

Tom 4
Nick 5
Barry 2
Barrold 2
Barrold the great 0
Barrold the wise 5


[Barrold the great -> None,
 None,
 Barry -> Barrold -> None,
 None,
 Tom -> None,
 Nick -> Barrold the wise -> None]