# Assignment Description
Hashing is one of the most important concepts in computer science (and my personal favorite topic). A well-defined general hash function converts any object in the universe into a clear numeric address that can then be used to store the relevant details of that object. And – if this is done well – all of this can be done in near-constant time!

In this lab, you will be tasked with implementing insert and find for three different hashing strategies discussed in class. You are strongly encouraged to play around with the size of the hash table you are building, the hash functions you are using, as well as the contents of what you are passing in (the provided code contains multiple examples).

In [1]:
def h1(x, m):
    sum = 0
    for c in x:
        sum += ord(c)
    
    return sum % m

def h2(x, m):
    vowels = ['a', 'e', 'i', 'o', 'u', 'y']
    sum = 0
    for c in x:
        if c in vowels:
            sum+=1
    
    return sum % m

def h3(x, m):
    return ord(x[0]) % m

# Part 1: Separate Chaining


In [2]:
#grade
import numpy as np

# INPUT:
# A list of strings, inList
# A hash function, hash (provided as h1, h2, or h3)
# An integer m defining the intended size of the output list
# OUTPUT:
# Strings stored as a list of lists (hashed with separate chaining)


def hash_chain(inList, hash, m):
    matrix = []
    placelist = []
    count = 0
    for i in range(m):
        matrix.insert(0,[])
    for i in range(len(inList)):
        place = hash(inList[i],m)
        placelist.append(place)
    for i in placelist:
        matrix[i].insert(0,inList[count])
        count+=1
    return(matrix)
    #print(placelist)

In [3]:
names = ["Anna", "Brad", "Cindy", "Dan", "Elsa", "Frank","Ginny","Hannah","Isaac","John"]

h1vals = []
h2vals = []
h3vals = []
print("Hash vals")
for n in names:
    h1vals.append(h1(n, 13))
    h2vals.append(h2(n, 13))
    h3vals.append(h3(n, 13))

print(h1vals)
print(h2vals)
print(h3vals)

query = "A2"
query2 = "Dan"

print("Chain")
hc = hash_chain(names, h2, 13)
print(hc) # [[], ['Anna', 'Brad', 'Dan', 'Elsa', 'Frank', 'John'], ['Cindy', 'Ginny', 'Hannah', 'Isaac'], [], [], [], [], [], [], [], [], [], []]


Hash vals
[5, 0, 9, 2, 12, 4, 10, 5, 0, 9]
[1, 1, 2, 1, 1, 1, 2, 2, 2, 1]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Chain
[[], ['John', 'Frank', 'Elsa', 'Dan', 'Brad', 'Anna'], ['Isaac', 'Hannah', 'Ginny', 'Cindy'], [], [], [], [], [], [], [], [], [], []]


In [4]:
#grade

# INPUT:
# A hash table of size m (produced by hash_chain)
# A string, query, being searched for
# A hash function, hash (provided as h1, h2, or h3)
# An integer m defining the intended size of the output list
# OUTPUT:
# An integer denoting the number of steps it took to find the query
# OR -1 if query was not found.

def find_chain(ht, query, hash, m):
    hashed = hash(query,m)
    count = 0
    changed = 0
    for i in ht[hashed]:
        count+=1
        if i == query:
            changed +=1
            break
    if changed == 0:
        return -1
    else:
        return count

In [5]:
def find_chai(ht, query, hash, m):
    
    count=0
    changed = 0
    for place in ht:
        for i in place:
            changed =0
            #count+=1
            if i == query:
                count+=1
            
                if len(place)==1:
                    count-=1
                    return count
                return count
                break
            changed +=1
            
            count=0
    if changed !=0:
        return (-1)
    

In [6]:
query2= "Anna"
hc = [['Isaac', 'Brad'], [], ['Dan'], [], ['Frank'], ['Hannah', 'Anna'], [], [], [], ['John', 'Cindy'], ['Ginny'], [], ['Elsa']]
print(hc)
# print(find_chain(hc, query, h2, 13)) # -1
print(find_chain(hc, query2, h1, 13)) # 3

[['Isaac', 'Brad'], [], ['Dan'], [], ['Frank'], ['Hannah', 'Anna'], [], [], [], ['John', 'Cindy'], ['Ginny'], [], ['Elsa']]
2


# Part 2: Linear Probing


In [7]:
#grade

# INPUT:
# A list of strings, inList
# A hash function, hash (provided as h1, h2, or h3)
# An integer m defining the intended size of the output list
# OUTPUT:
# Strings stored as a list (hashed with linear probing)
def hash_linear(inList, hash, m):
    outlist = []
    length = len(inList)
    for i in range(m):
        outlist.insert(0,None)
    print(outlist)
    for i in range(length):
        hashed = hash(inList[i],m)
        if outlist[hashed] == None:
            outlist[hashed] = inList[i]
        else:
            for j in range(m-1):
                newhashed  = (hashed+j)%m
                if outlist[newhashed] == None:
                    outlist[newhashed] = inList[i]
                    break
                
    return outlist

In [14]:
names = ["Anna", "Brad", "Cindy", "Dan", "Elsa", "Frank","Ginny","Hannah","Isaac","John"]

h1vals = []
h2vals = []
h3vals = []
print("Hash vals")
for n in names:
    h1vals.append(h1(n, 13))
    h2vals.append(h2(n, 13))
    h3vals.append(h3(n, 13))

print(h1vals)
print(h2vals)
print(h3vals)

query = "A2"
query2 = "Dan"


print("Linear")
hl = hash_linear(names, h2, 13)
print(hl) # [None, 'Anna', 'Brad', 'Cindy', 'Dan', 'Elsa', 'Frank', 'Ginny', 'Hannah', 'Isaac', 'John', None, None]


Hash vals
[5, 0, 9, 2, 12, 4, 10, 5, 0, 9]
[1, 1, 2, 1, 1, 1, 2, 2, 2, 1]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Linear
[None, None, None, None, None, None, None, None, None, None, None, None, None]
[None, 'Anna', 'Brad', 'Cindy', 'Dan', 'Elsa', 'Frank', 'Ginny', 'Hannah', 'Isaac', 'John', None, None]


In [37]:
#grade

# INPUT:
# A hash table of size m (produced by hash_linear)
# A string, query, being searched for
# A hash function, hash (provided as h1, h2, or h3)
# An integer m defining the intended size of the output list
# OUTPUT:
# An integer denoting the number of steps it took to find the query
# OR -1 if query was not found.
                
def find_linear(ht, query, hash, m):
    k = hash(query,m)
    count = 0 
    changed = 0
    if ht[k] == query:
        count+=1
        print(count)
        changed+=1
        #return count
        
    else:
        for j in range(m):
            count+=1
            if ht[(k+j)%m] == query:
                changed+=1
                break
    if changed ==0:
        return -1
    else:
        return count

In [38]:
names = ["Anna", "Brad", "Cindy", "Dan", "Elsa", "Frank","Ginny","Hannah","Isaac","John"]

h1vals = []
h2vals = []
h3vals = []
print("Hash vals")
for n in names:
    h1vals.append(h1(n, 13))
    h2vals.append(h2(n, 13))
    h3vals.append(h3(n, 13))

print(h1vals)
print(h2vals)
print(h3vals)

query = "A2"
query2 = "Anna"



print("Linear")
hl = ['Brad', 'Isaac', 'Dan', None, 'Frank', 'Anna', 'Hannah', None, None, 'Cindy', 'Ginny', 'John', 'Elsa']
print(hl) # [None, 'Anna', 'Brad', 'Cindy', 'Dan', 'Elsa', 'Frank', 'Ginny', 'Hannah', 'Isaac', 'John', None, None]
print(find_linear(hl, query, h2, 13)) # -1
print(find_linear(hl, query2, h1, 13)) # 4

Hash vals
[5, 0, 9, 2, 12, 4, 10, 5, 0, 9]
[1, 1, 2, 1, 1, 1, 2, 2, 2, 1]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Linear
['Brad', 'Isaac', 'Dan', None, 'Frank', 'Anna', 'Hannah', None, None, 'Cindy', 'Ginny', 'John', 'Elsa']
-1
1
1


# Part 3: Double Hashing


In [42]:
#grade

# INPUT:
# A list of strings, inList
# Two hash functions, h1 and h2
# An integer m defining the intended size of the output list
# OUTPUT:
# Strings stored as a list (hashed with double hashing)

def hash_double(inList, h1, h2, m):
    outlist = []
    for i in range(m):
        outlist.insert(0,None)
    print(outlist)

    for i in range(len(inList)):
        k1 = h1(inList[i],m)
        k2 = h2(inList[i],m)
        if outlist[k1] == None:
            outlist[k1] = inList[i]
        else:
            for j in range(m-1):
                newhashed = (k1+(j*k2))%m
                if outlist[newhashed] == None:
                    outlist[newhashed] = inList[i]
                    break
    return outlist
    

In [43]:
names = ["Anna", "Brad", "Cindy", "Dan", "Elsa", "Frank","Ginny","Hannah","Isaac","John"]

h1vals = []
h2vals = []
h3vals = []
print("Hash vals")
for n in names:
    h1vals.append(h1(n, 13))
    h2vals.append(h2(n, 13))
    h3vals.append(h3(n, 13))

print(h1vals)
print(h2vals)
print(h3vals)

query = "A2"
query2 = "Dan"


print("Double")
hd = hash_double(names, h2, h3, 13)
print(hd) # "[None, 'Anna', 'Brad', None, 'Cindy', 'Elsa', 'Frank', 'Dan', None, 'Hannah', 'Isaac', 'John', None]"
print(find_double(hd, query, h2, h3, 13)) # -1
print(find_double(hd, query2, h2, h3, 13)) # 3


Hash vals
[5, 0, 9, 2, 12, 4, 10, 5, 0, 9]
[1, 1, 2, 1, 1, 1, 2, 2, 2, 1]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Double
[None, None, None, None, None, None, None, None, None, None, None, None, None]
[None, 'Anna', 'Brad', None, 'Cindy', 'Elsa', 'Frank', 'Dan', 'Ginny', 'Hannah', 'Isaac', 'John', None]
None
None


In [48]:
#grade

# INPUT:
# A hash table of size m (produced by hash_linear)
# A string, query, being searched for
# Two hash functions, h1 and h2
# An integer m defining the intended size of the output list
# OUTPUT:
# An integer denoting the number of steps it took to find the query
# OR -1 if query was not found.

def find_double(ht, query, h1, h2, m):
    k1 = h1(query,m)
    k2 = h2(query,m)
    count = 0
    changed = 0 
    if ht[k1] == query:
        count +=1
        changed +=1
    else:
        for j in range(m):
            count+=1
            if ht[(k1+(j*k2))%m] == query:
                changed+=1
                break
    if changed == 0:
        return -1
    else:
        return count
            


In [49]:
names = ["Anna", "Brad", "Cindy", "Dan", "Elsa", "Frank","Ginny","Hannah","Isaac","John"]

h1vals = []
h2vals = []
h3vals = []
print("Hash vals")
for n in names:
    h1vals.append(h1(n, 13))
    h2vals.append(h2(n, 13))
    h3vals.append(h3(n, 13))

print(h1vals)
print(h2vals)
print(h3vals)

query = "A2"
query2 = "Dan"

print("Chain")
hc = hash_chain(names, h2, 13)
print(hc) # [[], ['Anna', 'Brad', 'Dan', 'Elsa', 'Frank', 'John'], ['Cindy', 'Ginny', 'Hannah', 'Isaac'], [], [], [], [], [], [], [], [], [], []]
print(find_chain(hc, query, h2, 13)) # -1
print(find_chain(hc, query2, h2, 13)) # 3


print("Linear")
hl = hash_linear(names, h2, 13)
print(hl) # [None, 'Anna', 'Brad', 'Cindy', 'Dan', 'Elsa', 'Frank', 'Ginny', 'Hannah', 'Isaac', 'John', None, None]
print(find_linear(hl, query, h2, 13)) # -1
print(find_linear(hl, query2, h2, 13)) # 4


print("Double")
hd = hash_double(names, h2, h3, 13)
print(hd) # "[None, 'Anna', 'Brad', None, 'Cindy', 'Elsa', 'Frank', 'Dan', None, 'Hannah', 'Isaac', 'John', None]"
print(find_double(hd, query, h2, h3, 13)) # -1
print(find_double(hd, query2, h2, h3, 13)) # 3


Hash vals
[5, 0, 9, 2, 12, 4, 10, 5, 0, 9]
[1, 1, 2, 1, 1, 1, 2, 2, 2, 1]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Chain
[[], ['John', 'Frank', 'Elsa', 'Dan', 'Brad', 'Anna'], ['Isaac', 'Hannah', 'Ginny', 'Cindy'], [], [], [], [], [], [], [], [], [], []]
-1
4
Linear
[None, None, None, None, None, None, None, None, None, None, None, None, None]
[None, 'Anna', 'Brad', 'Cindy', 'Dan', 'Elsa', 'Frank', 'Ginny', 'Hannah', 'Isaac', 'John', None, None]
-1
4
Double
[None, None, None, None, None, None, None, None, None, None, None, None, None]
[None, 'Anna', 'Brad', None, 'Cindy', 'Elsa', 'Frank', 'Dan', 'Ginny', 'Hannah', 'Isaac', 'John', None]
-1
3
