# Hashing

References:

* Data Structures and Algorithms in Python - Kent Lee and Steve Hubbard

Here we introduce Python's built-in `hash()` function, and we also build a hash set class from scratch, as demonstrated in the reference book.

Python has a built-in `hash()` function that we can use to calculate the hash value of any object, provided that the object is immutable (hashable).

In [None]:
hash("abcde")

In [None]:
hash(123)

In [None]:
hash('123')

In [None]:
test_set = {'movies', 'year', 'genre', 10}
test_set

In [None]:
# A set is mutable, so it is not hashable: this call raises
# TypeError (unhashable type: 'set').
hash(test_set)

In [None]:
test_list = ['movies', 'year', 'genre', 10]
test_list

In [None]:
hash(test_list[0])

In [None]:
hash(test_list[2])

In [None]:
test_tuple = tuple(test_list)
test_tuple

In [None]:
hash(test_tuple)

## Hash Table

In [1]:
# Sample dictionary (name -> number) used by the hash table examples below

name_dict = {
    'John': 20,
    'James': 15,
    'Paul': 30,
    'Craig': 45,
    'Paula': 27,
}
name_dict

{'John': 20, 'James': 15, 'Paul': 30, 'Craig': 45, 'Paula': 27}

In [3]:
def hash_test(key, hashtablesize):
    """Return a slot index for ``key`` in a table of ``hashtablesize`` slots.

    The index is the sum of the code points of the characters in ``key``,
    reduced modulo ``hashtablesize``.
    """
    code_point_sum = sum(map(ord, key))
    return code_point_sum % hashtablesize
    
    

In [19]:
hash_test('Craig', 12)

6

In [12]:
# Iterating a dict yields its keys, in insertion order
x = list(name_dict)
x

['John', 'James', 'Paul', 'Craig', 'Paula']

In [13]:
size = 12
number_keys = len(x)
array_hashed = [0] * size  # 0 marks an empty slot

# Store each key in the slot chosen by hash_test. A key whose slot is
# already occupied is simply skipped here (no collision handling yet).
for key in x:
    var = hash_test(key, size)
    if array_hashed[var] == 0:
        array_hashed[var] = key

In [14]:
array_hashed

[0, 0, 0, 'John', 'James', 0, 'Paul', 'Paula', 0, 0, 0, 0]

In [None]:
var = hash_test('Craig', 12)
var

## Collision Resolution 

In [20]:
def hash_test2(key, hashtablesize):
    """Return an alternative slot index for ``key``.

    Same character-code sum as ``hash_test``, but reduced modulo
    ``hashtablesize - 3`` so that it generally lands on a different slot.
    """
    char_total = sum(ord(ch) for ch in key)
    modulus = hashtablesize - 3
    return char_total % modulus
    

In [22]:
size = 12
number_keys = len(x)
array_hashed2 = [0] * size  # 0 marks an empty slot

# Try the primary slot first; on a collision fall back to the slot given by
# the second hash function.
for key in x:
    var = hash_test(key, size)
    var2 = hash_test2(key, size)

    if array_hashed2[var] == 0:
        array_hashed2[var] = key
    elif array_hashed2[var2] == 0:
        array_hashed2[var2] = key
    else:
        # Both candidate slots are taken: fail loudly instead of silently
        # overwriting a key that is already stored.
        raise RuntimeError(f"No free slot for key {key!r}")

array_hashed2

['Craig', 0, 0, 'John', 'James', 0, 'Paul', 'Paula', 0, 0, 0, 0]

In [23]:
var = hash_test('Craig', 12)
var

6

In [25]:
var2 = hash_test2('Craig', 12)
var2

0

## Application - Storing Passwords

Salt + Hashing

Warning: This example is for educational purposes only. It is a simple demonstration of the process currently used to store passwords; the hash and salt used here are much simpler than the ones used in real security applications.


In [26]:
from getpass import getpass

# Read the credentials; getpass() prompts without echoing the password,
# so it does not end up in the notebook output.
username = input("Username:  ")
password = getpass("Password:  ")

# NOTE: the built-in hash() of a str is randomized per interpreter process
# (see PYTHONHASHSEED), so these values are only meaningful within one session.
username_hashed = hash(username)
password_hashed = hash(password)

Username:  nickname
Password:  password1


In [27]:
username_hashed

8998932324979292731

In [28]:
password_hashed

1417833961308575452

If we stored the passwords as they are now, it would be really unsafe: many people choose common words or numbers as passwords, so the hashes of those passwords are well known and can be looked up in existing databases.

The idea here is to store the password in a more secure way by adding a "salt" to it. A salt is a random string that is combined with the original password; the combined string is then hashed, producing a hash that is far more unique. The salt has to be stored alongside the hash so that the same computation can be repeated when the password is checked later.

In [29]:
import hashlib
import random
import secrets
import string

def hash_password(password, size):
    """Salt ``password`` with a random string and hash the result.

    Args:
        password: the plain-text password (str).
        size: number of random lowercase ASCII letters to use as the salt.

    Returns:
        A tuple ``(hashed_password, hashed_sha256, salted_password)``:
        the built-in ``hash()`` of the salted password, its SHA-256 hex
        digest, and the salted password itself (password followed by salt).
        The salted password is returned in clear text for demonstration only.
    """
    alphabet = string.ascii_lowercase
    # The salt is security-sensitive, so draw it from the `secrets` CSPRNG
    # rather than the predictable `random` generator.
    salt = ''.join(secrets.choice(alphabet) for _ in range(size))
    salted_password = password + salt

    # NOTE: built-in hash() of a str is randomized per interpreter process,
    # so only the SHA-256 digest is reproducible across sessions.
    hashed_password = hash(salted_password)
    hashed_sha256 = hashlib.sha256(salted_password.encode()).hexdigest()

    return hashed_password, hashed_sha256, salted_password
    
    
    

In [30]:
stored_password = hash_password(password, 20)
stored_password

(-2362710329689258959,
 'f19b50a385e74d371b8e8d9a73c620b2a188060102bf4a39419d89363f4abe99',
 'password1nmxhwetcahafxxgsjxbr')

In [31]:
stored_password = hash_password(password, 20)
stored_password

(2802096777197846963,
 '0af08d3c944f4548c45296e85f38de24f90e49aa5b203a6a723668865cdae212',
 'password1dafjlwgbeljzinjmbhky')

## HashSet Class

In [None]:
class HashSet:
    """A set of hashable items stored in a hash table with linear probing.

    Attributes:
        items: the slot list. A slot holds ``None`` (free), a placeholder
            (an item was removed from it), or a stored item.
        numItems: the number of items currently stored.

    The table doubles when the load factor reaches 0.75 after an insertion
    and halves when it drops to 0.25 after a removal.
    """

    # Marker left in a slot whose item was removed while the following slot
    # was still occupied, so that probe chains through the slot stay intact.
    class __Placeholder:
        def __eq__(self, other):
            # Never equal to anything: a lookup can never match a placeholder.
            return False

    def __init__(self, contents=()):
        """Create a set holding every item of the iterable ``contents``."""
        self.items = [None] * 10
        self.numItems = 0

        for item in contents:
            self.add(item)

    # HashSet Add Helper Function
    @staticmethod
    def __add(item, items):
        """Insert ``item`` into the slot list ``items``.

        Returns True if the item was inserted, False if it was already there.
        """
        idx = hash(item) % len(items)
        loc = -1  # first reusable placeholder slot seen while probing

        while items[idx] is not None:
            if items[idx] == item:
                # item already in set
                return False

            if loc < 0 and isinstance(items[idx], HashSet.__Placeholder):
                loc = idx

            idx = (idx + 1) % len(items)

        # No placeholder on the probe path: use the free slot that ended it.
        if loc < 0:
            loc = idx

        items[loc] = item
        return True

    # HashSet Rehash Helper Function
    @staticmethod
    def __rehash(oldList, newList):
        """Re-insert every stored item of ``oldList`` into ``newList``.

        Free slots and placeholders are skipped. Returns ``newList``.
        """
        for x in oldList:
            if x is not None and not isinstance(x, HashSet.__Placeholder):
                HashSet.__add(x, newList)

        return newList

    # HashSet Add
    def add(self, item):
        """Add ``item`` to the set; do nothing if it is already present."""
        if HashSet.__add(item, self.items):
            self.numItems += 1
            load = self.numItems / len(self.items)
            if load >= 0.75:
                self.items = HashSet.__rehash(
                    self.items, [None] * (2 * len(self.items))
                )

    # HashSet Remove Helper Function
    @staticmethod
    def __remove(item, items):
        """Remove ``item`` from the slot list ``items``.

        Returns True if the item was found and removed, False otherwise.
        """
        idx = hash(item) % len(items)

        while items[idx] is not None:
            if items[idx] == item:
                nextIdx = (idx + 1) % len(items)
                if items[nextIdx] is None:
                    # End of a probe chain: the slot can simply be freed.
                    items[idx] = None
                else:
                    # Later slots may belong to the same chain: keep it linked.
                    items[idx] = HashSet.__Placeholder()
                return True

            idx = (idx + 1) % len(items)

        return False

    # HashSet Remove
    def remove(self, item):
        """Remove ``item`` from the set.

        Raises:
            KeyError: if ``item`` is not in the set.
        """
        if not HashSet.__remove(item, self.items):
            raise KeyError("Item not in HashSet")

        self.numItems -= 1
        # max(..., 10) makes the shrink test behave as if at least 10 items
        # were stored, which stops the table from shrinking indefinitely.
        load = max(self.numItems, 10) / len(self.items)
        if load <= 0.25:
            self.items = HashSet.__rehash(
                self.items, [None] * (len(self.items) // 2)
            )

    # HashSet Discard
    def discard(self, item):
        """Remove ``item`` from the set if present; otherwise do nothing."""
        try:
            self.remove(item)
        except KeyError:
            pass

    # HashSet Membership
    def __contains__(self, item):
        """Return True if ``item`` is in the set."""
        idx = hash(item) % len(self.items)
        while self.items[idx] is not None:
            if self.items[idx] == item:
                return True

            idx = (idx + 1) % len(self.items)

        return False

    # Iterating over a set
    def __iter__(self):
        """Yield the stored items in slot order."""
        for slot in self.items:
            if slot is not None and not isinstance(slot, HashSet.__Placeholder):
                yield slot

    # HashSet Difference Update
    def difference_update(self, other):
        """Remove from this set every item yielded by iterating ``other``."""
        for item in other:
            self.discard(item)

    # HashSet Difference
    def difference(self, other):
        """Return a new HashSet with the items of this set not in ``other``."""
        result = HashSet(self)
        result.difference_update(other)
        return result
