### Problem Statement
Python Dictionaries and Hash Tables

In [1]:
phone_numbers = {
    "Aakash": "9489484949",
    "Hemanth": "984874849834",
    "Siddhant": "98232766438"
}
phone_numbers

{'Aakash': '9489484949', 'Hemanth': '984874849834', 'Siddhant': '98232766438'}

You can access a person's phone number using their name

In [2]:
phone_numbers["Aakash"]

'9489484949'

You can store new values or update existing ones:

In [4]:
phone_numbers["Vishal"] = "8787878787"

phone_numbers["Aakash"] = "7878787878"

phone_numbers

{'Aakash': '7878787878',
 'Hemanth': '984874849834',
 'Siddhant': '98232766438',
 'Vishal': '8787878787'}

In [5]:
# Walk the phone book and print one line per contact.
for name, number in phone_numbers.items():
    print(f"Name: {name}, Phone Number: {number}")

Name: Aakash, Phone Number: 7878787878
Name: Hemanth, Phone Number: 984874849834
Name: Siddhant, Phone Number: 98232766438
Name: Vishal, Phone Number: 8787878787


Dictionaries in Python are implemented using a data structure called a **Hash Table**. A hash table uses a list/array to store the key-value pairs, and uses a *hashing function* to determine the index for storing or retrieving the data associated with a given key.

The objective is to implement a `HashTable` class which supports the following operations:

1. **Insert:** Insert a new key-value pair.
2. **Find:** Find the value associated with the key.
3. **Update:** Update the value associated with the key.
4. **List:** List all the keys stored in the HashTable.

In [6]:
class HashTable:
    """Skeleton of the hash table interface.

    Every method is a placeholder: it does nothing and returns None.
    """

    def insert(self, key, value):
        """Insert a new key-value pair."""

    def find(self, key):
        """Find the value associated with the key."""

    def update(self, key, value):
        """Change the value associated with a key."""

    def list_all(self):
        """List all the keys."""

**Data List:** a fixed-size Python list that will hold the key-value pairs of the hash table.

In [7]:
MAX_HASH_TABLE_SIZE = 4096

In [8]:
# Pre-allocate every slot as empty (None), sized by the shared constant
# rather than a repeated literal.
data_list = [None] * MAX_HASH_TABLE_SIZE

In [9]:
# The list should have exactly one slot per possible index.
len(data_list) == MAX_HASH_TABLE_SIZE

True

In [10]:
# Compare against the None singleton by identity, not equality.
data_list[99] is None

True

In [12]:
# Every slot must start out empty; identity comparison is the correct
# test for the None singleton.
for item in data_list:
    assert item is None

A *Hashing Function* is used to convert strings and other non-numeric data types into numbers, which can then be used as list indices. For instance, if a hashing function converts the string "Aakash" into the number 4, then the key-value pair ("Aakash", "7878787878") will be stored at the position 4 within the data list.

Here's a simple algorithm for hashing, which can convert strings into numeric list indices.

1. Iterate over the string, character by character.
2. Convert each character to a number using Python's built-in ord function.
3. Add the numbers for each character to obtain the hash for the entire string.
4. Take the remainder of the result with the size of the data list

In [13]:
def get_index(data_list, a_string):
    """Map a string to a list index within data_list.

    The hash is the sum of ord() over every character of a_string,
    reduced modulo len(data_list).
    """
    # Sum of the character codes of the whole string
    total = sum(ord(ch) for ch in a_string)
    # Wrap the sum into the valid index range of the data list
    return total % len(data_list)

In [14]:
get_index(data_list, "") == 0

True

In [15]:
get_index(data_list, "Aakash") == 585

True

In [16]:
get_index(data_list, "Don O Leary") == 941

True

In [17]:
data_list2 = [None] * 48

In [18]:
ord("A") + ord("a") + ord("k") + ord("a") + ord("s") + ord("h")

585

In [19]:
585 % 48

9

In [20]:
get_index(data_list2, "Aakash") == 9

True

##### Insert

In [21]:
key, value = "Aakash", "7878787878"

In [22]:
idx = get_index(data_list, key)
idx

585

In [23]:
data_list[idx] = (key, value)

In [24]:
data_list[get_index(data_list, "Hemanth")] = ("Hemanth", "9595949494")

##### Find

In [26]:
idx = get_index(data_list, "Aakash")
idx

585

In [28]:
key, value = data_list[idx]
value

'7878787878'

##### List

In [29]:
pairs = [kv[0] for kv in data_list if kv is not None]

In [30]:
pairs

['Aakash', 'Hemanth']

In [42]:
class BasicHashTable:
    """Fixed-size hash table with no collision handling.

    Each key maps to exactly one slot, computed by get_index. Writing to a
    slot overwrites whatever pair was stored there.
    """

    def __init__(self, max_size = MAX_HASH_TABLE_SIZE):
        # None marks an empty slot.
        self.data_list = [None] * max_size

    def _slot_of(self, key):
        """Return the slot index that key hashes to."""
        return get_index(self.data_list, key)

    def insert(self, key, value):
        """Store the (key, value) pair in the slot that key hashes to."""
        self.data_list[self._slot_of(key)] = (key, value)

    def find(self, key):
        """Return the value stored in the slot that key hashes to.

        Raises IndexError when that slot is empty. The stored key is not
        compared with `key`.
        """
        entry = self.data_list[self._slot_of(key)]
        if entry is not None:
            _, stored_value = entry
            return stored_value
        raise IndexError("Key-value pair doesn't exist")

    def update(self, key, value):
        """Overwrite the slot that key hashes to with (key, value)."""
        self.data_list[self._slot_of(key)] = (key, value)

    def list_all(self):
        """Return the keys of all occupied slots, in slot order."""
        keys = []
        for entry in self.data_list:
            if entry is not None:
                keys.append(entry[0])
        return keys

In [43]:
basic_table = BasicHashTable(max_size = 1024)
len(basic_table.data_list)

1024

In [44]:
# Insert some value
basic_table.insert("Aakash", "9999999999")
basic_table.insert("Hemanth", "8888888888")

# Find a value
basic_table.find("Aakash")

'9999999999'

In [45]:
# Update a value
basic_table.update("Aakash", "7777777777")

# Check the updated value
basic_table.find("Aakash")

'7777777777'

In [47]:
basic_table.list_all()

['Aakash', 'Hemanth']

#### The Problem: Hash Collisions (and Linear Probing)

In [48]:
get_index(data_list, "listen"), get_index(data_list, "silent")

(655, 655)

In [49]:
basic_table.insert("listen", 99)
basic_table.insert("silent", 200)

In [50]:
basic_table.find("listen")

200

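To solve this problem, we use *linear probing*: if the slot at the computed index is already occupied by a different key, we move on to the next index (wrapping around to 0 at the end of the list) until we find either an empty slot or the slot that already holds the same key.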

In [65]:
def get_valid_index(data_list, key):
    """Find the slot for key in data_list using linear probing.

    Probing starts at the index returned by get_index and moves forward one
    slot at a time, wrapping around to 0 after the last slot.

    Returns:
        The index of the first slot along the probe sequence that is either
        empty (None) or already holds a pair whose key equals `key`.

    Raises:
        RuntimeError: if every slot is occupied and none of them holds `key`.
            Without this check the probe would loop forever on a full table.
    """
    # Start with the index given by the plain hash function
    start = get_index(data_list, key)
    size = len(data_list)

    # Visit each slot at most once, so a full table cannot cause an endless loop
    for offset in range(size):
        idx = (start + offset) % size
        kv = data_list[idx]

        # An empty slot can be claimed for this key
        if kv is None:
            return idx

        # A slot that already holds this key is the one to read or overwrite
        k, _ = kv
        if k == key:
            return idx

    raise RuntimeError("Hash table is full")

In [53]:
# Create an empty hash table
data_list2 = [None] * MAX_HASH_TABLE_SIZE

# new key "listen" should return 655
get_valid_index(data_list2, "listen")

655

In [54]:
# Inserting the key-value pair for the key "listen"
data_list2[get_valid_index(data_list2, "listen")] = ("listen", 99)

In [55]:
# Colliding key "silent" should return 656
get_valid_index(data_list2, "silent")

656

In [66]:
class ProbingHashTable:
    """Fixed-size hash table whose slots are chosen by get_valid_index."""

    def __init__(self, max_size = MAX_HASH_TABLE_SIZE):
        # None marks an empty slot.
        self.data_list = [None] * max_size

    def insert(self, key, value):
        """Store (key, value) in the slot get_valid_index picks for key."""
        slot = get_valid_index(self.data_list, key)
        self.data_list[slot] = (key, value)

    def find(self, key):
        """Return the value stored for key, or None if its slot is empty."""
        entry = self.data_list[get_valid_index(self.data_list, key)]
        if entry is None:
            return None
        return entry[1]

    def update(self, key, value):
        """Write (key, value) to the slot get_valid_index picks for key."""
        slot = get_valid_index(self.data_list, key)
        self.data_list[slot] = (key, value)

    def list_all(self):
        """Return the keys of all occupied slots, in slot order."""
        keys = []
        for entry in self.data_list:
            if entry is not None:
                keys.append(entry[0])
        return keys

In [67]:
probing_table = ProbingHashTable()

In [68]:
probing_table.insert("listen", 99)
probing_table.find("listen") == 99

True

In [69]:
probing_table.insert("silent", 200)

In [70]:
probing_table.list_all()

['listen', 'silent']