In [9]:
from datetime import date
import calendar

key_array = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July',
              'August', 'September', 'October', 'November', 'December' ]
month_length = [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]


def print_month(month, year):
    """Print a Sunday-first calendar grid for a single month.

    Args:
        month: English month name exactly as it appears in ``key_array``
            (for example ``'November'``).
        year: Year as an ``int``.

    Raises:
        ValueError: If ``month`` is not one of the names in ``key_array``.
    """
    try:
        idx = key_array.index(month)
    except ValueError:
        raise ValueError('Unknown month name: {!r}'.format(month)) from None

    # monthrange returns the weekday of day 1 (Monday == 0) together with the
    # number of days in the month; leap-year February is already handled.
    first_weekday, end = calendar.monthrange(year, idx + 1)
    wd = (first_weekday + 1) % 7              # shift so that Sunday == 0

    print('{} {}'.format(month, year).center(20))
    print('Su Mo Tu We Th Fr Sa')
    print('   ' * wd, end='')                 # Pad spacing
    for day in range(1, end + 1):
        print('{:2d} '.format(day), end='')
        wd = (wd + 1) % 7
        # End the row after Saturday, and end the last row exactly once.
        # (Previously a month finishing on a Saturday got an extra blank line.)
        if wd == 0 or day == end:
            print()

In [15]:
# Demo: render the calendar grid for November 2003.
print_month('November', 2003)

   November 2003    
Su Mo Tu We Th Fr Sa
                   1 
 2  3  4  5  6  7  8 
 9 10 11 12 13 14 15 
16 17 18 19 20 21 22 
23 24 25 26 27 28 29 
30 


In [1]:
# Month name -> number of days, with February listed as 28 (non-leap year).
# NOTE(review): no code visible in this notebook reads this dict.
days_in_month = {
    'January'   : 31,    'February'  : 28,   'March'     : 31,
    'April'     : 30,    'May'       : 31,   'June'      : 30,
    'July'      : 31,    'August'    : 31,   'September' : 30,
    'October'   : 31,    'November'  : 30,   'December'  : 31
}


class Entry:
    """Plain key/value record."""

    def __init__(self, k, v) -> None:
        # Store both fields in one step.
        self.key, self.value = k, v

class LinkedEntry:
    """Key/value node of a singly linked chain; ``next`` is the following node."""

    def __init__(self, k, v, rest=None) -> None:
        self.key, self.value = k, v
        self.next = rest          # None marks the end of the chain

class HashTable:
    """Separate-chaining hash table that grows when it becomes too loaded.

    Attributes set by ``__init__``:
        table: list of ``M`` bucket heads, each ``None`` or a ``LinkedEntry``.
        M: number of buckets.
        N: number of stored key/value pairs.
        load_factor: fraction of ``M`` used to derive ``threshold``.
        threshold: once ``N`` reaches this value, ``put`` resizes the table.
    """

    def __init__(self, M=10) -> None:
        self.M = M
        self.N = 0
        self.table = [None] * M

        self.load_factor = 0.75
        # Cap at M-1 regardless of what the load factor alone would allow.
        self.threshold = min(M * self.load_factor, M - 1)

    @staticmethod
    def _walk(head):
        """Yield the nodes of one chain, from ``head`` to the tail."""
        node = head
        while node:
            yield node
            node = node.next

    def __iter__(self):
        """Yield ``(key, value)`` tuples bucket by bucket, head of chain first."""
        for head in self.table:
            for node in self._walk(head):
                yield (node.key, node.value)

    def get(self, k):
        """Return the value stored under ``k``, or ``None`` when absent."""
        slot = hash(k) % self.M
        for node in self._walk(self.table[slot]):
            if node.key == k:
                return node.value
        return None

    def put(self, k, v):
        """Store ``v`` under ``k``, overwriting any existing value."""
        slot = hash(k) % self.M
        for node in self._walk(self.table[slot]):
            if node.key == k:
                node.value = v
                return

        # New key: prepend to the chain, then grow if the threshold is reached.
        self.table[slot] = LinkedEntry(k, v, self.table[slot])
        self.N += 1

        if self.N >= self.threshold:
            self.resize(2 * self.M + 1)

    def remove(self, k):
        """Unlink ``k`` and return its value.

        When the key is missing a message is printed and ``None`` is returned.
        """
        slot = hash(k) % self.M
        previous = None
        node = self.table[slot]

        while node:
            if node.key == k:
                if previous is None:
                    # The match is the head of the chain.
                    self.table[slot] = node.next
                else:
                    # The match is further down: bypass it.
                    previous.next = node.next
                node.next = None
                self.N -= 1
                return node.value
            previous, node = node, node.next

        print('The key is not present.')

    def resize(self, new_size):
        """Re-insert every pair into a fresh ``new_size``-bucket table and adopt it."""
        print(f'Resizing the HashTable to M: {new_size}')
        bigger = HashTable(new_size)

        for head in self.table:
            for node in self._walk(head):
                bigger.put(node.key, node.value)

        # N is unchanged: the same pairs are stored, only the layout differs.
        self.table = bigger.table
        self.M = bigger.M
        self.threshold = bigger.threshold

In [2]:
# Start with only 6 buckets so that the 14 insertions below cross the load
# threshold; HashTable.resize prints a line each time the table grows.
table = HashTable(6)

for n in range(7):
    table.put(f'April{n}', 30)
    table.put(f'May{n}', 31)

Resizing the HashTable to M: 13
Resizing the HashTable to M: 27


In [4]:
# Looping over the table itself goes through HashTable.__iter__, which yields
# (key, value) tuples.
for n in table:
    print(n)

('April1', 30)
('May1', 31)
('April3', 30)
('April6', 30)
('May5', 31)
('April4', 30)
('May3', 31)
('April0', 30)
('April2', 30)
('May2', 31)
('May4', 31)
('May6', 31)
('April5', 30)
('May0', 31)


## Exercises:

In [2]:
class Entry:
    """Key/value pair held in one slot of an open-addressing table."""

    def __init__(self, k, v) -> None:
        self.key, self.value = k, v

class HashTableLP:
    """Fixed-size open-addressing hash table using linear probing.

    ``table`` holds ``M`` slots (``None`` or an ``Entry``) and ``N`` counts the
    stored pairs.  ``put`` refuses new keys once ``N`` reaches ``M - 1``, so
    one slot always stays empty and every probe loop terminates.
    """

    def __init__(self, M=10) -> None:
        self.M = M
        self.N = 0
        self.table = [None] * M

    def get(self, k):
        """Return the value stored under ``k``, or ``None`` when absent."""
        slot = hash(k) % self.M
        while True:
            occupant = self.table[slot]
            if not occupant:
                # An empty slot ends the probe sequence: the key is not stored.
                return None
            if occupant.key == k:
                return occupant.value
            slot = (slot + 1) % self.M

    def put(self, k, v):
        """Store ``v`` under ``k``, overwriting any existing value.

        Raises:
            RuntimeError: If ``k`` is new and the table already holds
                ``M - 1`` entries.
        """
        slot = hash(k) % self.M
        occupant = self.table[slot]
        while occupant:
            if occupant.key == k:
                occupant.value = v
                return
            slot = (slot + 1) % self.M
            occupant = self.table[slot]

        # `slot` is now the first free position on the probe path.
        if self.N >= self.M - 1:
            raise RuntimeError('Table is Full!')

        self.table[slot] = Entry(k, v)
        self.N += 1

# Uses a probe sequence that explores slots at triangular-number offsets (1, 3, 6, 10, ...) from the home index.
class HashTableTN:
    """Fixed-size open-addressing hash table using triangular-number probing.

    Starting at the home slot ``hash(k) % M``, probe ``i`` lands on
    ``home + i*(i+1)/2 (mod M)``, i.e. offsets 0, 1, 3, 6, 10, ...

    That sequence repeats after at most ``2*M`` probes and reaches every slot
    when ``M`` is a power of two.  For other sizes (the default 10 included)
    some slots are unreachable from a given home slot, so an unbounded probe
    loop can spin forever even though the table has free slots.  Every probe
    loop here is therefore capped at ``2*M`` steps.
    """

    def __init__(self, M=10) -> None:
        self.table = [None] * M
        self.M = M
        self.N = 0

    def _probe(self, k):
        """Yield each slot index on ``k``'s probe path, one full period long."""
        slot = hash(k) % self.M
        for step in range(1, 2 * self.M + 1):
            yield slot
            slot = (slot + step) % self.M

    def get(self, k):
        """Return the value stored under ``k``, or ``None`` when absent."""
        for slot in self._probe(k):
            entry = self.table[slot]
            if not entry:
                return None
            if entry.key == k:
                return entry.value
        # Every reachable slot is occupied by some other key.
        return None

    def put(self, k, v):
        """Store ``v`` under ``k``, overwriting any existing value.

        Raises:
            RuntimeError: If ``k`` is new and either the table already holds
                ``M - 1`` entries or no free slot lies on ``k``'s probe path.
        """
        for slot in self._probe(k):
            entry = self.table[slot]
            if not entry:
                break
            if entry.key == k:
                entry.value = v
                return
        else:
            # The whole period was scanned without meeting a free slot.
            raise RuntimeError('No free slot is reachable for this key.')

        if self.N >= self.M-1:
            raise RuntimeError('Table is Full!')

        self.table[slot] = Entry(k, v)
        self.N += 1

In [26]:
import timeit
import nltk
from nltk.corpus import words

# NOTE: despite the name, this slice keeps up to 160,564 words, not 100,000.
# The name is left unchanged because another cell reads it as well.
first_100k = words.words()[:160_564]

def test_triangular_num_put():
    """Insert every benchmark word into the triangular-probing table."""
    for i, word in enumerate(first_100k):
        h_table_triangular.put(word, i)

def test_triangular_num_get():
    """Look up every benchmark word in the triangular-probing table."""
    for word in first_100k:
        h_table_triangular.get(word)


def test_lp_put():
    """Insert every benchmark word into the linear-probing table."""
    for i, word in enumerate(first_100k):
        h_table_lp.put(word, i)

def test_lp_get():
    """Look up every benchmark word in the linear-probing table."""
    for word in first_100k:
        h_table_lp.get(word)


# number=3 means each figure is the TOTAL over three passes.  For the put
# benchmarks only the first pass inserts; the later passes find each key
# already present and just overwrite its value.
h_table_triangular = HashTableTN(M=524_288)
execution_time_triang_put = timeit.timeit("test_triangular_num_put()", globals=globals(), number=3)
execution_time_triang_get = timeit.timeit("test_triangular_num_get()", globals=globals(), number=3)
del h_table_triangular

h_table_lp = HashTableLP(M=524_288)
execution_time_lp_put = timeit.timeit("test_lp_put()", globals=globals(), number=3)
execution_time_lp_get = timeit.timeit("test_lp_get()", globals=globals(), number=3)
del h_table_lp

# Report the number of words actually processed rather than a hard-coded
# "100,000", which did not match the size of the slice above.
word_count = f"{len(first_100k):,}"

print(f"Time to insert {word_count} words in triangular: {execution_time_triang_put:.6f} seconds")
print(f"Time to access {word_count} words in triangular: {execution_time_triang_get:.6f} seconds")
print()
print(f"Time to insert {word_count} words in linear prob: {execution_time_lp_put:.6f} seconds")
print(f"Time to access {word_count} words in linear prob: {execution_time_lp_get:.6f} seconds")


Time to insert 100,000 words in triangular: 0.724099 seconds
Time to access 100,000 words in triangular: 0.462633 seconds

Time to insert 100,000 words in linear prob: 0.651156 seconds
Time to access 100,000 words in linear prob: 0.455950 seconds


In [1]:
class LinkedEntry:
    """One link of a bucket chain: a key, its value, and the next link."""

    def __init__(self, k, v, rest=None) -> None:
        self.key, self.value, self.next = k, v, rest

class HashTableLinkedList:
    """Hash table resolving collisions with linked chains; grows on demand.

    ``table`` is a list of ``M`` chain heads (``None`` or ``LinkedEntry``) and
    ``N`` is the number of stored pairs.  When ``N`` reaches ``threshold``
    (derived from ``load_factor``), ``put`` resizes to ``2*M + 1`` buckets.
    """

    def __init__(self, M=10) -> None:
        self.M = M
        self.N = 0
        self.table = [None] * M

        self.load_factor = 0.75
        # Whatever the load factor gives, never exceed M-1.
        self.threshold = min(M * self.load_factor, M - 1)

    @staticmethod
    def _links(first):
        """Yield each node of a chain starting at ``first``."""
        current = first
        while current:
            yield current
            current = current.next

    def __iter__(self):
        """Yield ``(key, value)`` tuples, bucket by bucket."""
        for first in self.table:
            for link in self._links(first):
                yield (link.key, link.value)

    def get(self, k):
        """Return the value for ``k``, or ``None`` if it is not stored."""
        bucket = hash(k) % self.M
        for link in self._links(self.table[bucket]):
            if link.key == k:
                return link.value
        return None

    def put(self, k, v):
        """Insert ``k`` with value ``v``, or replace the value of an existing ``k``."""
        bucket = hash(k) % self.M
        for link in self._links(self.table[bucket]):
            if link.key == k:
                link.value = v
                return

        # Not present yet: the new node becomes the head of the chain.
        self.table[bucket] = LinkedEntry(k, v, self.table[bucket])
        self.N += 1

        if self.N >= self.threshold:
            self.resize(2 * self.M + 1)

    def remove(self, k):
        """Remove ``k`` and return its value.

        If ``k`` is not stored, a message is printed and ``None`` is returned.
        """
        bucket = hash(k) % self.M
        before = None
        current = self.table[bucket]

        while current:
            if current.key == k:
                if before is None:
                    # Removing the head: the bucket now starts at its successor.
                    self.table[bucket] = current.next
                else:
                    # Removing an inner node: link its neighbours together.
                    before.next = current.next
                current.next = None
                self.N -= 1
                return current.value
            before, current = current, current.next

        print('The key is not present.')

    def resize(self, new_size):
        """Rebuild the table with ``new_size`` buckets, keeping every pair."""
        print(f'Resizing the HashTable to M: {new_size}')
        rebuilt = HashTableLinkedList(new_size)

        for first in self.table:
            for link in self._links(first):
                rebuilt.put(link.key, link.value)

        # The pair count N does not change; only the layout is replaced.
        self.table = rebuilt.table
        self.M = rebuilt.M
        self.threshold = rebuilt.threshold

In [24]:
h_table_linked = HashTableLinkedList(M=524_288)
def test_linked_put():
    """Insert every benchmark word into the chained table."""
    for i, word in enumerate(first_100k):
        h_table_linked.put(word, i)

def test_linked_get():
    """Look up every benchmark word in the chained table."""
    for word in first_100k:
        h_table_linked.get(word)

# NOTE(review): number=1 here, whereas the open-addressing benchmark cell
# uses number=3, so the printed totals are not directly comparable.
execution_time_linked_put = timeit.timeit("test_linked_put()", globals=globals(), number=1)
execution_time_linked_get = timeit.timeit("test_linked_get()", globals=globals(), number=1)
del h_table_linked

# Report the number of words actually processed rather than a hard-coded
# "100,000", which did not match the size of `first_100k`.
linked_word_count = f"{len(first_100k):,}"

print(f"Time to insert {linked_word_count} words in linked list: {execution_time_linked_put:.6f} seconds")
print(f"Time to access {linked_word_count} words in linked list: {execution_time_linked_get:.6f} seconds")

Time to insert 100,000 words in linked list: 0.381230 seconds
Time to access 100,000 words in linked list: 0.132629 seconds


In [3]:
import timeit
import nltk
from nltk.corpus import words

In [12]:
class ValueBadHash:
    """Wrapper whose hash is deliberately squeezed into the four values 0-3.

    Equality still compares the wrapped value, so distinct values collide in
    a hash table without being treated as the same key.
    """

    def __init__(self, v) -> None:
        self.v = v

    def __hash__(self) -> int:
        wrapped_hash = hash(self.v)
        return wrapped_hash % 4

    def __eq__(self, other) -> bool:
        # Only instances of exactly the same class can be equal.
        if self.__class__ == other.__class__:
            return self.v == other.v
        return False

In [10]:
# Two separate instances wrapping the same value: ValueBadHash.__eq__ makes
# them compare equal, and __hash__ gives them the same hash.
v1 = ValueBadHash(7)
v2 = ValueBadHash(7)

In [19]:
## Bad hash table
first_10k = words.words()[:10_000]

# 50,000 buckets are allocated, but ValueBadHash.__hash__ only ever returns
# 0-3, so every key lands in one of the first four buckets.
bad_table = HashTableLinkedList(M=50_000)

def bad_table_put():
    """Insert each of the first 10,000 words, wrapped in ValueBadHash, with value 1."""
    for word in first_10k:
        bad_table.put(ValueBadHash(word), 1)

# Run once; globals=globals() lets the timed statement find bad_table_put.
execution_time_bad_put = timeit.timeit("bad_table_put()", globals=globals(), number=1)

print(f"Time to insert 10,000 words using ValueBadHash: {execution_time_bad_put:.6f} seconds")

Time to insert 10,000 words using ValueBadHash: 4.111918 seconds


In [22]:
# One pass over the buckets: count the occupied ones and track the longest chain.
max_length = 0
non_empty_buckets = 0

for bucket in bad_table.table:
    if bucket:
        non_empty_buckets += 1

    length = 0
    node = bucket
    while node:
        length += 1
        node = node.next

    if length > max_length:
        max_length = length

print(f'The max lenght of a bucket is: {max_length}')
print(f'The average lenght of a bucket is: {len(first_10k)//non_empty_buckets}')

The max lenght of a bucket is: 2511
The average lenght of a bucket is: 2500


In [32]:
from nltk.corpus import words

def find_max_length(table):
    """Return the length of the longest chain among ``table.table``'s buckets.

    Each bucket is ``None`` or the head of a chain linked through ``.next``.
    Returns 0 when there are no buckets or all of them are empty.
    """
    def chain_length(node):
        count = 0
        while node:
            count += 1
            node = node.next
        return count

    return max((chain_length(head) for head in table.table), default=0)

def find_avg_length(table, entries):
    """Return ``entries`` floor-divided by the number of non-empty buckets.

    Args:
        table: Object whose ``table`` attribute is the list of buckets.
        entries: Total number of stored entries, supplied by the caller.

    Returns:
        The integer (floored) average chain length over occupied buckets, or
        0 when no bucket is occupied.  Because the result is floored, lightly
        loaded tables all report 1.
    """
    non_empty_buckets = sum(1 for bucket in table.table if bucket)

    # An empty table has no chains; avoid dividing by zero.
    if non_empty_buckets == 0:
        return 0

    return entries//non_empty_buckets

def put_in_table(table, to_put):
    """Insert each item of ``to_put`` into ``table`` as a key with the value 1."""
    for key in to_put:
        table.put(key, 1)

        
# One list per column; every loop iteration below appends one value to each.
results = {'M': [], 'put_time': [], 'average_chain_length': [], 'max_chain_length': []}
words_to_insert = words.words()


# Try 101 consecutive bucket counts: 428,880 through 428,980 inclusive.
for n in range(428_880, 428_981):
    table = HashTableLinkedList(M=n)
    
    # globals=globals() lets the timed statement see `table` and `words_to_insert`.
    execution_time = timeit.timeit("put_in_table(table, words_to_insert)", globals=globals(), number=1)
    max_length = find_max_length(table)
    average_length = find_avg_length(table, len(words_to_insert))
    
    results['M'].append(n)
    results['put_time'].append(execution_time)
    results['max_chain_length'].append(max_length)
    results['average_chain_length'].append(average_length)


In [35]:
import pandas as pd

# Tabulate the per-size measurements and preview the first ten rows.
df_results = pd.DataFrame(results)
df_results.head(10)

Unnamed: 0,M,put_time,average_chain_length,max_chain_length
0,428880,0.502119,1,7
1,428881,0.415664,1,7
2,428882,0.414394,1,7
3,428883,0.422292,1,6
4,428884,0.399738,1,7
5,428885,0.382429,1,7
6,428886,0.384233,1,7
7,428887,0.375273,1,7
8,428888,0.381072,1,6
9,428889,0.38348,1,6


In [36]:
# Mean insertion time for each observed maximum chain length.
df_results.groupby('max_chain_length')['put_time'].mean()

max_chain_length
6    0.615406
7    0.578613
8    0.656071
Name: put_time, dtype: float64

In [39]:
# Show the rows (table sizes) that produced the longest chain overall.
max_chain_length_value = df_results['max_chain_length'].max()
df_results.loc[df_results['max_chain_length']==max_chain_length_value]

Unnamed: 0,M,put_time,average_chain_length,max_chain_length
41,428921,0.644832,1,8
48,428928,0.648754,1,8
58,428938,0.659079,1,8
82,428962,0.6672,1,8
99,428979,0.660491,1,8


In [43]:
class Entry:
    """Key/value pair occupying one slot of the table."""

    def __init__(self, k, v) -> None:
        self.key, self.value = k, v

class HashTableWithRemove:
    """Fixed-size linear-probing hash table that also supports deletion.

    ``table`` holds ``M`` slots (``None`` or an ``Entry``) and ``N`` counts the
    stored pairs.  ``put`` refuses new keys once ``N`` reaches ``M - 1``, so
    at least one slot is always empty and every probe loop terminates.
    """

    def __init__(self, M=10) -> None:
        self.table = [None] * M
        self.M = M
        self.N = 0

    def get(self, k):
        """Return the value stored under ``k``, or ``None`` when absent."""
        hc = hash(k) % self.M
        while self.table[hc]:
            if self.table[hc].key == k:
                return self.table[hc].value
            hc = (hc+1) % self.M
        return None

    def put(self, k, v):
        """Store ``v`` under ``k``, overwriting any existing value.

        Raises:
            RuntimeError: If ``k`` is new and the table already holds
                ``M - 1`` entries.
        """
        hc = hash(k) % self.M
        while self.table[hc]:
            if self.table[hc].key == k:
                self.table[hc].value = v
                return

            hc = (hc+1) % self.M

        if self.N >= self.M-1:
            raise RuntimeError('Table is Full!')

        self.table[hc] = Entry(k, v)
        self.N += 1

    def remove(self, k, to_rehash=True):
        """Delete ``k`` from the table; print a message if it is not stored.

        Args:
            k: Key to delete.
            to_rehash: When true (the default), the entries that follow the
                freed slot are re-inserted so they stay reachable.  Passing a
                false value leaves a hole that can hide keys stored after it.
        """
        hc = hash(k) % self.M
        while self.table[hc]:
            if self.table[hc].key == k:
                self.table[hc] = None
                self.N -= 1

                if to_rehash:
                    self.rehash_next_keys(hc)
                return

            hc = (hc+1) % self.M

        print('Key not found.')
        return

    def rehash_next_keys(self, hc):
        """Re-insert the run of entries that directly follows freed slot ``hc``.

        Only entries between ``hc`` and the next empty slot can have probed
        past ``hc``, so the walk stops at the first empty slot instead of
        sweeping the whole table.  This assumes no earlier removal skipped
        rehashing and left a hole inside that run.
        """
        print('Rehashing.')
        hc = (hc+1) % self.M

        while self.table[hc]:
            k = self.table[hc].key
            v = self.table[hc].value

            # Take the entry out, then let put() find its best slot again.
            self.table[hc] = None
            self.N -= 1

            self.put(k, v)
            hc = (hc+1) % self.M

        return

# Fill 8 of the 10 slots, then delete one key; remove() rehashes by default.
table = HashTableWithRemove(M=10)

for n in range(8):
    table.put(f'test{n}', n)

table.remove('test2')

Rehashing.


In [37]:
# Same experiment, except that 'test3' is never inserted.
table = HashTableWithRemove(M=10)

for n in range(8):
    if n != 3:
        table.put(f'test{n}', n)

# Print every key's home slot (hash modulo the table size).  str hashes are
# salted per interpreter process by default, so these numbers change between
# runs unless PYTHONHASHSEED is fixed.
for n in range(8):
    hc = hash(f'test{n}') % 10
    print(f'test{n} --> {hc}')

# NOTE(review): the recorded output of this cell contains "k: ... --> hc: ..."
# lines that the visible HashTableWithRemove never prints; it presumably comes
# from an earlier revision of the class -- re-run to confirm.
table.remove('test2')

test0 --> 2
test1 --> 3
test2 --> 8
test3 --> 9
test4 --> 8
test5 --> 4
test6 --> 6
test7 --> 5
k: test2 --> hc: 8
Rehashing.
k: test4 --> hc: 8
Key not found.


In [35]:
# Inspect which key currently occupies slot 8.
table.table[8].key

'test4'

In [27]:
# Remove 'test2', then display the raw slot list.  Entry defines no __repr__,
# so occupied slots are shown with the default object repr.
table.remove('test2')
table.table

Rehashing.
Key not found.


[None,
 None,
 <__main__.Entry at 0x28103cc53d0>,
 <__main__.Entry at 0x28103abab50>,
 <__main__.Entry at 0x28103cd9ee0>,
 <__main__.Entry at 0x28103cd7e80>,
 <__main__.Entry at 0x28104df6dc0>,
 None,
 <__main__.Entry at 0x28104ecf460>,
 <__main__.Entry at 0x28104d95d90>]

In [103]:
# Display the raw slot list.
# NOTE(review): the recorded output has 6 slots, which matches none of the
# tables built in the visible cells -- likely stale kernel state; re-run.
table.table

[None, None, <__main__.Entry at 0x2416ea514f0>, None, None, None]

In [107]:
class Entry:
    """Key/value pair; in HashTableMax the value is the key's occurrence count."""

    def __init__(self, k, v) -> None:
        self.key, self.value = k, v

class HashTableMax:
    """Linear-probing hash table that counts how often each key is added.

    ``table`` holds ``M`` slots (``None`` or an ``Entry`` whose ``value`` is
    the occurrence count) and ``N`` is the number of distinct keys stored.
    """

    def __init__(self, M=10) -> None:
        self.table = [None] * M
        self.M = M
        self.N = 0

    def get_max_key(self):
        """Return ``(key, count)`` for the most frequently added key.

        Ties go to the entry that comes first in slot order.  An empty table
        yields ``(None, 0)``; the former ``(0, 0)`` result could not be told
        apart from a genuine key ``0``.
        """
        best = max((entry for entry in self.table if entry),
                   key=lambda entry: entry.value,
                   default=None)
        if best is None:
            return (None, 0)
        return (best.key, best.value)

    def put(self, k):
        """Record one more occurrence of ``k``.

        Raises:
            RuntimeError: If ``k`` is new and the table already holds
                ``M - 1`` distinct keys.
        """
        hc = hash(k) % self.M
        while self.table[hc]:
            if self.table[hc].key == k:
                self.table[hc].value += 1
                return

            hc = (hc+1) % self.M

        if self.N >= self.M-1:
            raise RuntimeError('Table is Full!')

        # First sighting of this key: start its count at 1.
        self.table[hc] = Entry(k, 1)
        self.N += 1
        

import random

# 1,000 random integers drawn from 0..10 inclusive (11 possible values).
# NOTE(review): no random seed is set, so the counts differ on every run.
random_nums = [random.randint(0, 10) for _ in range(1000)]
# M=12 is just large enough: put() accepts new keys only while N < M-1 == 11.
table = HashTableMax(M=12)

for n in random_nums:
    table.put(n)

In [120]:
# Display the most frequent number together with its occurrence count.
table.get_max_key()

(8, 111)