# Hash Table - Separate Chaining

In [1]:
import numpy as np
import copy
import os

## algorithm

In [272]:
class Entry:
    """One node of a bucket chain: a key/value pair plus a link.

    ``next_en`` is the entry that follows this one in the same chain, or
    ``None`` when this entry is the last one.
    """

    def __init__(self, key, value, next_en):
        self._key, self._value, self._next = key, value, next_en

    def __repr__(self):
        # Only the key and the value are shown; the chain link is omitted.
        return "entry(key= {!r}, value= {!r})".format(self._key, self._value)

class HashTable:
    """Hash table that resolves collisions by separate chaining.

    Every bucket stores the head of a singly linked list of ``Entry`` nodes
    (or ``None`` when the bucket is empty).  The bucket array is resized
    automatically after insertions and deletions:

    * it grows to ``2 * size + 1`` once the number of entries exceeds
      ``ratioExpand * size``;
    * it shrinks to ``size // 2`` once the number of entries drops below
      ``ratioShrink * size``, but never below ``minSize`` buckets.
    """

    ratioExpand = .95   # load factor above which the bucket array grows
    ratioShrink = .05   # load factor below which the bucket array shrinks
    minSize = 13        # smallest number of buckets the table will use

    def __init__(self, size=None):
        """Create an empty table.

        ``size`` is the requested number of buckets.  When it is omitted, or
        not larger than ``minSize``, the table starts with ``minSize``
        buckets.
        """
        # The previous test, `size or size > self.minSize`, compared None
        # with an int for the default argument (a TypeError on Python 3) and
        # let through any non-zero size, including ones below minSize.
        if size is not None and size > self.minSize:
            self._size = size
        else:
            self._size = self.minSize
        self._buckets = [None] * self._size
        self._num = 0

    def _entry(self, key):
        """Locate ``key`` in its chain.

        Returns ``(entry, prev_entry, index)``: the matching entry (``None``
        if the key is absent), the entry preceding it in the chain (``None``
        if the match is the chain head or the chain is empty), and the
        bucket index the key hashes to.
        """
        idx = hash(key) % self._size

        p = self._buckets[idx]
        q = None
        while p is not None and p._key != key:
            p, q = p._next, p

        return p, q, idx

    def _ensureCapacity(self):
        """Grow or shrink the bucket array if the load factor requires it."""
        if self._num > self.ratioExpand * self._size:
            self._size = (self._size << 1) + 1
        elif (self._num < self.ratioShrink * self._size
              and (self._size >> 1) >= self.minSize):
            self._size = self._size >> 1
        else:
            return

        # Rehash: self._buckets is rebound to a fresh list, so the old list
        # can be walked directly without copying it first.
        old_buckets = self._buckets
        self._buckets = [None] * self._size

        for p in old_buckets:
            while p is not None:
                idx = hash(p._key) % self._size
                # Advance p before relinking q, because q._next is
                # overwritten when q is pushed onto the front of its new chain.
                p, q = p._next, p
                q._next = self._buckets[idx]
                self._buckets[idx] = q

    def __len__(self):
        """Return the number of stored entries."""
        return self._num

    def __contains__(self, key):
        """Return True if ``key`` is stored in the table."""
        p, _, _ = self._entry(key)
        return p is not None

    def __getitem__(self, key):
        """Return the value stored under ``key``, or ``None`` if it is absent.

        Unlike ``dict``, a missing key does not raise ``KeyError``.
        """
        p, _, _ = self._entry(key)
        return p._value if p is not None else None

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, replacing any existing value."""
        p, _, idx = self._entry(key)
        if p is not None:
            p._value = value
            return

        # New key: push a fresh entry onto the front of its chain.
        self._buckets[idx] = Entry(key, value, self._buckets[idx])
        self._num += 1
        self._ensureCapacity()

    def __delitem__(self, key):
        """Remove ``key`` from the table; a missing key is silently ignored."""
        p, q, idx = self._entry(key)
        if p is None:
            return
        if q is None:
            # The match is the chain head, so the bucket points past it.
            self._buckets[idx] = p._next
        else:
            q._next = p._next

        self._num -= 1
        self._ensureCapacity()

    def __iter__(self):
        """Yield every stored ``Entry`` object, bucket by bucket.

        Note that the entries themselves are yielded, not their keys.
        """
        for p in self._buckets:
            while p is not None:
                yield p
                p = p._next

    def slots(self):
        """Return one character per bucket: 'x' if occupied, '-' if empty."""
        return ''.join('x' if p is not None else '-' for p in self._buckets)

    def numChanin(self):
        """Return a list holding the chain length of every bucket."""
        counts = []
        for p in self._buckets:
            length = 0
            while p is not None:
                length += 1
                p = p._next
            counts.append(length)
        return counts

## run

In [273]:
# Create an empty table with the default number of buckets.
table = HashTable()

In [274]:
# Fill the table from the text file: each line is split on ", "; the first
# field becomes the key and the second field, cut at the newline, the value.
with open("players_list.txt") as fp:
    for line in fp:
        fields = line.split(', ')
        key = fields[0]
        value = fields[1].split('\n')[0]
        table[key] = value

In [275]:
# Number of stored entries and the current number of buckets.
len(table), table._size

(100, 111)

In [276]:
# Occupancy map: one character per bucket, 'x' = occupied, '-' = empty.
print(table.slots())

-x-xxxxx-xx-xx-xxxx-xxxxxxxxxx--xx----xxx-x-x-x-x-xx-x-xx--xxxx--xx-xxxx---xxxx-xxxxx-xxx-x-x--xxx-x--x--xxxx-x


In [277]:
# Chain length of every bucket, in bucket order.
print(table.numChanin())

[0, 2, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 2, 0, 1, 1, 3, 1, 0, 1, 1, 4, 1, 1, 3, 3, 2, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, 2, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 2, 0, 0, 1, 1, 1, 2, 0, 0, 3, 1, 0, 1, 1, 2, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 2, 4, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 2, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 4, 0, 2]


In [261]:
# Iterating the table yields Entry objects (not keys), bucket by bucket.
for item in table:
    print(item)

entry(key= 'Klaas Jan Huntelaar', value= 'Netherlands')
entry(key= 'Zlatan Ibrahimovic', value= 'Sweden')
entry(key= 'Wesley Sneijder', value= 'Netherlands')
entry(key= 'Luis Suarez', value= 'Uruguay')
entry(key= 'David Villa', value= 'Spain')
entry(key= 'Carles Puyol', value= 'Spain')
entry(key= 'Paulinho', value= 'Brazil')
entry(key= 'Angel Di Maria', value= 'Argentina')
entry(key= 'Juan Mata', value= 'Spain')
entry(key= "Mapou Yanga-M'Biwa", value= 'France')
entry(key= 'Franck Ribery', value= 'France')
entry(key= 'Sergio Busquets', value= 'Spain')
entry(key= 'Victor Wanyama', value= 'Kenya')
entry(key= 'Neymar', value= 'Brazil')
entry(key= 'Daniele De Rossi', value= 'Italy')
entry(key= 'Gianluigi Buffon', value= 'Italy')
entry(key= 'Toni Kroos', value= 'Germany')
entry(key= 'Patrice Evra', value= 'France')
entry(key= 'Nemanja Vidic', value= 'Serbia')
entry(key= 'Marcelo', value= 'Brazil')
entry(key= 'Pierre-Emerick Aubameyang', value= 'Gabon')
entry(key= 'Thiago Silva', value= 'Brazil')

In [278]:
# Look up the value stored under a single key.
table['Neymar']

'Brazil'

In [279]:
# Head of the chain in bucket 0; None means the bucket is empty.
print(table._buckets[0])

None


In [280]:
# Remove every entry. The keys are collected first because deleting relinks
# the chains (and may rebuild the bucket array) that iteration walks over.
doomed_keys = [entry._key for entry in table]
for doomed_key in doomed_keys:
    del table[doomed_key]

In [281]:
# Entry count and bucket count after every entry has been deleted.
len(table), table._size

(0, 13)

In [282]:
# Iterate the table again after the deletions; each remaining entry would be printed.
for item in table:
    print(item)