# Hash Table - Open Addressing

In [46]:
import numpy as np
import os

## algorithm

In [47]:
class HashTable(object):
    """Hash map built on open addressing with triangular-number probing.

    Each bucket holds one of three things:

    * ``None``          -- the slot is empty,
    * ``_empty``        -- a deleted marker (tombstone) left by ``del``,
    * ``(key, value)``  -- a live entry.

    The bucket array doubles when the load factor exceeds ``ratioExpand``
    and halves when it drops below ``ratioShrink`` (never going below
    ``minSize``).  The capacity is always kept a power of two, because the
    probe offsets 0, 1, 3, 6, 10, ... only reach every slot for such sizes.

    NOTE: tombstones are purged only when the table is resized, so a long
    run of insert/delete pairs at a steady size makes failed lookups scan
    more slots.
    """

    ratioExpand = .75   # grow when _num > ratioExpand * _size
    ratioShrink = .25   # shrink when _num < ratioShrink * _size
    minSize = 8         # smallest capacity the table is allowed to have
    # Tombstone marking a deleted slot; always compared by identity.
    _empty = (None,)

    def __init__(self, size=None):
        """Create an empty table.

        size -- optional capacity hint.  The real capacity is the smallest
                power of two that is >= max(size, minSize); ``None`` (or
                any falsy value) means ``minSize``.
        """
        wanted = max(size or 0, self.minSize)
        capacity = 1
        while capacity < wanted:
            capacity <<= 1

        self._size = capacity
        self._buckets = [None] * capacity
        self._num = 0   # number of live entries

    def _entry(self, key):
        """Find the bucket for ``key``.

        Returns ``(entry, index)``: if the key is stored, ``entry`` is its
        ``(key, value)`` tuple and ``index`` its bucket; otherwise
        ``entry`` is ``None`` and ``index`` is the bucket where the key
        should be inserted (the first tombstone met on the probe path, or
        else the first empty slot).

        Raises ``IndexError`` if the whole table was probed without finding
        the key, an empty slot or a tombstone.
        """
        start = hash(key)
        reusable = None   # first tombstone seen on the probe path

        for i in range(self._size):
            # quadratic probing with triangular-number offsets
            idx = (start + (i + i * i) // 2) % self._size
            entry = self._buckets[idx]

            if entry is None:
                # An empty slot ends the probe chain: the key is absent.
                return None, (idx if reusable is None else reusable)

            if entry is self._empty:
                # Never compare a tombstone with the key (it would match
                # the key ``None``); just remember the first one for reuse.
                if reusable is None:
                    reusable = idx
            elif entry[0] == key:
                return entry, idx

        if reusable is None:
            # out of space
            raise IndexError('hash table is full')
        return None, reusable

    def _ensureCapacity(self):
        """Grow or shrink the bucket array if the load factor requires it.

        Live entries are re-inserted into a fresh array; tombstones are
        dropped in the process.
        """
        if self._num > self.ratioExpand * self._size:
            self._size = self._size << 1
        elif (self._num < self.ratioShrink * self._size
              and (self._size >> 1) >= self.minSize):
            self._size = self._size >> 1
        else:
            return

        # reallocate: keep a reference to the old array, then rehash into
        # a new one of the updated size
        old_buckets = self._buckets
        self._buckets = [None] * self._size

        for p in old_buckets:
            if p is not None and p is not self._empty:
                _, idx = self._entry(p[0])
                self._buckets[idx] = p

    def __len__(self):
        """Return the number of live entries."""
        return self._num

    def __contains__(self, key):
        """Return True if ``key`` is stored in the table."""
        p, _ = self._entry(key)
        return p is not None

    def __getitem__(self, key):
        """Return the value stored for ``key``, or ``None`` if it is absent.

        Unlike ``dict``, a missing key does not raise ``KeyError``.
        """
        p, _ = self._entry(key)
        return p[1] if p is not None else None

    def __setitem__(self, key, value):
        """Insert ``key`` or overwrite its value, resizing if needed."""
        p, idx = self._entry(key)
        self._buckets[idx] = (key, value)

        if p is None:
            # A new key (not an overwrite) changes the load factor.
            self._num += 1
            self._ensureCapacity()

    def __delitem__(self, key):
        """Remove ``key`` if present; a missing key is silently ignored."""
        p, idx = self._entry(key)
        if p is None:
            return

        # Leave a tombstone so probe chains running through this slot
        # still reach the entries stored behind it.
        self._buckets[idx] = self._empty
        self._num -= 1
        self._ensureCapacity()

    def __iter__(self):
        """Yield every live ``(key, value)`` tuple in bucket order."""
        for p in self._buckets:
            if p is not None and p is not self._empty:
                yield p

    def slots(self):
        """Return one character per bucket.

        'x' is a live entry, 'o' a tombstone and '-' an empty slot.
        """
        marks = []
        for p in self._buckets:
            if p is None:
                marks.append('-')
            elif p is self._empty:
                marks.append('o')
            else:
                marks.append('x')
        return ''.join(marks)

## run

In [48]:
# Start from an empty table with the default capacity.
table = HashTable()

In [54]:
# Fill the table from the text file (presumably "player, country" per line;
# verify against the data file).  Each freshly inserted key is deleted again
# with probability 1/2, so the table also ends up with deleted markers.
with open("players_list.txt") as fp:
    for line in fp:
        fields = line.split(', ')
        # first field is the key; second field, cut at the newline, the value
        key, value = fields[0], fields[1].split('\n')[0]
        table[key] = value
        coin = np.random.rand()
        if not coin < .5:
            del table[key]

In [55]:
# Number of live entries vs. current length of the bucket array.
len(table), table._size

(49, 128)

In [56]:
# Occupancy map, one character per bucket:
# 'x' = live entry, 'o' = deleted marker, '-' = empty (None).
print(table.slots())

--xxxox-x---o----x----xox----xxx--xx--xx-x--x-xx-o-x-xxxxx-x--xxxxxx--x-------x-x--------------xo--xo---xx-xx--xxxxxx-----x---x-


In [57]:
# Iterating the table yields the stored (key, value) tuples in bucket order.
for item in table:
    print(item)

('Neymar', 'Brazil')
('Arturo Vidal', 'Chile')
('Mario Mand\xc5\xbeuki\xc4\x87', 'Croatia')
('Joe Hart', 'England')
('Luka Modric', 'Croatia')
('Leonardo Bonucci', 'Italy')
('Mats Hummels', 'Germany')
('Edinson Cavani', 'Uruguay')
('Javi Martinez', 'Spain')
('Cristiano Ronaldo', 'Portugal')
('Manuel Neuer', 'Germany')
('Thomas Muller', 'Germany')
('Mario G\xc3\xb6tze', 'Germany')
('Moussa Dembele', 'Belgium')
('Bastian Schweinsteiger', 'Germany')
('Ashley Cole', 'England')
('Lionel Messi', 'Argentina')
("Mapou Yanga-M'Biwa", 'France')
('Samir Nasri', 'France')
('Thiago Silva', 'Brazil')
('Didier Drogba', 'Ivory Coast')
('Wayne Rooney', 'England')
('Daniele De Rossi', 'Italy')
('Carlos Tevez', 'Argentina')
('Sergio Aguero', 'Argentina')
('Mathieu Valbuena', 'France')
('Klaas Jan Huntelaar', 'Netherlands')
('Robert Lewandowski', 'Poland')
('Emmanuel Mayuka', 'Zambia')
('Santi Cazorla', 'Spain')
('David Silva', 'Spain')
('Pierre-Emerick Aubameyang', 'Gabon')
('Patrice Evra', 'France')
('J

In [52]:
# Look up one key; __getitem__ returns None when the key is absent
# (presumably why no output is shown for this cell -- confirm by re-running).
table['Neymar']

In [53]:
# Peek at the raw content of bucket 0 (None, the deleted marker, or an entry).
print(table._buckets[0])

('Emmanuel Mayuka', 'Zambia')


In [41]:
# delete all the values
for item in list(table):
    del table[item[0]]

In [42]:
# Number of live entries vs. current length of the bucket array.
len(table), table._size

(0, 8)

In [32]:
# Iterate the stored (key, value) tuples; nothing is printed if the table
# holds no live entries.
for item in table:
    print(item)