In [1]:
from collections.abc import MutableMapping
from random import randrange

# Example

In [2]:
# NOTE(review): absolute, machine-specific path; point this at your own copy of
# reviews.txt (ideally relative to a configurable data directory).
REVIEWS_PATH = "/Users/imad/Documents/dl-zoo/deep-learning-with-pytorch/deep-learning-udacity/sentiment-rnn/data/reviews.txt"

# Read the whole corpus, lowercase it and split on whitespace; the file is
# closed as soon as the tokens are in memory.
with open(REVIEWS_PATH, "r") as f:
    reviews = f.read().lower().split()

# Count word occurrences. Non-alphabetic characters are stripped from each
# token character by character, so "great!" is counted as "great"; tokens
# that contain no letters at all are skipped.
freq = dict()
for s in reviews:
    word = ''.join(char for char in s if char.isalpha())
    if word:
        freq[word] = 1 + freq.get(word, 0)

# Re-order the dictionary from most to least frequent and show the top five.
freq = dict(sorted(freq.items(), key=lambda pair: pair[1], reverse=True))
list(freq.items())[:5]

[('the', 336713),
 ('and', 164107),
 ('a', 163009),
 ('of', 145864),
 ('to', 135720)]

# Map ADT

In [3]:
class MapBase(MutableMapping):
    """Abstract base class that provides the key-value item type for maps.

    The class itself defines none of the abstract ``MutableMapping`` methods,
    so concrete subclasses must supply ``__getitem__``, ``__setitem__``,
    ``__delitem__``, ``__len__`` and ``__iter__``.
    """

    class _Item:
        """Lightweight key-value pair that is compared and ordered by key only."""

        __slots__ = "_key", "_value"

        def __init__(self, key, value):
            self._key = key
            self._value = value

        def __eq__(self, other):
            """Return True when both items hold equal keys (values are ignored)."""
            # Returning NotImplemented lets Python fall back to its default
            # comparison instead of failing with AttributeError on `other._key`.
            if not isinstance(other, MapBase._Item):
                return NotImplemented
            return self._key == other._key

        def __ne__(self, other):
            """Return the negation of ``==``."""
            return not (self == other)

        def __lt__(self, other):
            """Order items by key; comparing with a non-item raises TypeError."""
            if not isinstance(other, MapBase._Item):
                return NotImplemented
            return self._key < other._key

# Unsorted Table Map

In [4]:
class UnsortedTableMap(MapBase):
    """Map backed by a plain Python list of items kept in insertion order.

    Every lookup, update and deletion scans the list linearly.
    """

    def __init__(self):
        # Items are stored as MapBase._Item instances.
        self._table = []

    def __getitem__(self, key):
        """Return the value stored under key; raise KeyError if it is absent."""
        match = next((entry for entry in self._table if entry._key == key), None)
        if match is None:
            raise KeyError(f"KeyError: '{key}'")
        return match._value

    def __setitem__(self, key, value):
        """Overwrite the value of an existing key, or append a new item."""
        for entry in self._table:
            if entry._key == key:
                entry._value = value
                break
        else:
            # The scan finished without a match, so this is a new key.
            self._table.append(self._Item(key, value))

    def __delitem__(self, k):
        """Remove the item stored under k; raise KeyError if it is absent."""
        for position, entry in enumerate(self._table):
            if entry._key == k:
                del self._table[position]
                return
        raise KeyError(f"KeyError: '{k}'")

    def __len__(self):
        """Return the number of stored items."""
        return len(self._table)

    def __iter__(self):
        """Yield the keys in the order they are stored."""
        yield from (entry._key for entry in self._table)

| Operation | Running Time |
| --- | --- |
| `__getitem__` | O(n) |
| `__setitem__` | O(n) |
| `__delitem__` | O(n) |

In [5]:
# Smoke test: store a single entry, then read it back.
unsorted_map = UnsortedTableMap()
unsorted_map["imad"] = 1
unsorted_map["imad"]

1

In [6]:
len(unsorted_map)

1

In [7]:
del unsorted_map['imad']

In [8]:
len(unsorted_map)

0

# Hash Table

In [9]:
class HashMapBase(MapBase):
    """Abstract hash-table map that compresses hash codes with the MAD method.

    Subclasses decide how collisions are resolved by implementing
    ``_bucket_getitem``, ``_bucket_setitem``, ``_bucket_delitem`` and
    ``__iter__``; ``_bucket_setitem`` is responsible for incrementing
    ``self._n`` when it stores a new key.
    """

    def __init__(self, capacity=11, p=109345121):
        """Create an empty table with `capacity` cells; `p` is the MAD prime."""
        self._prime = p
        self._n = 0  # number of items currently stored
        self._table = [None] * capacity
        # Random multiplier and offset for ((a * hash + b) mod p) mod N.
        self._scale = randrange(1, p - 1)
        self._shift = randrange(p)

    def _hash_function(self, k):
        """Map key k to a table index in range(len(self._table))."""
        mixed = hash(k) * self._scale + self._shift
        return mixed % self._prime % len(self._table)

    def __len__(self):
        """Return the number of items stored in the map."""
        return self._n

    def __getitem__(self, k):
        """Return the value stored under k, delegating to the bucket lookup."""
        return self._bucket_getitem(self._hash_function(k), k)

    def __setitem__(self, k, v):
        """Store v under k, then grow the table if it is more than half full."""
        self._bucket_setitem(self._hash_function(k), k, v)
        capacity = len(self._table)
        if self._n > capacity // 2:
            self._resize(2 * capacity - 1)

    def __delitem__(self, k):
        """Remove the item stored under k."""
        self._bucket_delitem(self._hash_function(k), k)
        # Only reached when the bucket-level delete did not raise.
        self._n -= 1

    def _resize(self, capacity):
        """Rebuild the table with `capacity` cells and re-insert every item."""
        entries = list(self.items())  # snapshot before the table is replaced
        self._table = [None] * capacity
        self._n = 0  # re-insertion below counts the items again
        for key, value in entries:
            self[key] = value

| Operation | Expected | Worst-case |
| --- | --- | --- |
| `__getitem__` | O(1) | O(n) |
| `__setitem__` | O(1) | O(n) |
| `__delitem__` | O(1) | O(n) |

## Hash Table with Closed Addressing (Separate Chaining)

In [10]:
class ChainHashMap(HashMapBase):
    """Hash map that resolves collisions with separate chaining.

    Each occupied table cell holds an UnsortedTableMap containing every item
    whose key hashes to that index.
    """

    def _bucket_getitem(self, j, k):
        """Return the value for k from the chain at index j, or raise KeyError."""
        chain = self._table[j]
        # A falsy chain is either None (cell never used) or an emptied map.
        if chain:
            return chain[k]
        raise KeyError(f"KeyError: {k}")

    def _bucket_setitem(self, j, k, v):
        """Store v under k in the chain at index j, creating the chain if needed."""
        chain = self._table[j]
        if not chain:
            chain = UnsortedTableMap()
            self._table[j] = chain
        size_before = len(chain)
        chain[k] = v
        # The chain only grows when k was not already present.
        grew = len(chain) > size_before
        if grew:
            self._n += 1

    def _bucket_delitem(self, j, k):
        """Delete k from the chain at index j, or raise KeyError."""
        chain = self._table[j]
        if chain:
            del chain[k]
            return
        raise KeyError(f"KeyError: {k}")

    def __iter__(self):
        """Yield every key, walking the table cell by cell."""
        for chain in self._table:
            if chain:
                yield from chain

In [11]:
d = ChainHashMap()

In [12]:
d['imad'] = 1

In [13]:
d['imad']

1

In [14]:
len(d)

1

In [15]:
d['imad'] = 10

In [16]:
d['imad']

10

In [17]:
len(d)

1

In [18]:
del d['imad']

In [19]:
len(d)

0

In [20]:
# Ten inserts into the default 11-cell table push it past half full,
# which triggers a resize along the way.
d = ChainHashMap()
for i in range(10):
    key = str(i)
    d[key] = 10

In [21]:
len(d._table)

21

In [22]:
len(d)

10

In [23]:
list(d)

['2', '7', '4', '5', '1', '8', '6', '3', '9', '0']

## Hash Table with Open Addressing (Linear Probing)

In [24]:
class ProbeHashMap(HashMapBase):
    """Hash map implemented using linear probing for collision resolution."""

    # Sentinel that marks a cell whose item was deleted, to tell it apart
    # from a cell that has never been occupied (None).
    _AVAIL = object()

    def _is_available(self, j):
        """Return True if cell j holds no item (never used, or deleted)."""
        return self._table[j] is None or self._table[j] is ProbeHashMap._AVAIL

    def _find_slot(self, j, k):
        """Probe for key k, starting at index j.

        Returns:
            (True, index) when k is stored at `index`;
            (False, index) when k is absent, where `index` is the first
            available cell met during the probe (None if no cell was
            available).
        """
        first_available = None
        # Probe each cell at most once. Deletions leave _AVAIL markers behind,
        # so the table may hold no None cell at all; an unbounded loop would
        # then never terminate for a missing key.
        for _ in range(len(self._table)):
            if self._is_available(j):
                # Compare against None explicitly: index 0 is a valid slot
                # but is falsy.
                if first_available is None:
                    first_available = j
                # A never-used cell ends the probe sequence: k cannot be
                # stored beyond it.
                if self._table[j] is None:
                    return (False, first_available)
            elif self._table[j]._key == k:
                return (True, j)
            j = (j + 1) % len(self._table)
        return (False, first_available)

    def _bucket_getitem(self, j, k):
        """Return the value stored under k, or raise KeyError."""
        found, s = self._find_slot(j, k)
        if not found:
            raise KeyError(f'KeyError: {k}')
        return self._table[s]._value

    def _bucket_setitem(self, j, k, v):
        """Overwrite the value of k if present; otherwise insert a new item."""
        found, s = self._find_slot(j, k)
        if found:
            self._table[s]._value = v
        else:
            self._table[s] = self._Item(k, v)
            self._n += 1

    def _bucket_delitem(self, j, k):
        """Replace the item stored under k with the _AVAIL marker, or raise KeyError."""
        found, s = self._find_slot(j, k)
        if not found:
            raise KeyError(f'KeyError: {k}')
        # Leave a marker rather than None so later probes keep going past this cell.
        self._table[s] = ProbeHashMap._AVAIL

    def __iter__(self):
        """Yield the key of every occupied cell, in table order."""
        for i in range(len(self._table)):
            if not self._is_available(i):
                yield self._table[i]._key

In [25]:
d = ProbeHashMap()

In [26]:
d['imad'] = 1

In [27]:
d['imad']

1

In [28]:
len(d)

1

In [29]:
d['imad'] = 10

In [30]:
d['imad']

10

In [31]:
len(d)

1

In [32]:
del d['imad']

In [33]:
len(d)

0

In [34]:
# Ten inserts into the default 11-cell table push it past half full,
# which triggers a resize along the way.
d = ProbeHashMap()
for i in range(10):
    key = str(i)
    d[key] = 10

In [35]:
len(d._table)

21

In [36]:
len(d)

10

In [37]:
list(d)

['5', '4', '1', '9', '7', '2', '3', '0', '8', '6']