#### Solutions to the problems of Chapter 10
### Maps, Hash Tables, and Skip Lists

## Basic implementations and helper classes

In [1]:
import random
import time
from utils import PositionalList
import collections

In [2]:
from collections import abc
class MapBase(abc.MutableMapping):
    class _Item:
        __slots__='_key', '_value'
        
        def __init__(self, k, v):
            self._key = k
            self._value = v
            
        def __eq__(self, other):
            return self._key == other._key 
        
        def __neq__(self, other):
            return not(self == other) 
        
        def __lt__(self, other):
            return self._key < other._key 

In [3]:
class UnsortedTableMap(MapBase):
    
    def __init__(self):
        self._table = [] 
        
    def __getitem__(self, k):
        for item in self._table:
            if k == item._key:
                return item._value 
        raise KeyError('Key Error: '+repr(k))
        
    def __setitem__(self, k, v):
        for item in self._table:
            if k == item._key:
                item._value = v 
                return
        self._table.append(self._Item(k, v))
                
    def __delitem__(self, k):
        for i in range(len(self._table)):
            if k == self._table[i]._key:
                self._table.pop(i)
                return 
        raise KeyError('Key Error: '+repr(k))
                
    def __len__(self): 
        return len(self._table)
    
    def __iter__(self): 
        for item in self._table:
            yield item._key 

#### word frequencies example

In [4]:
di = {}
with open('raven', 'r') as f: 
    pieces = f.read().lower().split()
    for piece in pieces: 
        word = ''.join(c for c in piece if c.isalpha())
        di[word] = di.get(word, 0)+1 
    
    
sorted_dict = sorted(di.items(), key= lambda x:x[1], reverse=True)
# for item in sorted_dict:
#     print(item, end = " ")

In [5]:
class HashMapBase(MapBase):
    "Abstract base class for map using hash-table with MAD compression"
    
    def __init__(self, cap=11, p=49331):
        self._table = cap*[None]
        self._n = 0
        self._prime = p
        self._scale = 1+random.randrange(p-1)
        self._shift = random.randrange(p)
        
    def _hash_function(self, k):
        return (hash(k)*self._scale + self._shift) % self._prime % len(self._table)
        
    def __len__(self):
        return self._n 
    
    def __getitem__(self, k):
        j = self._hash_function(k)
        return self._bucket_getitem(j, k)
    
    def __setitem__(self, k, v):
        j = self._hash_function(k)
        self._bucket_setitem(j, k, v)
        if self._n >= len(self._table)//2:
            self._resize(2*len(self._table)-1)
        
    def __delitem__(self, k):
        j = self._hash_function(k)
        self._bucket_delitem(j, k)
        self._n -= 1 
        
    def _resize(self, c):
        old = list(self.items())
        self._table = c*[None]
        self._n = 0
        for (k, v) in old:
            self[k] = v

In [6]:
class ChainHashMap(HashMapBase):
    
    def _bucket_getitem(self, j, k):
        bucket = self._table[j]
        if bucket is None:
            raise KeyError('Key Error: '+repr(k))
        return bucket[k]
    
    def _bucket_setitem(self, j, k ,v):
        if self._table[j] is None:
            self._table[j] = UnsortedTableMap()
        oldsize = len(self._table[j])
        self._table[j][k] = v
        if len(self._table[j]) > oldsize:
            self._n += 1 
        
    def _bucket_delitem(self, j, k):
        bucket = self._table[j]
        if bucket is None:
            raise KeyError('Key Error: '+repr(k))
        del bucket[k]
        
    def __iter__(self): 
        for bucket in self._table:
            if bucket is not None:
                for key in bucket:
                    yield key

    def items(self):
        for item in self._table:
            yield (item._key, item._value)

In [7]:
class ProbeHashMap(HashMapBase):
    _AVAIL = object() 
    
    def _is_available(self, j):
        return self._table[j] is None or self._table[j] is ProbeHashMap._AVAIL
    
    def _find_slot(self, j, k):
        firstAvail = None
        while True:
            if self._is_available(j):
                if firstAvail is None:
                    firstAvail = j
                if self._table[j] is None:
                    return (False, firstAvail)
            elif k == self._table[j]._key:
                return (True, j)
            j = (j+1)%len(self._table)
    
    def _bucket_getitem(self, j, k):
        found, s = self._find_slot(j, k)
        if not found:
            raise KeyError('Key Error: '+repr(k))
        return self._table[s]._value 
    
    def _bucket_setitem(self, j, k, v):
        found, s = self._find_slot(j, k)
        if not found:
            self._table[s] = self._Item(k, v)
            self._n += 1 
        else:
            self._table[s]._value = v 
            
    def _bucket_delitem(self, j, k):
        found, s = self._find_slot(j, k)
        if not found:
            raise KeyError('Key Error: '+repr(k))
        self._table[s] = ProbeHashMap._AVAIL
        
    def __iter__(self):
        for j in range(len(self._table)):
            if not self._is_available(j):
                yield self._table[j]._key

In [8]:
class SortedTableMap(MapBase): 
    
    def _find_index(self, k ,low, high): 
        if high <low:
            return high+1 
        else:
            mid = (low+high)//2
            if k ==self._table[mid]._key:
                return mid 
            elif k < self._table[mid]._key:
                return self._find_index(key, low, mid-1)
            else:
                return self._find_index(key, mid+1, high)
    
    def __init__(self):
        self._table = []
    
    def __len__(self):
        return len(self._table)
    
    def __getitem__(self, k):
        j = self._find_index(k, 0, len(self._table)-1)
        if j == len(self._table) or self._table[j]._key != k:
            raise KeyError('Key Error: '+repr(k))
        return self._table[j]._value
    
    def __setitem__(self, k, v):
        j = self._find_index(k, 0, len(self._table)-1)
        if j <len(self._table) and self._table[j]._key == k:
            self._table[j]._value = v
        else:
            self._table.insert(j, self._Item(k, v))
           
    def __delitem__(self, k):
        j = self._find_index(k, 0, len(self._table)-1)
        if j == len(self._table) or self._table[j]._key != k:
            raise KeyError(' Key Error: '+repr(k))
        self._table.pop(j)
        
    def __iter__(self):
        for item in self._table:
            yield item._key 
            
    def __reversed__(self):
        for item in reversed(self._table):
            yield item._key
            
    def find_min(self):
        if len(self._table)>0:
            return (self._table[0]._key, self._table[0]._value)
        else:
            return None 
    
    def find_max(self):
        if len(self._table)>0:
            return (self._table[-1]._key, self._table[-1]._value)
        else:
            return None 

    def find_ge(self, k):
        j = self._find_index(k, 0, len(self._table)-1)
        if j<(len(self._table)):
            return (self._table[j]._key, self._table[j]._value)
        else:
            return None 
    
    def find_lt(self, k):
        j = self._find_index(k, 0, len(self._table)-1)
        if j>0:
            return (self._table[j-1]._key, self._table[j-1]._value)
        else:
            return None 
    
    def find_gt(self, k):
        j = self._find_index(k, 0, len(self._table)-1)
        if j<len(self._table) and self._table[j]._key == k:
            j+=1
        if j<len(self._table):
            return (self._table[j]._key, self._table[j]._value)
        else:
            return None
        
    def find_range(self, start, stop):
        if start is None:
            j = 0
        else:
            j = self._find_index(start, 0, len(self._table)-1)
            while j<len(self._table) and (stop is None or self._table[j]._key <stop):
                yield (self._table[j]._key, self._table[j]._value)
                j+=1 

In [9]:
class MultiMap:
    _MapType = dict 
    
    def __init__(self):
        self._map = self._MapType() 
        self._n = 0
        
    def __iter__(self):
        for k, secondary in self._map.items():
            for v in secondary:
                yield(k, v)
    
    def add(self, k, v):
        container = self._map.setdefault(k, [])
        container.append(v)
        self._n +=1 
        
    def pop(self, k):
        secondary = self._map[k]
        v = secondary.pop() 
        if len(secondary) ==0:
            del self._map[k]
        self._n -= 1
        return (k, v)
    
    def find(self, k):
        secondary = self._map[k]
        return (k, secondary[0])
    
    def find_all(self, k):
        secondary = self._map.get(k, [])
        for v in secondary:
            yield (k, v)

****Reinforcement Problems***

R-10.1 Give a concrete implementation of the pop method in the context of the MutableMapping class, relying only on the five primary abstract methods of that class. 

In [10]:
def pop(k):
    self._delItem_(k)

R-10.2 Give a concrete implementation of the items() method in the context of the MutableMapping class, relying only on the five primary abstract methods of that class. What would its running time be if directly applied to the UnsortedTableMap subclass?

In [11]:
def items():
    for key in self:
        yield (key, self[key])
        
# run time is o(n^2) if applied directly with unsorted map 

R-10.3 Give a concrete implementation of the items() method directly within the UnsortedTableMap class, ensuring that the entire iteration runs in O(n) time.

In [12]:
class ItemsUnsortedTableMap(UnsortedTableMap):
    def items(self):
        for item in self._table:
            yield (item._key, item._value)

In [13]:
m = ItemsUnsortedTableMap()
m['cat'] = 10
m['dog'] = 20

print(list(m.items()))

[('cat', 10), ('dog', 20)]


R-10.4 What is the worst-case running time for inserting n key-value pairs into an initially empty map M that is implemented with the UnsortedTableMap class?

In [14]:
# O(n)* ---> amortized "migh need to resize table"

R-10.5 Reimplement the UnsortedTableMap class from Section 10.1.5, using the PositionalList class from Section 7.4 rather than a Python list.

In [15]:
# not very useful

R-10.7 Our Position classes for lists and trees support the __ eq __ method so that two distinct position instances are considered equivalent if they refer to the same underlying node in a structure. For positions to be allowed as keys
in a hash table, there must be a definition for the hash method that is consistent with this notion of equivalence. Provide such a hash method.

In [16]:
# we can use the name of the same object both Positions point to, convert it into a hashable python object and using python hash function, or convert it's string representation into a hash code

R-10.15 Our HashMapBase class maintains a load factor λ ≤ 0.5. Reimplement that class to allow the user to specify the maximum load, and adjust the concrete subclasses accordingly

In [17]:
class AudjustableLoadHashMapBase(ChainHashMap):

    def __init__(self, cap=11, p=109345121, max_load=0.5):
        super().__init__( cap=cap,)
        self._max_load = max_load
        
    def __setitem__(self, k, v):
        j = self._hash_function(k)
        self._bucket_setitem(j, k, v)
        if self._n/len(self._table) >= self._max_load:
            self._resize(2*len(self._table)-1)

In [19]:
# d = AudjustableLoadHashMapBase(cap=11, max_load=0.2)

# d['a']=10
# print(d._n, len(d._table))

# d['b']=20
# print(d._n, len(d._table))

# d['c']=30
# print(d._n, len(d._table))

# d['d']=40

# d['e']=50
# print(d._n, len(d._table))

# d['f']=60
# print(d._n, len(d._table))


R-10.16 Give a pseudo-code description of an insertion into a hash table that uses quadratic probing to resolve collisions, assuming we also use the trick of replacing deleted entries with a special “deactivated entry” object.

In [20]:
# will be solved with a code in next problem

R-10.17 Modify our ProbeHashMap to use quadratic probing

In [21]:
class QuadraticProbeHashMap(ProbeHashMap):
    def _find_slot(self, j, k):
        firstAvail = None
        while True:
            if self._is_available(j):
                if firstAvail is None:
                    firstAvail = j
                if self._table[j] is None:
                    return (False, firstAvail)
            elif k == self._table[j]._key:
                return (True, j)
            j = (j**2+1)%len(self._table) 

R-10.18 Explain why a hash table is not suited to implement a sorted map.

because hashing functions don’t necessarily preserve the order of the data. 

R-10.19 Describe how a sorted list implemented as a doubly linked list could be used to implement the sorted map ADT

In [22]:
# very straight forward 


In [23]:
## Solves many of the skiplist problems 

##### Skiplist implementation

In [24]:
class SkipList:
    class _Node:
        def __init__(self, key, above=None, below=None, nxt=None, prev=None):
            self._key = key
            self._above = above
            self._below = below
            self._next = nxt
            self._prev = prev
            
    def __init__(self):
        self._height = 0
        self._head = self._Node(key=float('-inf'))
        self._tail = self._Node(key=float('inf'), prev=self._head)
        self._head._next = self._tail
            
    
    def _candidate_search(self, key): 
        current = self._head 
        while(current._below is not None):
            current = current._below
            while(key >= current._next._key):
                current = current._next
        return current
    
    
    def add(self, key):
        position = self._candidate_search(key)
        if position._key == key:
            return position
        level = -1
        odds = -1
        while(True):
            # do
            odds += 1
            level += 1
            
            self._adjust_levels(level)
            q = position
            while position._above is None:
                position = position._prev
            position = position._above
            
            self._insert_after_above(position, q, key)
            # while
            if random.choice([True, False]) is not True:
                break
    
    def _adjust_levels(self, level):
        if level >= self._height:
            self._height += 1
            
            newHead = self._Node(key=float('-inf'))
            newTail =  self._Node(key=float('inf'), prev = newHead)
            
            newHead._next = newTail
            newHead._below = self._head
            newTail._below = self._tail
            
            self._head._above = newHead
            self._tail._above = newTail
            
            self._head = newHead
            self._tail = newTail
            
    def _insert_after_above(self, p, q, key):
        new_node = self._Node(key=key)
        before_new_node = p._below._below
        
        self._set_before_and_after_references(q, new_node)
        self._set_above_and_below_references(p, new_node, before_new_node, key)
        
        return new_node
    
    def _set_before_and_after_references(self, q, new_node):
        new_node._next = q._next
        q._next._prev = new_node
        new_node._prev = q
        q._next = new_node
    
    def _set_above_and_below_references(self, p, new_node, before_new_node, key):
        if before_new_node != None:
            while True:
                if before_new_node._next._key != key:
                    before_new_node = before_new_node._next 
                else:
                    break
        
            new_node._below = before_new_node._next
            before_new_node._next._above = new_node
        
        if p is not None:
            if p._next._key == key:
                new_node._above = p._next 
                
    def remove_node(self, key):
        to_be_removed = self._candidate_search(key)
        if to_be_removed._key != key:
            return None
        while to_be_removed is not None: 
            after_node_to_be_removed = to_be_removed._next
            before_to_be_removed = to_be_removed._prev
            
            after_node_to_be_removed._prev = before_to_be_removed
            before_to_be_removed._next = after_node_to_be_removed
            
            
            if to_be_removed._above is not None:
                to_be_removed = to_be_removed._above
            else:
                break
            
        
    def __repr__(self):
        printstr = [] 
        printstr.append("\n SkipList starting with top-left node.\n")
        
        starting = self._head 
        highest_level = starting
        level = self._height
        while highest_level is not None:
            printstr.append("\n Level: "+str(level)+ '\n')
            while starting is not None:
                printstr.append(str(starting._key))
                if starting._next is not None:
                    printstr.append(" -> ")
                starting = starting._next
            printstr.append("\n")
            highest_level = highest_level._below
            starting = highest_level
            level -= 1
        return ''.join(string for string in printstr)
                

In [25]:
skplst = SkipList()
for i in range(15):
    skplst.add(i)
print(skplst)


 SkipList starting with top-left node.

 Level: 6
-inf -> inf

 Level: 5
-inf -> 2 -> inf

 Level: 4
-inf -> 2 -> inf

 Level: 3
-inf -> 2 -> 4 -> 11 -> inf

 Level: 2
-inf -> 2 -> 4 -> 5 -> 11 -> inf

 Level: 1
-inf -> 1 -> 2 -> 4 -> 5 -> 6 -> 8 -> 11 -> inf

 Level: 0
-inf -> 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9 -> 10 -> 11 -> 12 -> 13 -> 14 -> inf



R-10.25 Give a concrete implementation of the pop method, in the context of a MutableSet abstract base class, that relies only on the five core set behaviors described in Section 10.5.2

In [26]:
class PopappleSet(collections.MutableSet): 
    def pop():
        item = next(iter(self))
        self.remove(item)

  class PopappleSet(collections.MutableSet):


R-10.26 Give a concrete implementation of the isdisjoint method in the context of the MutableSet abstract base class, relying only on the five primary abstract methods of that class. Your algorithm should run in O(min(n,m)) where n and m denote the respective cardinalities of the two sets.

In [27]:
class SmartSet(set):
    def isdisjoint(self, other):
        if len(self) > len(other):
            for item in other:
                if item not in self:
                    return True
            return False 
        else:
            for item in self:
                if item not in other:
                    return True
            return False

R-10.27 What abstraction would you use to manage a database of friends’ birthdays in order to support efficient queries such as “find all friends whose birthday is today” and “find the friend who will be the next to celebrate a
birthday

In [28]:
# sorted Multi-Map

***Creativity Problems***

C-10.28 On page 406 of Section 10.1.3, we give an implementation of the method setdefault as it might appear in the MutableMapping abstract base class. While that method accomplishes the goal in a general fashion, its efficiency is less than ideal. In particular, when the key is new, there will be a failed search due to the initial use of getitem , and then a subsequent insertion via setitem . For a concrete implementation, such as the UnsortedTableMap, this is twice the work because a complete scan of the table will take place during the failed getitem , and then another complete scan of the table takes place due to the implementation of setitem . A better solution is for the UnsortedTableMap class to override setdefault to provide a direct solution that performs a single search. Give such an implementation of UnsortedTableMap.setdefault.

In [29]:
class SetDefaultUnsortedTableMap(UnsortedTableMap):
    def setdefault(self, k, v): 
        for item in self._table:
            if k == item._key:
                return item._value
        self._table.append(self._Item(k, v))
        
l = SetDefaultUnsortedTableMap()
l['a']=5
l['b']=6

for item in l:
    print(item, l[item])

print(l.setdefault("a", 7))
print(l.setdefault("c", 10))
for item in l:
    print(item, l[item])

a 5
b 6
5
None
a 5
b 6
c 10


C-10.29 Repeat Exercise C-10.28 for the ProbeHashMap class.

In [None]:
class SetDefualtProbeHashMap(ProbeHashMap):
    def setdefault(self, k, v):
        j = self._hash_function(k):
            return self._bucket_setdefault(j, k)
    
    def _bucket_setdefault(j, k, v):
        found, s = self._find_slot(j, k):
        if not found:
            self._table[s] = self._Item(k, v)
            self._n += 1
        else:
            return self._table[s] 

C-10.30 Repeat Exercise C-10.28 for the ChainHashMap class.

In [None]:
class SetDefaultChainHashMap(ChainHashMap):
    def setdefault(self, k, v):
        j = self._hash_function(k):
            return self._bucket_setdefault(j, k)
    
    def _bucket_setdefault(j, k, v):
        bucket = self._table[j]
        if bucket is None:
            return bucket.setdefault(k, v)
        return bucket[k]

C-10.31 For an ideal compression function, the capacity of the bucket array for a hash table should be a prime number. Therefore, we consider the problem of locating a prime number in a range [M,2M]. Implement a method for finding such a prime by using the sieve algorithm. In this algorithm, we allocate a 2M cell Boolean array A, such that cell i is associated with the integer i. We then initialize the array cells to all be “true” and we “mark off” all the cells that are multiples of 2, 3, 5, 7, and so on. This process can stop after it reaches a number larger than √2M. (Hint: Consider a bootstrapping method for finding the primes up to √2M.)

In [72]:
def get_primes(n):
    initials = [True]*n 
    limit = int(n**0.5)+1
    initials[0],initials[1] =False, False
    
    for i in range(2, limit):
        if initials[i] is False:
            continue
        for j in range(i, n):
            if i*j >=n:
                break
            initials[i*j] = False
    primes = [] 
    for i, item in enumerate(initials):
        if item is True:
            primes.append(i)
    return primes

print(get_primes(100))

[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]


C-10.32 Perform experiments on our ChainHashMap and ProbeHashMap classes to measure its efficiency using random key sets and varying limits on the load factor (see Exercise R-10.15).

In [73]:
# straight forward

C-10.33 Our implementation of separate chaining in ChainHashMap conserves memory by representing empty buckets in the table as None, rather than as empty instances of a secondary structure. Because many of these buckets will hold a single item, a better optimization is to have those slots of the table directly reference the Item instance, and to reserve use of secondary containers for buckets that have two or more items. Modify our implementation to provide this additional optimization.

In [74]:
# requirements not clear


C-10.34 Computing a hash code can be expensive, especially for lengthy keys. In our hash table implementations, we compute the hash code when first inserting an item, and recompute each item’s hash code each time we resize our table. Python’s dict class makes an interesting trade-off. The hash code is computed once, when an item is inserted, and the hash code is stored as an extra field of the item composite, so that it need not be recomputed. Reimplement our HashTableBase class to use such an app

In [77]:
class HashMapBase(MapBase):
    class _Item:
        __slots__='_key', '_value', '_hash'
    
    def __init__(self, k, v):
        self._key = k
        self._value = v
        self._hash = self._hash_function(k)
    
    def __init__(self, cap=11, p=49331):
        self._table = cap*[None]
        self._n = 0
        self._prime = p
        self._scale = 1+random.randrange(p-1)
        self._shift = random.randrange(p)
        
    def _hash_function(self, k):
        return (hash(k)*self._scale + self._shift) % self._prime % len(self._table)
        
    def __len__(self):
        return self._n 
    
    def __getitem__(self, k):
        j = self._hash_function(k)
        return self._bucket_getitem(j, k)
    
    def __setitem__(self, k, v):
        j = self._hash_function(k)
        self._bucket_setitem(j, k, v)
        if self._n >= len(self._table)//2:
            self._resize(2*len(self._table)-1)
        
    def __delitem__(self, k):
        j = self._hash_function(k)
        self._bucket_delitem(j, k)
        self._n -= 1 
        
    def _resize(self, c):
        old = list(self.items())  ##items must return triplets (key, value, hash)
        self._table = c*[None]
        self._n = 0
        for (k, v, _hash) in old:
            self._bucket_setitem(_hash, k, v)

C-10.35 Describe how to perform a removal from a hash table that uses linear probing to resolve collisions where we do not use a special marker to represent deleted elements. That is, we must rearrange the contents so that it appears that the removed entry was never inserted in the first place.