# Hash Tables

In [None]:
"""
Hashtable class example
of a ContactList class

** note: below hash function and equals methods are very inefficient
"""

class ContactList:
    """A list of contact names that hashes and compares as a set of names.

    Two ContactList objects are equal when they contain the same names,
    regardless of order or duplicates, so they collapse to one entry when
    placed in a set or used as dict keys.
    """

    def __init__(self, names):
        '''
        names is a list of strings.
        '''
        self.names = names

    def __hash__(self):
        # Conceptually we want to hash the set of names. Since the set type is
        # mutable, it cannot be hashed. Therefore we use frozenset.
        return hash(frozenset(self.names))

    def __eq__(self, other):
        # Defer to Python's default handling for foreign types instead of
        # failing with AttributeError on `other.names`.
        if not isinstance(other, ContactList):
            return NotImplemented
        return set(self.names) == set(other.names)
    
def merge_contact_lists(contacts: list[ContactList]) -> list[ContactList]:
    '''
    Return the distinct contact lists found in contacts.

    contacts is a list of ContactList. Duplicates are detected through
    ContactList's __hash__ and __eq__ by passing the items through a set,
    so the order of the returned list is not guaranteed.
    '''
    return list(set(contacts))

"""
Time Complexity: O(n) for computing hash, where n is the number of strings in contact list
"""

In [None]:
"""
Example of using collections.Counter
good for keeping track of key occurrences
"""

# Imported here so the cell runs on its own.
import collections

c = collections.Counter(a=3, b=1)
d = collections.Counter(a=1, b=2)
# add two counters together: c[x] + d[x], collections.Counter({'a': 4, 'b': 3})
c + d
# subtract (keeping only positive counts), collections.Counter({'a': 2})
c - d
# intersection: min(c[x], d[x]), collections.Counter({'a': 1, 'b': 1})
c & d
# union: max(c[x], d[x]), collections.Counter({'a': 3, 'b': 2})
c | d

**Question 12.2**: Is an anonymous letter constructible?

In [1]:
from collections import defaultdict
def anonymous_letter_construction(letter: str, zine: str) -> bool:
    """
    Return True if letter can be written using only characters cut out
    of the maga(zine), each zine character being usable at most once.

    Since the anonymous letter can only be created if every character is
    available in the zine, we build a hash of character counts for the
    zine and then walk through the letter, consuming one count per
    character and failing as soon as a character has run out.

    Time Complexity: O(n+m) where n is the length of the zine and
        m is the length of the letter
    Space Complexity: O(L) where L is the number of unique characters
        in the zine and letter. Can argue for O(1) complexity since
        there's only a finite number of characters we can encounter.
    """

    # First create the hash of characters and populate it using the
    # given zine. The factory must be a callable (int() == 0), not the
    # literal 0.
    bag = defaultdict(int)
    for ch in zine:
        bag[ch] += 1

    # Consume one occurrence per letter character; a zero count means the
    # zine has no copies of that character left.
    for ch in letter:
        if bag[ch] == 0:
            return False
        bag[ch] -= 1

    return True

In [2]:
"""
Book Answer
    + my reviewing comments

Time Complexity: O(m+n) where m and n are the numbers of characters in the letter
    and magazine, respectively. (This has a better optimal time than my code)
Space Complexity: O(L) where L is the number of distinct characters appearing in the letter
    (also potentially better space complexity on the assumption that len(magazine_text) >
    len(letter_text) more often than not.)
"""

def is_letter_constructible_from_magazine(letter_text: str,
                                          magazine_text: str) -> bool:
    """Return True if letter_text can be built from magazine_text's chars.

    Counts the characters the letter needs, then crosses them off while
    scanning the magazine, stopping early once nothing is left to match.
    """
    # Outstanding demand: how many of each character the letter still needs.
    # Hashing the letter (rather than the magazine) lets the scan below stop
    # before reaching the end of magazine_text.
    still_needed = collections.Counter(letter_text)

    for ch in magazine_text:
        if ch not in still_needed:
            # The letter does not need this character (any more).
            continue
        still_needed[ch] -= 1
        if still_needed[ch] == 0:
            # Demand for this character is satisfied; drop the key so an
            # empty counter means "everything matched".
            del still_needed[ch]
            if not still_needed:
                return True

    # Anything left over is a character the magazine could not supply.
    return not still_needed


# Counter-based one-pass variant. Counter subtraction discards keys whose
# resulting count is zero or negative, so whatever survives is exactly the
# set of characters the magazine cannot supply.
def is_letter_constructible_from_magazine_pythonic(letter_text: str,
                                                  magazine_text: str) -> bool:
    needed = collections.Counter(letter_text)
    available = collections.Counter(magazine_text)
    shortfall = needed - available
    return not shortfall

**Question 12.3**: Implement an ISBN cache

An ISBN is a string of length 10. The first 9 characters are digits; the last character is a check character. The check character is the sum of the first 9 digits, mod 11, with 10 represented by 'X'.

Create a cache for looking up prices of books identified by their ISBN. Treat ISBNs and prices as positive integers. Implement the following:
+ Insert: if ISBN already present, insert should not update the price, but should update that ISBN to be the most recently used entry.
+ Lookup: given an ISBN, return the corresponding price; if element is not present, return -1. If ISBN is present, update that entry to be the most recently used ISBN.
+ Erase: remove the specified ISBN and corresponding value from the cache. Return true if the ISBN was present; otherwise, return false.

*hint*: Use an auxiliary data structure.



In [2]:
class ListNode:
    """Singly linked list node holding one (ISBN, price) entry."""

    def __init__(self, key: str, val: int, next=None):
        self.key = key
        self.val = val
        self.next = next


class ISBN_cache:
    """Price lookup table keyed by 10-character ISBN strings.

    Entries are bucketed by the ISBN's check character (its last char).
    Each bucket is a singly linked list behind a dummy head, and the most
    recently used entry of a bucket is always kept right after that head.
    """

    def __init__(self):
        # make a hash table that'll use the last digit of the
        # ISBN as the hashkey
        # we'll use a linked list structure to keep track of
        # ISBNs and their prices
        # the most recently used entry will always be at the front of the list.
        # each hashkey will have a dummy head.
        self.mod = 11
        # Build one dummy head per bucket; `[node] * n` would repeat the
        # same node object and make every bucket share a single list.
        self.cache = [ListNode(0, 0) for _ in range(self.mod)]

    @staticmethod
    def get_key(isbn: str) -> int:
        """Return the bucket index for isbn: its check char, with 'X' as 10."""
        if isbn[9] == 'X':
            return 10
        return int(isbn[9])

    def _find_prev(self, head: ListNode, isbn: str):
        """Return the node just before isbn's node in head's list, or None."""
        prev = head
        while prev.next:
            if prev.next.key == isbn:
                return prev
            prev = prev.next
        return None

    @staticmethod
    def _move_to_front(head: ListNode, prev: ListNode) -> ListNode:
        """Unlink prev.next, relink it right after head, and return it."""
        node = prev.next
        prev.next = node.next
        node.next = head.next
        head.next = node
        return node

    def insert(self, isbn: str, price: int):
        """Add isbn with price, or just mark it most recently used.

        An ISBN that is already present keeps its existing price.
        """
        head = self.cache[self.get_key(isbn)]
        prev = self._find_prev(head, isbn)
        if prev is not None:
            self._move_to_front(head, prev)
            return
        head.next = ListNode(isbn, price, head.next)

    def lookup(self, isbn: str) -> int:
        """Return isbn's price and mark it most recently used; -1 if absent."""
        head = self.cache[self.get_key(isbn)]
        prev = self._find_prev(head, isbn)
        if prev is None:
            return -1
        return self._move_to_front(head, prev).val

    def erase(self, isbn: str) -> bool:
        """Remove isbn from the cache; return whether it was present."""
        head = self.cache[self.get_key(isbn)]
        prev = self._find_prev(head, isbn)
        if prev is None:
            return False
        prev.next = prev.next.next
        return True

The thing I missed in my answer is the existence of a 'capacity' for the cache which makes it a cache in the first place. I also became too focused on the structure of an isbn so I didn't make the whole isbn a key instead. The ordering I somewhat got right, by using a linked list queue.

The book solves most of the issues I had by using collections.OrderedDict, which automatically keeps the keys in a stable (insertion) order. They also took the ISBN as an int instead of a string (which I thought was slightly strange since they mentioned that the last char in an ISBN can be 'X').

- Time: lookup, updating queue, and insert are all O(1).
- Space: O(n) where n is the capacity passed through cache declaration.

In [3]:
# Book answer
# with added comments

import collections
class LruCache:
    """Fixed-capacity ISBN -> price cache with least-recently-used eviction.

    The OrderedDict keeps entries ordered from least recently used (front)
    to most recently used (back). A capacity of zero or less stores nothing.
    """

    def __init__(self, capacity: int) -> None:
        self._isbn_price_table: collections.OrderedDict[
            int, int] = collections.OrderedDict()
        self.capacity = capacity

    def lookup(self, isbn: int) -> int:
        """Return the price for isbn, or -1 if it is not cached."""
        if isbn not in self._isbn_price_table:
            return -1
        # if it's in the cache, we move it to the back of the queue to
        # update its state as the most recently used item
        self._isbn_price_table.move_to_end(isbn)
        return self._isbn_price_table[isbn]

    def insert(self, isbn: int, price: int) -> None:
        """Cache price for isbn, evicting the LRU entry when full."""
        # We add the value for key only if key is not present - we don't update
        # existing values, we only refresh the entry's recency.
        if isbn in self._isbn_price_table:
            self._isbn_price_table.move_to_end(isbn)
            return
        if self.capacity <= 0:
            # Nothing can be stored; popitem() on an empty table would
            # raise KeyError.
            return
        if len(self._isbn_price_table) >= self.capacity:
            # we need to remove the least recently used isbn to make space
            self._isbn_price_table.popitem(last=False)
        self._isbn_price_table[isbn] = price

    def erase(self, isbn: int) -> bool:
        """Remove isbn from the cache; return whether it was present."""
        if isbn in self._isbn_price_table:
            del self._isbn_price_table[isbn]
            return True
        return False

**Question 12.5**: Find the nearest repeated entries in an array

Write a program that takes an array as input and finds the distance between a closest pair of equal entries.

Because we only need to find the distance, we can have a hash table that uses the word as the key and holds the index where the word was last seen. Then if we see the word again, we compute the distance to that index and check whether it's less than the smallest distance we currently hold.

- Time: O(n) where n is the len of the array since we have to check every word.
- Space: O(m) where m is the number of unique words in the array

In [5]:
def nearest_repeated_entries(entries: list[str]) -> int:
    """Return the smallest index distance between two equal entries.

    If no entry is repeated, len(entries) is returned, which is larger
    than any distance achievable inside the array.
    """
    # dict that'll contain each word and where it was last seen
    words = {}
    nearest = len(entries)

    for i, word in enumerate(entries):
        if word in words:
            # Only the latest occurrence matters: any earlier one is
            # farther away from index i.
            nearest = min(nearest, i - words[word])
        words[word] = i
    return nearest

The book solution is essentially the same with a bit more formatting specifications.

The biggest difference would be that in the return, they accounted for the corner case of having no duplicates. In mine, the size of the array is simply returned which doesn't let the user know that no matches were found.

In [6]:
# Book solution

def find_nearest_repetition(paragraph: list[str]) -> int:
    """Return the distance between the closest pair of equal words.

    Returns -1 when no word in paragraph occurs more than once.
    """
    # Maps each word to the index of its most recent occurrence.
    word_to_latest_index: dict[str, int] = {}
    # -1 doubles as the "no repetition seen yet" marker and the final
    # result for that case, so no float('inf') sentinel or cast is needed.
    nearest_repeated_distance = -1
    for i, word in enumerate(paragraph):
        if word in word_to_latest_index:
            distance = i - word_to_latest_index[word]
            if (nearest_repeated_distance == -1
                    or distance < nearest_repeated_distance):
                nearest_repeated_distance = distance
        word_to_latest_index[word] = i
    return nearest_repeated_distance