#### Solution
# Assignment 5 - Greedy Algorithms
## Part 2 - Huffman algorithm 

### 1 - Data Structure Setup

In [1]:
class HeapNode:
    """Node of a Huffman tree that can be stored directly in a ``heapq`` heap.

    Nodes are ordered and compared by ``value`` only, so ``heapq`` always
    surfaces the node with the smallest value first.

    Attributes:
        character: The symbol this node stands for. ``merge_code`` creates
            internal (merged) nodes with ``None`` here.
        value: The weight used for ordering (the character frequency, or the
            sum of the children's values for a merged node).
        left_child: Left subtree, or ``None``.
        right_child: Right subtree, or ``None``.
    """

    def __init__(self, character, value):
        self.character = character
        self.value = value
        # Children are attached after construction by whoever builds the tree.
        self.left_child = None
        self.right_child = None

    def __lt__(self, other):
        """Order nodes by ``value``; defer to Python for unrelated types."""
        if not isinstance(other, HeapNode):
            # Returning NotImplemented (rather than False) lets Python try the
            # reflected operation and raise TypeError for unsupported types,
            # instead of silently claiming "not less than".
            return NotImplemented
        return self.value < other.value

    def __eq__(self, other):
        """Two nodes are equal when their ``value`` is equal."""
        if not isinstance(other, HeapNode):
            # Python falls back to identity comparison, so ``node == 5`` is
            # still False.
            return NotImplemented
        return self.value == other.value

    # NOTE: overriding __eq__ without defining __hash__ makes instances
    # unhashable, so nodes cannot be used as dict keys or set members.

    def __str__(self) -> str:
        """Return a one-line description; children are rendered recursively."""
        return (
            f"Character: '{self.character}' "
            f"Value: '{self.value}' "
            f"Left child: '{self.left_child}' "
            f"Right child: '{self.right_child}' "
        )

    def __repr__(self) -> str:
        return self.__str__()

### 2 - Making the frequency heap 

In [2]:
import heapq


def make_frequency_heap(string: str) -> list:
    """Build a min-heap of ``HeapNode`` objects, one per distinct character.

    Args:
        string: The text whose characters are counted.

    Returns:
        A list arranged as a ``heapq`` min-heap. Each entry is a ``HeapNode``
        holding a character and the number of times it occurs in ``string``.
        An empty string gives an empty list.
    """
    # Tally how often every character occurs; the character is the key and
    # its frequency is the value.
    counts = {}
    for symbol in string:
        counts[symbol] = counts.get(symbol, 0) + 1

    # Push one node per distinct character, in first-occurrence order.
    min_heap = []
    for symbol, count in counts.items():
        heapq.heappush(min_heap, HeapNode(symbol, count))

    return min_heap


# Quick check of make_frequency_heap on a small string whose characters
# occur 1, 2, 3 and 4 times respectively.
teststring = "ABBCCCDDDD"
heap = make_frequency_heap(teststring)
# Iterating the list prints the nodes in the heap's internal array order.
for node in heap:
    print(node)

Character: 'A' Value: '1' Left child: 'None' Right child: 'None' 
Character: 'B' Value: '2' Left child: 'None' Right child: 'None' 
Character: 'C' Value: '3' Left child: 'None' Right child: 'None' 
Character: 'D' Value: '4' Left child: 'None' Right child: 'None' 


### 3 - Merging the codes

In [3]:
def merge_code(heap: list) -> HeapNode:
    """Combine the nodes of a frequency heap into a single Huffman tree.

    The two smallest nodes are repeatedly removed and replaced by a new
    parent whose value is the sum of theirs, until one node is left.

    Args:
        heap: A ``heapq`` min-heap of ``HeapNode`` objects. It is modified in
            place and contains only the root when the function returns.

    Returns:
        The root ``HeapNode`` of the finished tree.

    Raises:
        IndexError: If ``heap`` is empty, because there is no root to return.
    """
    # Each merge removes two nodes and adds one, so the heap shrinks by one
    # per round and exactly len(heap) - 1 rounds leave a single root.
    for _ in range(len(heap) - 1):
        # The smallest value is always at the top of the heap.
        smallest = heapq.heappop(heap)
        second_smallest = heapq.heappop(heap)

        # Merged nodes carry no character, only the combined value.
        parent = HeapNode(None, smallest.value + second_smallest.value)
        parent.left_child, parent.right_child = smallest, second_smallest

        heapq.heappush(heap, parent)

    return heap[0]

### 4 - Traversing the tree

In [4]:
def traverse_huffman(rootnode: HeapNode) -> dict:
    """Derive the code table from a Huffman tree.

    Args:
        rootnode: Root of the tree, as returned by ``merge_code``.

    Returns:
        A dict mapping each character to its code, a string of ``"0"`` and
        ``"1"`` characters. Going to a left child contributes ``"0"`` and
        going to a right child contributes ``"1"``.
    """
    # A tree that is just one leaf (text with a single distinct character)
    # has no edges, so a plain traversal would assign the empty string as the
    # code. An empty code encodes to zero bits and cannot be decoded, so give
    # the lone character a one-bit code instead.
    if rootnode.character is not None:
        return {rootnode.character: "0"}

    # Filled in by the recursive walk; the path so far starts out empty.
    codes = {}
    traverse_huffman_recursive(rootnode, "", codes)
    return codes


def traverse_huffman_recursive(node: HeapNode, current_code: str, codes: dict) -> None:
    """Walk the subtree under ``node`` and record the code of every leaf.

    Args:
        node: Root of the subtree to visit.
        current_code: The code accumulated on the path from the tree root
            down to ``node``.
        codes: Dict that is filled in place, mapping character -> code.

    Returns:
        None. Results are written into ``codes``.
    """
    # A node that carries a character is a leaf: store the code collected on
    # the way down under that character and stop descending.
    if node.character is not None:
        codes[node.character] = current_code
        return

    # Left side of the tree: append "0" to the current code.
    traverse_huffman_recursive(node.left_child, current_code + "0", codes)
    # Right side of the tree: append "1" to the current code.
    traverse_huffman_recursive(node.right_child, current_code + "1", codes)

### Running the program
Here is a main function that runs the whole program; use it to check that you get the correct output:

In [5]:
def main():
    """Build the Huffman code table for a fixed sample text and print it."""
    text = "ABBBBCCCDDEEEEAAAEEBBBCC"
    # Frequency heap -> Huffman tree -> code table.
    rootnode = merge_code(make_frequency_heap(text))
    print(traverse_huffman(rootnode))


# Run the demo; it prints the code table for the sample text.
main()

{'C': '00', 'E': '01', 'D': '100', 'A': '101', 'B': '11'}
