In [32]:
class Node:
    """A binary-tree node holding a frequency and an optional symbol.

    A node is created without children; they are attached afterwards with
    :meth:`set_left` and :meth:`set_right`.  Nodes are ordered by frequency
    only, so ``a < b`` is true when ``a`` has the smaller frequency.
    """

    def __init__(self, frequency, symbol=None):
        """Create a childless node.

        Args:
            frequency: Weight of the node; used by ``<`` comparisons.
            symbol: Symbol stored in the node.  Defaults to ``None``.
        """
        self.__frequency = frequency
        self.__symbol = symbol
        self.__left = None
        self.__right = None

    def set_left(self, child):
        """Attach ``child`` as the left subtree (replacing any previous one)."""
        self.__left = child

    def set_right(self, child):
        """Attach ``child`` as the right subtree (replacing any previous one)."""
        self.__right = child

    def __lt__(self, other):
        """Return whether this node's frequency is smaller than ``other``'s.

        Returns ``NotImplemented`` for non-``Node`` operands so that Python
        raises its usual ``TypeError`` instead of an ``AttributeError`` on a
        missing private attribute.
        """
        if not isinstance(other, Node):
            return NotImplemented
        return self.__frequency < other.__frequency

    def get_frequency(self) -> int:
        """Return the frequency given at construction time."""
        return self.__frequency

    def get_symbol(self) -> "str | None":
        """Return the stored symbol, or ``None`` if the node has none."""
        return self.__symbol

    def get_left(self) -> "Node | None":
        """Return the left child, or ``None`` if it was never set."""
        return self.__left

    def get_right(self) -> "Node | None":
        """Return the right child, or ``None`` if it was never set."""
        return self.__right

    def __str__(self):
        """Return the node formatted as ``(symbol:frequency)``."""
        return f"({self.__symbol}:{self.__frequency})"

    def __repr__(self):
        """Return the same text as ``str(self)`` so lists of nodes print readably."""
        return self.__str__()

In [33]:
# Sample text that the demo cell below feeds into frequency_of_symbols().
message_to_compress = "HELLO WORLD"
# Length of the frequency table: one counter per character code 0-255.
# NOTE(review): 7-bit ASCII only defines 128 codes, so despite the name this
# also covers codes 128-255; characters with a code above 255 do not fit.
ASCII_SYMBOLS: int = 256


def frequency_of_symbols(message: str) -> list[int]:
    """Count how often each character code occurs in ``message``.

    Args:
        message: Text whose characters are tallied.

    Returns:
        A list of ``ASCII_SYMBOLS`` counters where entry ``i`` is the number
        of occurrences of ``chr(i)`` in ``message``.

    Raises:
        IndexError: If a character's code point is ``ASCII_SYMBOLS`` or more,
            because the table has no slot for it.
    """
    table = [0] * ASCII_SYMBOLS
    # The code point of each character doubles as its index in the table.
    for code_point in map(ord, message):
        table[code_point] += 1
    return table


def filter_uppercase_and_spaces(input_string):
    """Return ``input_string`` reduced to its uppercase characters and spaces.

    Characters are kept in their original order; everything that is neither
    uppercase (per ``str.isupper``) nor a plain space ``' '`` is dropped.
    """
    kept = []
    for ch in input_string:
        if not (ch.isupper() or ch == ' '):
            continue
        kept.append(ch)
    return ''.join(kept)


def create_forest(frequencies):
    """Build the initial forest of single-node trees from a frequency table.

    ``frequencies[code]`` is the number of occurrences of ``chr(code)``.
    One ``Node(count, chr(code))`` is created for every position whose count
    is positive; positions with a zero count are skipped.  The nodes are
    returned in ascending order of character code.
    """
    return [
        Node(count, chr(code))
        for code, count in enumerate(frequencies)
        if count > 0
    ]


def get_smallest(forest):
    """Remove and return the smallest item of ``forest``.

    Items are compared with ``<`` only.  When several items tie for the
    minimum, the one closest to the front of the list is chosen.  The list
    is modified in place; popping from an empty list raises ``IndexError``.
    """
    best = 0
    # Start at position 1: position 0 is the initial candidate.
    for position, candidate in enumerate(forest[1:], start=1):
        if candidate < forest[best]:
            best = position
    return forest.pop(best)


def huffman(forest):
    """Merge the trees in ``forest`` into one tree and return its root.

    While at least two trees remain, the two smallest are removed with
    ``get_smallest`` and joined under a new symbol-less ``Node`` whose
    frequency is the sum of theirs.  The first tree removed becomes the
    left child and the second the right child.

    ``forest`` is modified in place and ends up holding only the root.
    Indexing an empty ``forest`` raises ``IndexError``.
    """
    while len(forest) >= 2:
        left_child = get_smallest(forest)
        right_child = get_smallest(forest)
        combined = left_child.get_frequency() + right_child.get_frequency()
        parent = Node(combined)
        parent.set_left(left_child)
        parent.set_right(right_child)
        # The merged tree goes back into the pool for later rounds.
        forest.append(parent)
    root = forest[0]
    return root

In [38]:
# Tally how often each character code occurs in the sample message.
frequencies = frequency_of_symbols(message_to_compress)
print(frequencies)
# One single-node tree per character that actually occurs in the message.
forest = create_forest(frequencies)
print(forest)
# huffman() pops nodes out of `forest` while merging, so after this call the
# list no longer holds the individual leaves, only the root.
huffman_tree = huffman(forest)
# Spot check: print the node reached by going left, left, right from the root.
print(huffman_tree.get_left().get_left().get_right())

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[( :1), (D:1), (E:1), (H:1), (L:3), (O:2), (R:1), (W:1)]
(H:1)
