Huffman coding is a lossless data compression algorithm that assigns variable-length codes to characters based on their frequency of occurrence, resulting in a more efficient representation of the data.

In [1]:
class Node():
    """
    Symbol = any letter
    weigth = frequency of appearance

    code = left right movement
    when moving left = 0 and right = 1
    """
    def __init__(self, symbol: str | None, weight: float) -> None:
        self.root = None
        self.symbol = symbol
        self.weight = weight
        self.code = None
        self.right = None
        self.left = None

    def __str__(self) -> str:
        return f"{self.symbol}"
    
    def __lt__(self, nxt) -> True | False:
        return self.weight < nxt.weight

In [2]:
class BinaryTree():
    def __init__(self, root: Node) -> None:
        self.root = root
        self.codes = dict()
        self.traverse(self.root)

    def traverse(self, node, val='') -> None:
        # huffman code for current node
        newVal = val
        if not node.code is None:
            newVal = val + str(node.code)

        # if node is not an edge node
        # then traverse inside it
        if(node.left):
            self.traverse(node.left, newVal)
        if(node.right):
            self.traverse(node.right, newVal)

        # if node is edge node then
        # display its huffman code
        if(not node.left and not node.right):
            self.codes[node.symbol] = newVal

    def encode(self, text: str) -> str:
        string = ""
        for letter in text:
            string += self.codes[letter]
        
        return string
    
    def decode(self, text: str) -> str:
        string = ""
        curr = self.root

        for num in text:
            if num == "0":
                curr = curr.left
            else:
                curr = curr.right

            # if node is leaf
            if curr.left is None and curr.right is None:
                string += curr.symbol
                curr = self.root
        
        return string

In [3]:
def construct_tree(text: str) -> (Node, dict):
    """
    Connecting the tree, node by node
    1) Hashmap for each letter
    2) Calculating frequency for each letter
    3) Creating priority queue
    4) Fill queue with nodes that have symbol and weight(frequency)
    5) Connecting nodes
    """
    hashmap = dict()

    # ocurence
    for letter in text:
        if letter in hashmap.keys():
            hashmap[letter] += 1
        else:
            hashmap[letter] = 1

    # frequency
    text_length = len(text)
    for key in hashmap.keys():
        hashmap[key] = hashmap[key] / text_length

    # queue 
    queue = [Node(symbol=symbol, weight=hashmap[symbol]) for symbol in hashmap.keys()]

    # sort based on weight
    queue.sort(key=lambda node: node.weight)

    # Constructing Tree
    while len(queue) > 1:
        n1, n2 = queue.pop(0), queue.pop(0)

        in_n = Node(symbol=n1.symbol+n2.symbol, weight=n1.weight+n2.weight)

        in_n.left = n1
        in_n.left.code = 0

        in_n.right = n2
        in_n.right.code = 1

        queue.insert(0, in_n)

    return queue.pop(), hashmap # root

In [7]:
text = "A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED"
print("Initial text:", text, "\n")
root, hashmap = construct_tree(text)
print("Hashmap: ", hashmap, "\n\n\n")
tree = BinaryTree(root=root)

encoded = tree.encode(text)
print(encoded, "\n\n")
decoded = tree.decode(encoded)
print(decoded)

Initial text: A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED 

Hashmap:  {'A': 0.2391304347826087, '_': 0.21739130434782608, 'D': 0.21739130434782608, 'E': 0.15217391304347827, 'C': 0.043478260869565216, 'B': 0.13043478260869565} 



100101000110100101101001000000001010001010011001000011010010000110000100010011001000010001101000101001100001100000100100001000101 


A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED


In [13]:
#? Test sum of frequncies
suma = 0
for freq in hashmap.values():
    suma += freq


assert suma == 1, f"Suma not equals 1, but equals {suma}"
print("Working Fine")

Working Fine
