<a href="https://colab.research.google.com/github/EugenHotaj/pytorch-generative/blob/master/notebooks/__draft__coding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
from typing import Optional
from dataclasses import dataclass
import heapq

## Huffman Codes

In [59]:
# Sample text whose characters are the symbols to be Huffman-coded.
message = "A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED"

In [75]:
@dataclass
class Node:
    """A node of a binary Huffman tree.

    Nodes compare by `weight` only, which lets them be kept in a `heapq`
    priority queue.
    """

    # Symbol text carried by this node.
    data: str
    # Weight used when ordering nodes against each other.
    weight: int
    # Child nodes; both default to None when not supplied.
    left: Optional["Node"] = None
    right: Optional["Node"] = None

    def __lt__(self, other: "Node") -> bool:
        """Return True when this node's weight is strictly below `other`'s."""
        return other.weight > self.weight

In [76]:
# Tally how many times each symbol occurs in the message.
abc = {}
for m in message:
    if m in abc:
        abc[m] += 1
    else:
        abc[m] = 1

In [87]:
def build_tree(abc):
    """Build a Huffman tree from a symbol -> weight mapping.

    The two lowest-weight nodes are repeatedly removed from a min-heap and
    merged under a new parent whose weight is the sum of theirs, until a
    single node (the root) is left.

    Args:
        abc: Mapping from each symbol to its weight (occurrence count).

    Returns:
        The root `Node` of the tree. When `abc` holds a single symbol the
        root is that symbol's leaf node.

    Raises:
        IndexError: If `abc` is empty, since there is no node to return.
    """
    nodes = [Node(data=k, weight=v) for k, v in abc.items()]
    heapq.heapify(nodes)

    while len(nodes) > 1:
        n1, n2 = heapq.heappop(nodes), heapq.heappop(nodes)
        parent = Node(
            data=n1.data + n2.data,
            weight=n1.weight + n2.weight,
            left=n1,
            right=n2,
        )
        # heappush (rather than list.append) restores the heap invariant, so
        # the next two pops really are the two lightest remaining nodes.
        heapq.heappush(nodes, parent)

    return nodes[0]

def build_codes(tree):
    """Map every leaf symbol of a Huffman tree to its bit-string code.

    Descending to a left child appends "0" to the code and descending to a
    right child appends "1". A node without a left child is treated as a
    leaf.

    Args:
        tree: Root node of the tree; nodes expose `data`, `left` and `right`.

    Returns:
        Dict mapping each leaf's `data` to its code, a string of "0"/"1".
        If the root is itself a leaf (single-symbol alphabet) that symbol is
        given the one-bit code "0".
    """

    def _dfs(node, codes, prefix):
        # Leaves record the path taken to reach them; inner nodes recurse.
        if node.left is None:
            codes[node.data] = prefix
            return
        _dfs(node.left, codes, prefix + "0")
        _dfs(node.right, codes, prefix + "1")

    codes = {}
    if tree.left is None:
        # A lone leaf would otherwise get the empty code, so every message
        # would encode to "" and could not be decoded back.
        codes[tree.data] = "0"
    else:
        _dfs(tree, codes, "")
    return codes

def encode(message, codes):
    """Encode `message` by concatenating the code of each of its symbols.

    Args:
        message: Iterable of symbols (e.g. a string).
        codes: Mapping from symbol to its code string.

    Returns:
        The concatenated code strings, in message order; "" for an empty
        message.

    Raises:
        KeyError: If a symbol of `message` has no entry in `codes`.
    """
    # A single join avoids rebuilding the growing string on every symbol.
    return "".join(codes[m] for m in message)

def decode(message, codes):
    """Decode a string produced by `encode` back into its symbols.

    Characters of `message` are accumulated until they match a complete
    code; the matching symbol is emitted and accumulation restarts.

    Args:
        message: The encoded string.
        codes: Mapping from symbol to its code string (the same table that
            was used for encoding).

    Returns:
        The decoded symbols joined into one string. Trailing characters that
        do not complete a code are dropped.
    """
    inverse_codes = {v: k for k, v in codes.items()}
    symbols = []
    current = ""
    for bit in message:
        current += bit
        if current in inverse_codes:
            symbols.append(inverse_codes[current])
            current = ""
    # Join once at the end instead of growing the output string per symbol.
    return "".join(symbols)

In [88]:
# Build the tree from the symbol counts, then derive each symbol's code from it.
tree = build_tree(abc)
codes = build_codes(tree)

In [89]:
# Round trip: encode the message, then decode the result with the same code table.
encoded = encode(message, codes)
decoded = decode(encoded, codes)

In [90]:
decoded

'A_DEAD_DAD_CEDED_A_BAD_BABE_A_BEADED_ABACA_BED'