Step 1: Count how often each character occurs in the input string, using a plain dictionary rather than a library helper.

Step 2: Create a node for each character with its frequency.

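Step 3: Order the nodes by increasing frequency. The code keeps a plain sorted list and always takes nodes from the front, so it acts as a simple priority queue rather than a true binary heap.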

Step 4: Construct the Huffman Tree by repeatedly merging the two lowest-frequency nodes.

Step 5: Traverse the tree to assign binary codes (left = 0, right = 1) to each character.

Step 6: Encode the input string by replacing each character with its Huffman code.

In [35]:
class Node:
    """A single vertex of the Huffman tree.

    Leaves carry a character in ``char``; nodes created by merging two
    subtrees are built with ``char=None`` and get their children assigned
    after construction.
    """

    def __init__(self, char=None, freq=0):
        """Create a childless node holding ``char`` and its frequency ``freq``."""
        # Children start out empty; the tree builder attaches them later.
        self.left = self.right = None
        self.char, self.freq = char, freq

    def __lt__(self, other):
        """Order nodes by frequency only; the character is not compared."""
        return self.freq < other.freq

In [37]:
def get_frequencies(text):
    """Return a dict mapping each character of ``text`` to its occurrence count.

    Keys appear in the order the characters are first seen. An empty
    ``text`` yields an empty dict.
    """
    counts = {}
    for symbol in text:
        # Unseen characters start from 0, so the first occurrence stores 1.
        counts[symbol] = counts.get(symbol, 0) + 1
    return counts

In [39]:
def build_min_heap(nodes):
    """Sort ``nodes`` in place by ascending ``freq`` and return the same list.

    Despite the name, no binary heap is built: this is a simple exchange
    sort that leaves the lowest-frequency node at index 0. The sort is not
    stable, so nodes with equal frequency may change relative order.
    """
    size = len(nodes)
    for slot in range(size):
        # Pull the smallest remaining frequency into position ``slot``.
        for probe in range(slot + 1, size):
            if nodes[probe].freq < nodes[slot].freq:
                nodes[slot], nodes[probe] = nodes[probe], nodes[slot]
    return nodes


In [41]:
def build_huffman_tree(freq_map):
    """Build a Huffman tree from a character-frequency map and return its root.

    Args:
        freq_map: dict mapping each character to its frequency.

    Returns:
        The root ``Node`` of the tree, or ``None`` when ``freq_map`` is
        empty. With a single entry the root is that character's leaf node.
    """
    # Nothing to encode: without this guard the final ``nodes[0]`` would
    # raise IndexError on an empty list.
    if not freq_map:
        return None

    nodes = [Node(ch, freq) for ch, freq in freq_map.items()]
    nodes = build_min_heap(nodes)

    while len(nodes) > 1:
        # The list is sorted ascending, so the two lowest-frequency nodes
        # are at the front.
        left = nodes.pop(0)
        right = nodes.pop(0)

        # Internal nodes carry no character, only the combined frequency.
        merged = Node(None, left.freq + right.freq)
        merged.left = left
        merged.right = right

        # Re-sort so the merged node lands in its frequency position.
        nodes.append(merged)
        nodes = build_min_heap(nodes)

    # The single remaining node is the root of the Huffman tree.
    return nodes[0]


In [43]:
def generate_codes(node, code="", code_map=None):
    """Walk the Huffman tree and map each character to its binary code.

    A left edge appends "0" and a right edge appends "1". A node is treated
    as a leaf when its ``char`` is not ``None``.

    Args:
        node: root of the (sub)tree to traverse, or ``None``.
        code: bit string accumulated on the path from the root to ``node``.
        code_map: dict to fill in; a fresh dict is created when omitted so
            results from earlier calls never leak into later ones.

    Returns:
        ``code_map`` (the dict passed in, or the newly created one). It is
        empty when ``node`` is ``None``.
    """
    # A mutable default would be shared by every call, so allocate per call.
    if code_map is None:
        code_map = {}

    if node is None:
        return code_map

    if node.char is not None:
        # Only a root that is itself a leaf (single distinct character)
        # arrives here with an empty path; give it a one-bit code so the
        # encoded text is not empty.
        code_map[node.char] = code or "0"
        return code_map

    generate_codes(node.left, code + "0", code_map)
    generate_codes(node.right, code + "1", code_map)

    return code_map

In [45]:
def encode_text(text, code_map):
    """Return ``text`` with every character replaced by its code.

    Args:
        text: the string to encode.
        code_map: dict mapping each character to its code string.

    Returns:
        The concatenation of the codes, in the order the characters appear
        in ``text``; an empty string for empty ``text``.

    Raises:
        KeyError: if ``text`` contains a character missing from ``code_map``.
    """
    # join builds the result in one pass instead of repeated string +=.
    return "".join(code_map[ch] for ch in text)

In [49]:
# Sample input for the walkthrough.
text = "aaaabbbc"

# Pipeline: count characters -> build the tree -> derive one code per character.
frequencies = get_frequencies(text)
root = build_huffman_tree(frequencies)
codes = generate_codes(root)

print("Character codes:")
for ch in codes:
    code = codes[ch]
    print(f"{ch}: {code}")

Character codes:
a: 0
c: 10
b: 11


In [51]:
# Replace each character of the sample text with its code from ``codes``.
encoded = encode_text(text, codes)
# The leading "\n" prints a blank line before the result.
print("\nEncoded string:", encoded)


Encoded string: 000011111110
