# 5.4 Huffman Encoding (Simple Demo)

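Huffman coding builds a prefix-free binary code (no codeword is a prefix of another) that minimizes the total encoded length, i.e. the sum of frequency × code length over all symbols.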

Here is a small demo using `heapq`.


In [None]:
import heapq
from collections import Counter

def huffman_codes(s):
    '''Return [character, code] pairs for a Huffman encoding of s.

    A min-heap holds nodes of the form [weight, [char, code], ...]. The two
    lightest nodes are repeatedly popped and merged: every code in the
    lighter node gets '0' prepended and every code in the heavier node gets
    '1' prepended, so each code is built up from leaf towards root.

    Args:
        s: The text whose characters are to be encoded.

    Returns:
        A list of two-element lists [character, code], sorted by code
        length and then by character. The list is empty when s is empty.
        When s contains a single distinct character, that character is
        given the one-bit code '0' (an empty code could not encode it).
    '''
    freq = Counter(s)

    # Nothing to encode: avoid indexing into an empty heap below.
    if not freq:
        return []

    # With one distinct symbol the merge loop never runs and the symbol
    # would be left with an empty code; give it a single bit instead.
    if len(freq) == 1:
        return [[ch, '0'] for ch in freq]

    # Each heap item is [weight, [character, code], ...]; list comparison
    # orders by weight first, then by the contained pairs on ties.
    heap = [[count, [ch, '']] for ch, count in freq.items()]
    heapq.heapify(heap)

    while len(heap) > 1:
        lo = heapq.heappop(heap)
        hi = heapq.heappop(heap)
        # The lighter subtree becomes the '0' branch of the new node.
        for pair in lo[1:]:
            pair[1] = '0' + pair[1]
        # The heavier subtree becomes the '1' branch of the new node.
        for pair in hi[1:]:
            pair[1] = '1' + pair[1]
        # The merged node carries the combined weight and all leaf pairs.
        heapq.heappush(heap, [lo[0] + hi[0]] + lo[1:] + hi[1:])

    # heap[0] is the root: drop its weight and order the leaf pairs.
    return sorted(heap[0][1:], key=lambda p: (len(p[1]), p[0]))

# Demo: print the code table for a short sample string, one symbol per line.
text = 'hello world'
for ch, code in huffman_codes(text):
    # repr() makes the space character visible in the output.
    print(f'{ch!r} {code}')
