# In [5]:
import heapq
import os
import struct

class HuffmanNode:
    """A node of a Huffman tree.

    Leaves carry a symbol in ``char``; internal nodes keep ``char`` as
    ``None`` and link to their children through ``left`` and ``right``.
    """

    def __init__(self, char=None, freq=0):
        """Create a childless node holding *char* with weight *freq*."""
        self.char, self.freq = char, freq
        # Children are attached by the caller after construction.
        self.left = self.right = None

    def __lt__(self, other):
        """Order nodes by frequency so they can live in a ``heapq`` heap."""
        return self.freq < other.freq

def build_frequency_table(data):
    """Count how often each element of *data* occurs.

    Returns a dict mapping each distinct element to its number of
    occurrences; keys appear in order of first occurrence.
    """
    counts = {}
    for symbol in data:
        if symbol in counts:
            counts[symbol] += 1
        else:
            counts[symbol] = 1
    return counts

def build_huffman_tree(freq_table):
    """Build a Huffman tree from a symbol -> frequency mapping.

    Args:
        freq_table: dict mapping each symbol to its occurrence count.

    Returns:
        The root ``HuffmanNode``, or ``None`` when *freq_table* is empty.
        The root is always an internal node when at least one symbol is
        present, so every symbol ends up with a code of at least one bit.
    """
    heap = [HuffmanNode(char, freq) for char, freq in freq_table.items()]
    if not heap:
        return None
    if len(heap) == 1:
        # A lone leaf used as the root would be assigned the empty code, so
        # the encoded payload would contain no bits and the data could not
        # be recovered.  Hang the leaf (and a zero-weight twin carrying the
        # same symbol) under an internal root: either branch decodes to the
        # symbol, and the symbol gets a real 1-bit code.
        only = heap[0]
        root = HuffmanNode(freq=only.freq)
        root.left = only
        root.right = HuffmanNode(only.char, 0)
        return root
    heapq.heapify(heap)
    while len(heap) > 1:
        # Merge the two lightest subtrees under a new internal node.
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
        parent = HuffmanNode(freq=left.freq + right.freq)
        parent.left, parent.right = left, right
        heapq.heappush(heap, parent)
    return heap[0]

def build_codes(node, prefix="", code_map=None):
    """Map every leaf symbol under *node* to its bit-string code.

    Going to a left child appends "0" to the code, going to a right child
    appends "1".  *prefix* is the code accumulated so far, and *code_map*
    (created when omitted) is filled in place and returned.
    """
    table = {} if code_map is None else code_map
    # Explicit stack instead of recursion; the right child is pushed first
    # so that the left subtree is still visited first.
    pending = [(node, prefix)]
    while pending:
        current, path = pending.pop()
        if current.char is not None:
            table[current.char] = path
            continue
        pending.append((current.right, path + "1"))
        pending.append((current.left, path + "0"))
    return table

def serialize_tree(node, bits):
    """Append a pre-order bit encoding of the tree rooted at *node* to *bits*.

    A leaf is written as '1' followed by the 8-bit code point of its
    symbol; an internal node is written as '0' followed by its left and
    then its right subtree.

    Args:
        node: tree node exposing ``char``, ``left`` and ``right``.
        bits: list of bit-string fragments, extended in place.

    Raises:
        ValueError: if a leaf symbol has a code point above 255.  Such a
            symbol does not fit the fixed 8-bit field; formatting it
            anyway would emit more than 8 bits and silently corrupt the
            serialized tree.
    """
    if node.char is not None:
        code_point = ord(node.char)
        # Validate before touching ``bits`` so a failure leaves no
        # half-written leaf behind.
        if code_point > 0xFF:
            raise ValueError(
                f"cannot serialize symbol {node.char!r}: code point "
                f"{code_point} does not fit in 8 bits"
            )
        bits.append('1')
        bits.append(f"{code_point:08b}")
    else:
        bits.append('0')
        serialize_tree(node.left, bits)
        serialize_tree(node.right, bits)

def deserialize_tree(bit_iter):
    """Rebuild a tree from an iterator of '0'/'1' characters.

    Reads one flag bit: '1' means a leaf whose symbol is given by the
    next 8 bits (interpreted as a code point); anything else means an
    internal node followed by its left and then its right subtree.
    """
    marker = next(bit_iter)
    if marker != '1':
        # Internal node: children follow in pre-order, left first.
        branch = HuffmanNode()
        branch.left = deserialize_tree(bit_iter)
        branch.right = deserialize_tree(bit_iter)
        return branch
    code_bits = ''.join(next(bit_iter) for _ in range(8))
    return HuffmanNode(chr(int(code_bits, 2)))

def bits_to_bytes(bitstr):
    """Pack a string of '0'/'1' characters into bytes.

    The string is right-padded with '0' up to a multiple of 8 bits.
    Returns a ``(bytearray, padding)`` pair where *padding* is the number
    of pad bits that were added (0-7).
    """
    padding = -len(bitstr) % 8
    padded = bitstr + '0' * padding
    packed = bytearray()
    for start in range(0, len(padded), 8):
        packed.append(int(padded[start:start + 8], 2))
    return packed, padding

def bytes_to_bits(byts):
    """Expand each byte value of *byts* into 8 '0'/'1' characters, concatenated."""
    chunks = []
    for value in byts:
        chunks.append(format(value, '08b'))
    return ''.join(chunks)

def compress_file(input_path, output_path):
    """Huffman-compress the UTF-8 text file *input_path* into *output_path*.

    Output layout:
        [tree_pad: 1 byte]            pad bits added to the tree bit stream
        [tree_len: 4 bytes, big-endian] length of the tree bit stream in bits
        [tree_bytes]                  serialized tree, padded to whole bytes
        [data_pad: 1 byte]            pad bits added to the data bit stream
        [data_bytes]                  encoded text, padded to whole bytes

    Prints the original and compressed sizes in bytes.  Returns ``None``.
    """
    # 1) Read
    with open(input_path, 'r', encoding='utf-8') as f:
        data = f.read()
    # 2) Build tree & codes
    freq = build_frequency_table(data)
    tree = build_huffman_tree(freq)
    if tree is None:
        # Empty input: there is no tree, and build_codes/serialize_tree
        # would fail dereferencing None.  Use a placeholder leaf so the
        # header stays well-formed; with zero data bits following it, the
        # decoder never visits the leaf and produces empty output.
        tree = HuffmanNode('\0')
    codes = build_codes(tree)
    # 3) Serialize tree
    tree_bits_list = []
    serialize_tree(tree, tree_bits_list)
    tree_bitstr = ''.join(tree_bits_list)
    tree_bytes, tree_pad = bits_to_bytes(tree_bitstr)
    # 4) Encode data
    data_bitstr = ''.join(codes[ch] for ch in data)
    data_bytes, data_pad = bits_to_bytes(data_bitstr)
    # 5) Write file: [tree_pad:1B][tree_len:4B][tree_bytes][data_pad:1B][data_bytes]
    with open(output_path, 'wb') as out:
        out.write(struct.pack('B', tree_pad))
        # tree_len counts bits (before padding), not bytes.
        out.write(struct.pack('>I', len(tree_bitstr)))
        out.write(tree_bytes)
        out.write(struct.pack('B', data_pad))
        out.write(data_bytes)
    orig, comp = len(data.encode('utf-8')), os.path.getsize(output_path)
    print(f"Original: {orig} B → Compressed: {comp} B")

def decompress_file(input_path, output_path):
    """Decode the archive at *input_path* and write the text to *output_path*.

    Expects the layout: 1-byte tree padding count, 4-byte big-endian tree
    length in bits, the padded tree bytes, 1-byte data padding count, then
    the encoded data up to end of file.  The result is written as UTF-8
    text and a confirmation line is printed.  Returns ``None``.
    """
    with open(input_path, 'rb') as src:
        (tree_pad,) = struct.unpack('B', src.read(1))
        (tree_len,) = struct.unpack('>I', src.read(4))
        # Tree bits plus their padding always make up whole bytes.
        packed_tree = src.read((tree_len + tree_pad) // 8)
        (data_pad,) = struct.unpack('B', src.read(1))
        packed_data = src.read()

    # Rebuild the tree from its bit stream, dropping the pad bits.
    root = deserialize_tree(iter(bytes_to_bits(packed_tree)[:tree_len]))

    # Recover the payload bits, dropping the trailing pad bits if any.
    payload_bits = bytes_to_bits(packed_data)
    if data_pad:
        payload_bits = payload_bits[:len(payload_bits) - data_pad]

    # Walk the tree bit by bit; reaching a leaf emits its symbol and
    # restarts from the root.
    decoded = []
    cursor = root
    for bit in payload_bits:
        if bit == '0':
            cursor = cursor.left
        else:
            cursor = cursor.right
        if cursor.char is not None:
            decoded.append(cursor.char)
            cursor = root

    with open(output_path, 'w', encoding='utf-8') as dst:
        dst.write(''.join(decoded))
    print(f"Decompressed to '{output_path}'")

if __name__ == "__main__":
    # Demo round trip: compress a text file, then decompress the result.
    source_path, archive_path, restored_path = "input.txt", "compressed.bin", "decompressed.txt"
    compress_file(source_path, archive_path)
    decompress_file(archive_path, restored_path)

# Output:
# Original: 287 B → Compressed: 183 B
# Decompressed to 'decompressed.txt'
