###  Implementing Huffman Coding for Data Compression

In [1]:
import heapq
from collections import defaultdict

class Node:
    """A node of a Huffman tree.

    Leaves carry a symbol in ``char``; internal nodes are created with
    ``char=None`` and get their children attached after construction.
    """

    def __init__(self, char, freq):
        # Symbol and its weight (occurrence count, or the sum of the
        # children's weights for a merged node).
        self.char, self.freq = char, freq
        # Children start out unset; the tree builder assigns them.
        self.left = self.right = None

    def __lt__(self, other):
        """Order nodes by weight so ``heapq`` can use them directly."""
        lighter = self.freq < other.freq
        return lighter

def build_huffman_tree(text):
    """Build a Huffman tree for the symbols in *text*.

    Args:
        text: Iterable of hashable symbols (typically a ``str``).

    Returns:
        The root ``Node`` of the tree, or ``None`` when *text* contains no
        symbols (previously this case raised ``IndexError``).
    """
    frequency = defaultdict(int)
    for char in text:
        frequency[char] += 1

    # Nothing to encode: there is no tree to return.
    if not frequency:
        return None

    priority_queue = [Node(char, freq) for char, freq in frequency.items()]
    heapq.heapify(priority_queue)

    # Repeatedly merge the two lightest nodes until a single root remains.
    while len(priority_queue) > 1:
        left = heapq.heappop(priority_queue)
        right = heapq.heappop(priority_queue)
        merged = Node(None, left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(priority_queue, merged)

    return priority_queue[0]

def generate_huffman_codes(node, code='', codes=None):
    """Collect the bit-string code of every symbol in the tree under *node*.

    Args:
        node: Root of the (sub)tree to walk, or ``None``.
        code: Bit-string prefix accumulated on the path down to *node*.
        codes: Optional dict to fill in. A fresh dict is created when it is
            omitted, so separate calls never share results.

    Returns:
        The dict mapping each symbol to its code ('0' = left, '1' = right).
    """
    # A mutable default would be shared across calls and leak old entries.
    if codes is None:
        codes = {}
    if node is None:
        return codes
    if node.char is not None:
        # A tree made of one lone leaf would otherwise give its symbol the
        # zero-length code '', which cannot be decoded; use one bit instead.
        lone_root = code == '' and node.left is None and node.right is None
        codes[node.char] = '0' if lone_root else code
    generate_huffman_codes(node.left, code + '0', codes)
    generate_huffman_codes(node.right, code + '1', codes)
    return codes

def huffman_encoding(text):
    """Return a dict mapping each symbol of *text* to its Huffman code.

    An empty *text* yields an empty dict.
    """
    # No symbols means no tree; avoid building one at all.
    if not text:
        return {}
    root = build_huffman_tree(text)
    # Pass a fresh dict explicitly so results from an earlier call can never
    # show up in this one.
    return generate_huffman_codes(root, '', {})

def get_compressed_data_size(text, huffman_codes):
    """Return the number of bits needed to encode *text* with *huffman_codes*.

    Args:
        text: Iterable of symbols to encode.
        huffman_codes: Mapping from symbol to its bit-string code.

    Returns:
        Sum of the code lengths over every symbol occurrence in *text*.

    Raises:
        KeyError: If *text* contains a symbol missing from *huffman_codes*.
    """
    # One pass over the text; no per-symbol rescans via text.count().
    return sum(len(huffman_codes[char]) for char in text)

# Demo: encode a short sample and compare its size against plain 8-bit
# characters.
text = "hello greedy"
huffman_codes = huffman_encoding(text)
original_size = 8 * len(text)  # uncompressed baseline: 8 bits per character
compressed_size = get_compressed_data_size(text, huffman_codes)

print(f"Huffman Codes: {huffman_codes}")
print(f"Original Size: {original_size} bits")
print(f"Compressed Size: {compressed_size} bits")
print(f"Compression Ratio: {original_size / compressed_size}")


Huffman Codes: {'l': '00', 'e': '01', 'y': '100', 'r': '1010', 'g': '1011', 'd': '1100', ' ': '1101', 'h': '1110', 'o': '1111'}
Original Size: 96 bits
Compressed Size: 37 bits
Compression Ratio: 2.5945945945945947
