<a href="https://colab.research.google.com/github/RajAakash/AlgorithmAssignment/blob/main/nvc22Assignment4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import heapq
from collections import defaultdict, Counter

class Node:
    """A node of a Huffman tree.

    Attributes are plain instance fields:
      * ``char``  - the symbol stored in the node (``None`` is used by the
        tree builder for merged/internal nodes).
      * ``freq``  - the weight used to order nodes.
      * ``left`` / ``right`` - child nodes, both ``None`` on creation.
    """

    def __init__(self, char, freq):
        self.char, self.freq = char, freq
        # Children are attached later by whoever assembles the tree.
        self.left = self.right = None

    def __lt__(self, other):
        """Order nodes by ``freq`` only, so they can live in a ``heapq`` heap."""
        return self.freq < other.freq

def build_huffman_tree(text):
    """Build a Huffman tree for the symbols in ``text``.

    Args:
        text: Iterable of hashable symbols (typically a ``str``).

    Returns:
        The root ``Node`` of the tree, or ``None`` when ``text`` is empty.
        When ``text`` contains a single distinct symbol, the leaf is hung as
        the left child of an internal root so the symbol receives the
        one-bit code ``"0"`` rather than an empty code (an empty code cannot
        be decoded back).
    """
    heap = [Node(char, freq) for char, freq in Counter(text).items()]

    # Nothing to encode: there is no tree to return.
    if not heap:
        return None

    # A lone leaf would be its own root and get the empty code; give it a
    # parent so that every symbol occurrence costs exactly one bit.
    if len(heap) == 1:
        only_leaf = heap[0]
        root = Node(None, only_leaf.freq)
        root.left = only_leaf
        return root

    heapq.heapify(heap)

    # Repeatedly merge the two lightest subtrees until one tree remains.
    while len(heap) > 1:
        left = heapq.heappop(heap)
        right = heapq.heappop(heap)
        merged = Node(None, left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(heap, merged)

    return heap[0]

def build_huffman_codes(node, code="", mapping=None):
    """Collect the bit-string code of every symbol below ``node``.

    The tree is walked in pre-order (node, left subtree, right subtree);
    going left appends ``"0"`` and going right appends ``"1"`` to the prefix.

    Args:
        node: Root of the (sub)tree to walk; ``None`` yields no entries.
        code: Prefix already accumulated for ``node``.
        mapping: Optional dict to fill in place; a new dict is created when
            omitted.

    Returns:
        The dict mapping each non-``None`` ``char`` to its code string.
    """
    table = {} if mapping is None else mapping

    # Explicit stack instead of recursion; right is pushed first so that the
    # left subtree is popped (and therefore recorded) before the right one.
    pending = [(node, code)]
    while pending:
        current, prefix = pending.pop()
        if current is None:
            continue
        if current.char is not None:
            table[current.char] = prefix
        pending.append((current.right, prefix + "1"))
        pending.append((current.left, prefix + "0"))

    return table

def huffman_encode(text, mapping):
    """Return the concatenation of ``mapping[symbol]`` for each symbol in ``text``.

    Raises:
        KeyError: If a symbol of ``text`` has no entry in ``mapping``.
    """
    pieces = [mapping[symbol] for symbol in text]
    return ''.join(pieces)

def huffman_decode(encoded_text, tree):
    """Decode a string of bits by walking ``tree`` from its root.

    Args:
        encoded_text: Iterable of bit characters. ``'0'`` moves to the left
            child; any other character moves to the right child.
        tree: Root node of the Huffman tree (objects exposing ``char``,
            ``left`` and ``right``).

    Returns:
        The decoded string. Trailing bits that do not complete a code are
        ignored.
    """
    # Collect symbols in a list and join once; repeated ``str +=`` is
    # quadratic in the worst case.
    decoded_chars = []
    current_node = tree

    for bit in encoded_text:
        current_node = current_node.left if bit == '0' else current_node.right

        # A node carrying a symbol ends the current code: emit and restart.
        if current_node.char is not None:
            decoded_chars.append(current_node.char)
            current_node = tree

    return "".join(decoded_chars)

def main():
    """Run a Huffman round trip on a fixed sample and print each stage.

    Prints the per-character code table, the encoded bit string and the text
    obtained by decoding that bit string again.
    """
    message = "This is a sample text for Huffman encoding,decoding. It has mnultiple line of texts and is used for huffman encoding. This is one part of sentence."

    # Tree -> code table -> encoded bit string.
    tree = build_huffman_tree(message)
    code_table = build_huffman_codes(tree)
    bitstream = huffman_encode(message, code_table)

    # Variable-length binary code of every character.
    for symbol in code_table:
        print(f"{symbol}: {code_table[symbol]}")

    # Compressed representation of the whole input.
    print("Encoded Text:", bitstream)

    # Round trip back to text and show the result.
    restored = huffman_decode(bitstream, tree)
    print("Decoded Text:", restored)


if __name__ == "__main__":
    main()

e: 000
a: 0010
p: 00110
m: 00111
t: 0100
o: 0101
c: 01100
l: 01101
f: 0111
h: 10000
T: 100010
I: 1000110
,: 1000111
i: 1001
s: 1010
u: 10110
g: 101110
H: 1011110
x: 1011111
 : 110
d: 11100
r: 111010
.: 111011
n: 1111
Encoded Text: 1000101000010011010110100110101100010110101000100011100110011010001100100000101111101001100111010111101011010111101011001110111001110010111111000011110110001011110010011111101110100011111100000011000101111001001111110111011101111010001100100110100000010101011000111111110110011010100100100110011010001100110110011111000110010101111100100000101111101001010110001011111110011010011010110101101010000111001100111010111101011010000101100111011100111001011111100001111011000101111001001111110111011101111010001010000100110101101001101011001011111000110001100010111010010011001010111110101000011110100000111101100000111011
Decoded Text: This is a sample text for Huffman encoding,decoding. It has mnultiple line of texts and is used for huffman encoding. This is one part of 

In [None]:
from collections import Counter

def fixed_length_encode(text, bits):
    """Assign every distinct symbol of ``text`` a code of exactly ``bits`` bits.

    When every symbol's code point fits in ``bits`` bits, the zero-padded
    code point is used (so 8-bit codes of ASCII text are the ASCII values).
    Otherwise the distinct symbols are sorted and numbered 0, 1, 2, ... and
    that rank is encoded instead; simply formatting the code point would
    produce codes longer than ``bits``.

    Args:
        text: The input string.
        bits: Width of every code, in bits.

    Returns:
        Dict mapping each distinct character to its ``bits``-wide bit string.
        Empty when ``text`` is empty.

    Raises:
        ValueError: If ``bits`` is less than 1 or ``text`` has more than
            ``2 ** bits`` distinct symbols.
    """
    symbols = sorted(set(text))
    if not symbols:
        return {}

    if bits < 1 or len(symbols) > 2 ** bits:
        raise ValueError(
            f"{len(symbols)} distinct symbols cannot be encoded in {bits} bit(s)"
        )

    # ``symbols`` is sorted, so the last entry has the largest code point.
    if ord(symbols[-1]) < 2 ** bits:
        values = {char: ord(char) for char in symbols}
    else:
        values = {char: rank for rank, char in enumerate(symbols)}

    return {char: format(value, f'0{bits}b') for char, value in values.items()}

def fixed_length_size(text, bits):
    """Return the total size of ``text`` when each symbol costs ``bits`` bits."""
    symbol_count = len(text)
    return symbol_count * bits

def display_comparison_table(characters, frequencies, var_encoding, fixed_5bit, fixed_8bit):
    """Print a left-aligned table comparing the encodings of each character.

    Args:
        characters: Iterable of characters, one table row per item.
        frequencies: Mapping from character to its count.
        var_encoding: Mapping from character to its variable-length code.
        fixed_5bit: Mapping from character to its 5-bit column value.
        fixed_8bit: Mapping from character to its 8-bit column value.
    """
    header = f"{'Character':<10}{'Frequency':<10}{'Var-Encoding':<20}{'Fixed-5bit':<20}{'Fixed-8bit':<20}"
    separator = '-' * 80
    print(header)
    print(separator)

    for char in characters:
        row = f"{char:<10}{frequencies[char]:<10}{var_encoding[char]:<20}{fixed_5bit[char]:<20}{fixed_8bit[char]:<20}"
        print(row)

def calculate_compression_ratio(original_size, compressed_size):
    """Return ``compressed_size`` as a fraction of ``original_size``.

    Raises:
        ZeroDivisionError: If ``original_size`` is zero.
    """
    ratio = compressed_size / original_size
    return ratio

def main():
    """Compare Huffman coding with 5-bit and 8-bit fixed-length coding.

    For each sample text this prints the per-character comparison table and
    three size ratios. Each ratio is the size of the first named scheme
    divided by the size of the second, so "Huffman vs 5-bit Fixed" uses the
    5-bit size as its baseline and "Huffman vs 8-bit Fixed" the 8-bit size.
    """
    # Input texts
    text1 = "This is a sample text for Huffman encoding and decoding."
    text2 = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
    text3 = "The quick brown fox jumps over the lazy dog."

    texts = [text1, text2, text3]

    for i, input_text in enumerate(texts, start=1):
        print(f"\n--- Text {i} ---")
        print("Input Text:", input_text)

        # Variable-length Huffman coding; only the total length is needed,
        # so sum the code lengths instead of building the encoded string.
        huffman_tree = build_huffman_tree(input_text)
        huffman_mapping = build_huffman_codes(huffman_tree)
        var_encoding_size = sum(len(huffman_mapping[char]) for char in input_text)

        # Fixed-length coding (5-bit)
        fixed_5bit_encoding = fixed_length_encode(input_text, 5)
        fixed_5bit_size = fixed_length_size(input_text, 5)

        # Fixed-length coding (8-bit)
        fixed_8bit_encoding = fixed_length_encode(input_text, 8)
        fixed_8bit_size = fixed_length_size(input_text, 8)

        # Display comparison table
        display_comparison_table(
            characters=huffman_mapping.keys(),
            frequencies=Counter(input_text),
            var_encoding=huffman_mapping,
            fixed_5bit=fixed_5bit_encoding,
            fixed_8bit=fixed_8bit_encoding
        )

        # Each ratio is measured against the baseline named in its label.
        huffman_vs_5bit = calculate_compression_ratio(fixed_5bit_size, var_encoding_size)
        huffman_vs_8bit = calculate_compression_ratio(fixed_8bit_size, var_encoding_size)
        fixed_5bit_vs_8bit = calculate_compression_ratio(fixed_8bit_size, fixed_5bit_size)

        # Display compression ratios
        print("\nCompression Ratios:")
        print(f"Huffman vs 5-bit Fixed: {huffman_vs_5bit:.2%}")
        print(f"Huffman vs 8-bit Fixed: {huffman_vs_8bit:.2%}")
        print(f"5-bit Fixed vs 8-bit Fixed: {fixed_5bit_vs_8bit:.2%}")


if __name__ == "__main__":
    main()


--- Text 1 ---
Input Text: This is a sample text for Huffman encoding and decoding.
Character Frequency Var-Encoding        Fixed-5bit          Fixed-8bit          
--------------------------------------------------------------------------------
n         5         000                 1101110             01101110            
T         1         00100               1010100             01010100            
.         1         00101               101110              00101110            
o         3         0011                1101111             01101111            
f         3         0100                1100110             01100110            
H         1         01010               1001000             01001000            
m         2         01011               1101101             01101101            
s         3         0110                1110011             01110011            
i         4         0111                1101001             01101001            
a         4         1000