In [2]:
import heapq
import os

class BinaryTree:
    """Huffman tree node that compares by frequency so it can sit in a heap.

    Attributes:
        value: symbol stored in the node. ``Huffmancode`` creates leaves with
            a byte value and internal (merged) nodes with ``None``.
        frequ: occurrence count used for ordering.
        left, right: child nodes, ``None`` until assigned by the tree builder.

    Note:
        Equality is defined on ``frequ`` only, and because ``__eq__`` is
        defined without ``__hash__`` instances are not hashable.
    """

    def __init__(self, value, frequ):
        self.value = value
        self.frequ = frequ
        self.left = None
        self.right = None

    def __lt__(self, other):
        # Let Python raise its usual TypeError for unrelated operands instead
        # of an AttributeError on the missing ``frequ`` attribute.
        if not isinstance(other, BinaryTree):
            return NotImplemented
        return self.frequ < other.frequ

    def __eq__(self, other):
        # Returning NotImplemented makes ``node == None`` evaluate to False
        # rather than crash while reading ``other.frequ``.
        if not isinstance(other, BinaryTree):
            return NotImplemented
        return self.frequ == other.frequ

class Huffmancode:
    """Huffman coder that compresses one file and can restore it afterwards.

    The compressed file holds one byte with the number of padding bits,
    followed by the packed code bits. The code table itself is kept only on
    the instance, so a file can only be decompressed by the object that
    compressed it.
    """

    def __init__(self, path):
        """Remember ``path``, the file that ``compression`` will read."""
        self.path = path
        self.__heap = []
        self.__code = {}
        self.__reverse_code = {}

    def _frequency_from_bytes(self, byte_data):
        """Return a dict mapping every byte value in ``byte_data`` to its count."""
        frequ_dict = {}
        for byte in byte_data:
            frequ_dict[byte] = frequ_dict.get(byte, 0) + 1
        return frequ_dict

    def _Build_heap(self, frequency_dict):
        """Push one leaf node per symbol of ``frequency_dict`` onto the heap."""
        for key, frequency in frequency_dict.items():
            heapq.heappush(self.__heap, BinaryTree(key, frequency))

    def __Build_Binary_tree(self):
        """Merge the two lowest-frequency nodes until one root remains."""
        while len(self.__heap) > 1:
            binary_tree_node_1 = heapq.heappop(self.__heap)
            binary_tree_node_2 = heapq.heappop(self.__heap)
            sum_of_freq = binary_tree_node_1.frequ + binary_tree_node_2.frequ
            newnode = BinaryTree(None, sum_of_freq)
            newnode.left = binary_tree_node_1
            newnode.right = binary_tree_node_2
            heapq.heappush(self.__heap, newnode)

    def __Build_Binary_code_Helper(self, root, curr_bits):
        """Walk the tree and record the bit string of every leaf.

        Left edges contribute '0', right edges '1'.
        """
        if root is None:
            return
        if root.value is not None:
            # A tree made of a single leaf would otherwise get the empty
            # code, which encodes to zero bits and cannot be decoded.
            bits = curr_bits or '0'
            self.__code[root.value] = bits
            self.__reverse_code[bits] = root.value
            return
        self.__Build_Binary_code_Helper(root.left, curr_bits + '0')
        self.__Build_Binary_code_Helper(root.right, curr_bits + '1')

    def __Build_Binary_code(self):
        """Pop the tree root off the heap and derive both code tables."""
        if not self.__heap:
            # Empty input: there is no symbol to assign a code to.
            return
        root = heapq.heappop(self.__heap)
        self.__Build_Binary_code_Helper(root, '')

    def __Build_Encoded_Text(self, byte_data):
        """Return the concatenated code bits of ``byte_data`` as a '0'/'1' string."""
        code = self.__code
        return ''.join(code[byte] for byte in byte_data)

    def __Build_padded_Text(self, encoded_text):
        """Pad to a whole number of bytes and prefix the 8-bit padding count."""
        # ``-n % 8`` is 0 for already aligned input, so no extra byte is added.
        padding_value = -len(encoded_text) % 8
        padded_info = "{0:08b}".format(padding_value)
        return padded_info + encoded_text + '0' * padding_value

    def __Build_Byte_Array(self, padded_text):
        """Convert a bit string into a list of integers, eight bits each."""
        return [int(padded_text[i:i + 8], 2)
                for i in range(0, len(padded_text), 8)]

    def compression(self):
        """Compress ``self.path`` into ``<stem>.bin`` and return that path."""
        print("Compression for your file has started...")
        filename, file_extension = os.path.splitext(self.path)
        output_path = filename + '.bin'

        # Read the whole input before the output is opened: for a source that
        # already ends in '.bin' both paths name the same file, and opening it
        # for writing first would truncate it before it is read.
        with open(self.path, 'rb') as file:
            byte_data = file.read()

        # Start from clean tables so a repeated call does not mix in codes
        # left over from an earlier run.
        self.__heap = []
        self.__code = {}
        self.__reverse_code = {}

        frequency_dict = self._frequency_from_bytes(byte_data)
        self._Build_heap(frequency_dict)
        self.__Build_Binary_tree()
        self.__Build_Binary_code()
        encoded_text = self.__Build_Encoded_Text(byte_data)
        padded_text = self.__Build_padded_Text(encoded_text)
        final_bytes = bytes(self.__Build_Byte_Array(padded_text))

        with open(output_path, 'wb') as output:
            output.write(final_bytes)
        print("Compressed successfully")
        return output_path

    def __Remove_padding(self, padded_text):
        """Strip the padding-count byte and the trailing padding bits.

        Raises:
            ValueError: if ``padded_text`` is too short to hold the count.
        """
        if len(padded_text) < 8:
            raise ValueError("compressed data is missing its padding header")
        padding_value = int(padded_text[:8], 2)
        payload = padded_text[8:]
        # Slice by length: ``payload[:-0]`` would wrongly return ''.
        return payload[:len(payload) - padding_value]

    def __Decoded_text(self, text):
        """Translate a bit string back into bytes using the reverse table.

        Raises:
            ValueError: if some bits do not match any known code.
        """
        reverse_code = self.__reverse_code
        longest_code = max(map(len, reverse_code), default=0)
        current_bits = ''
        decoded_bytes = bytearray()
        for bit in text:
            current_bits += bit
            if current_bits in reverse_code:
                decoded_bytes.append(reverse_code[current_bits])
                current_bits = ''
            elif len(current_bits) > longest_code:
                # No code can match any more; stop instead of growing forever.
                raise ValueError("bit stream does not match the code table")
        if current_bits:
            raise ValueError("bit stream ends in the middle of a code")
        return decoded_bytes

    def decompress(self, input_path):
        """Restore ``input_path`` using the code table built by ``compression``.

        The result is written to ``<stem>_decompressed<ext>``, where ``<ext>``
        is the extension of ``self.path``; that output path is returned.
        """
        filename, file_extension = os.path.splitext(input_path)
        output_path = filename + '_decompressed' + os.path.splitext(self.path)[1]

        with open(input_path, 'rb') as file:
            compressed_bytes = file.read()
        bit_string = ''.join(format(byte, '08b') for byte in compressed_bytes)

        text_after_removing_padding = self.__Remove_padding(bit_string)
        actual_data = self.__Decoded_text(text_after_removing_padding)

        # Open the output only once decoding succeeded, so a failure does not
        # leave an empty file behind.
        with open(output_path, 'wb') as output:
            output.write(actual_data)
        print("Decompressed successfully")
        return output_path

# Usage
# Ask for the file to compress; the cell blocks here until a path is entered.
path = input("Enter the path of your file which you need to compress: ")
h = Huffmancode(path)
# compression() returns the path of the '.bin' file it wrote.
compressed_file = h.compression()
# Decompress with the same object: the code table built by compression() is
# kept on the instance only and is not written into the compressed file.
h.decompress(compressed_file)


Enter the path of your file which you need to compress:  sample3.jpeg


Compression for your file has started...
Compressed successfully
Decompressed successfully


'sample3_decompressed.jpeg'

In [12]:
import heapq
import os
import time
from typing import List, Dict

class BinaryTree:
    """Huffman tree node that compares by frequency so it can sit in a heap.

    Attributes:
        value: symbol stored in the node. ``Huffmancode`` creates leaves with
            a byte value and internal (merged) nodes with ``None``.
        frequ: occurrence count used for ordering.
        left, right: child nodes, ``None`` until assigned by the tree builder.

    Note:
        Equality is defined on ``frequ`` only, and because ``__eq__`` is
        defined without ``__hash__`` instances are not hashable.
    """

    def __init__(self, value, frequ):
        self.value = value
        self.frequ = frequ
        self.left = None
        self.right = None

    def __lt__(self, other):
        # Let Python raise its usual TypeError for unrelated operands instead
        # of an AttributeError on the missing ``frequ`` attribute.
        if not isinstance(other, BinaryTree):
            return NotImplemented
        return self.frequ < other.frequ

    def __eq__(self, other):
        # Returning NotImplemented makes ``node == None`` evaluate to False
        # rather than crash while reading ``other.frequ``.
        if not isinstance(other, BinaryTree):
            return NotImplemented
        return self.frequ == other.frequ

class Huffmancode:
    """Huffman coder that compresses a list of files and can restore them.

    Every file gets its own Huffman code. Each compressed file holds one byte
    with the number of padding bits followed by the packed code bits; the code
    tables are kept only on the instance, keyed by compressed-file path, so
    files can only be decompressed by the object that compressed them.

    Output names are derived from the input stem (``<stem>_compressed.bin``),
    so two inputs that differ only in their extension write to the same file.
    """

    def __init__(self, files: List[str], compression_level='Balanced'):
        """Store the input paths.

        ``compression_level`` and ``algorithm`` are only stored on the
        object; no method of this class reads them.
        """
        self.files = files
        self.__heap = []
        self.__code = {}
        self.__reverse_code = {}
        # Reverse-code table of every file compressed so far, keyed by the
        # path of the compressed file it belongs to.
        self.__reverse_codes_by_path: Dict[str, Dict[str, int]] = {}
        self.compression_level = compression_level
        self.algorithm = 'huffman'

    def _reset_state(self):
        """Drop the heap and code tables left over from a previous file."""
        self.__heap = []
        self.__code = {}
        self.__reverse_code = {}

    @staticmethod
    def _percent_saved(original_size, compressed_size):
        """Return the size reduction in percent; 0.0 when there was no input."""
        if original_size == 0:
            return 0.0
        return (original_size - compressed_size) / original_size * 100

    def _frequency_from_bytes(self, byte_data):
        """Return a dict mapping every byte value in ``byte_data`` to its count."""
        frequ_dict = {}
        for byte in byte_data:
            frequ_dict[byte] = frequ_dict.get(byte, 0) + 1
        return frequ_dict

    def _build_heap(self, frequency_dict):
        """Push one leaf node per symbol of ``frequency_dict`` onto the heap."""
        for key, frequency in frequency_dict.items():
            heapq.heappush(self.__heap, BinaryTree(key, frequency))

    def _build_binary_tree(self):
        """Merge the two lowest-frequency nodes until one root remains."""
        while len(self.__heap) > 1:
            binary_tree_node_1 = heapq.heappop(self.__heap)
            binary_tree_node_2 = heapq.heappop(self.__heap)
            sum_of_freq = binary_tree_node_1.frequ + binary_tree_node_2.frequ
            newnode = BinaryTree(None, sum_of_freq)
            newnode.left = binary_tree_node_1
            newnode.right = binary_tree_node_2
            heapq.heappush(self.__heap, newnode)

    def _build_binary_code_helper(self, root, curr_bits):
        """Walk the tree and record the bit string of every leaf.

        Left edges contribute '0', right edges '1'.
        """
        if root is None:
            return
        if root.value is not None:
            # A tree made of a single leaf would otherwise get the empty
            # code, which encodes to zero bits and cannot be decoded.
            bits = curr_bits or '0'
            self.__code[root.value] = bits
            self.__reverse_code[bits] = root.value
            return
        self._build_binary_code_helper(root.left, curr_bits + '0')
        self._build_binary_code_helper(root.right, curr_bits + '1')

    def _build_binary_code(self):
        """Pop the tree root off the heap and derive both code tables."""
        if not self.__heap:
            # Empty input: there is no symbol to assign a code to.
            return
        root = heapq.heappop(self.__heap)
        self._build_binary_code_helper(root, '')

    def _build_encoded_text(self, byte_data):
        """Return the concatenated code bits of ``byte_data`` as a '0'/'1' string."""
        code = self.__code
        return ''.join(code[byte] for byte in byte_data)

    def _build_padded_text(self, encoded_text):
        """Pad to a whole number of bytes and prefix the 8-bit padding count."""
        # ``-n % 8`` is 0 for already aligned input, so no extra byte is added.
        padding_value = -len(encoded_text) % 8
        padded_info = "{0:08b}".format(padding_value)
        return padded_info + encoded_text + '0' * padding_value

    def _build_byte_array(self, padded_text):
        """Convert a bit string into a list of integers, eight bits each."""
        return [int(padded_text[i:i + 8], 2)
                for i in range(0, len(padded_text), 8)]

    def compression(self):
        """Compress every file in ``self.files``.

        Each input is written to ``<stem>_compressed.bin`` and a size report
        is printed per file and in total.

        Returns:
            The list of compressed file paths, in the order of ``self.files``.
        """
        print("Starting compression...")

        total_compressed_size = 0
        total_original_size = 0
        compression_start_time = time.perf_counter()
        compressed_file_paths = []  # List to store compressed file paths

        for file_path in self.files:
            filename, file_extension = os.path.splitext(file_path)
            output_path = filename + '_compressed.bin'

            # Read first, write later: a failure while encoding then does not
            # leave an empty output file behind.
            with open(file_path, 'rb') as file:
                byte_data = file.read()
            original_size = len(byte_data)

            # Each file gets its own code; without the reset, codes of earlier
            # files would stay in the reverse table and corrupt decoding.
            self._reset_state()
            frequency_dict = self._frequency_from_bytes(byte_data)
            self._build_heap(frequency_dict)
            self._build_binary_tree()
            self._build_binary_code()
            encoded_text = self._build_encoded_text(byte_data)
            padded_text = self._build_padded_text(encoded_text)
            final_bytes = bytes(self._build_byte_array(padded_text))

            with open(output_path, 'wb') as output:
                output.write(final_bytes)

            # Keep this file's table so decompress() can pick the right one.
            self.__reverse_codes_by_path[output_path] = dict(self.__reverse_code)

            compressed_size = len(final_bytes)
            total_original_size += original_size
            total_compressed_size += compressed_size
            compressed_file_paths.append(output_path)

            print(f"File '{file_path}' compressed to '{output_path}'")
            print(f"Original Size: {original_size} bytes, Compressed Size: {compressed_size} bytes")
            print(f"Compression Ratio: {self._percent_saved(original_size, compressed_size):.2f}%")

        compression_time = time.perf_counter() - compression_start_time
        print(f"\nTotal Compression Time: {compression_time:.2f} seconds")
        print(f"Overall Compression Ratio: {self._percent_saved(total_original_size, total_compressed_size):.2f}%")

        return compressed_file_paths  # Return the list of compressed file paths

    def decompress(self, compressed_file_paths: List[str]):
        """Restore the files produced by ``compression``.

        ``compressed_file_paths`` is expected in the same order as
        ``self.files``: the extension of the restored file is taken from the
        input file at the same position. Each result is written to
        ``<compressed stem>_decompressed<ext>``.

        Returns:
            The list of restored file paths.
        """
        print("Starting decompression...")

        decompressed_file_paths = []
        for position, input_path in enumerate(compressed_file_paths):
            filename, file_extension = os.path.splitext(input_path)
            original_file_extension = os.path.splitext(self.files[position])[1]
            output_path = filename + '_decompressed' + original_file_extension

            with open(input_path, 'rb') as file:
                compressed_bytes = file.read()
            bit_string = ''.join(format(byte, '08b') for byte in compressed_bytes)

            # Use the table recorded for this file; fall back to the most
            # recent one for paths this object did not produce itself.
            reverse_code = self.__reverse_codes_by_path.get(input_path, self.__reverse_code)
            text_after_removing_padding = self._remove_padding(bit_string)
            actual_data = self._decode_text(text_after_removing_padding, reverse_code)

            with open(output_path, 'wb') as output:
                output.write(actual_data)
            decompressed_file_paths.append(output_path)

            print(f"File '{input_path}' decompressed to '{output_path}'")

        return decompressed_file_paths

    def _remove_padding(self, padded_text):
        """Strip the padding-count byte and the trailing padding bits.

        Raises:
            ValueError: if ``padded_text`` is too short to hold the count.
        """
        if len(padded_text) < 8:
            raise ValueError("compressed data is missing its padding header")
        padding_value = int(padded_text[:8], 2)
        payload = padded_text[8:]
        # Slice by length: ``payload[:-0]`` would wrongly return ''.
        return payload[:len(payload) - padding_value]

    def _decode_text(self, text, reverse_code=None):
        """Translate a bit string back into bytes.

        Args:
            text: string of '0'/'1' characters without padding.
            reverse_code: mapping from code bits to byte value; defaults to
                the table of the most recently compressed file.

        Raises:
            ValueError: if some bits do not match any known code.
        """
        table = self.__reverse_code if reverse_code is None else reverse_code
        longest_code = max(map(len, table), default=0)
        current_bits = ''
        decoded_bytes = bytearray()
        for bit in text:
            current_bits += bit
            if current_bits in table:
                decoded_bytes.append(table[current_bits])
                current_bits = ''
            elif len(current_bits) > longest_code:
                # No code can match any more; stop instead of growing forever.
                raise ValueError("bit stream does not match the code table")
        if current_bits:
            raise ValueError("bit stream ends in the middle of a code")
        return decoded_bytes

# Usage example
files_to_compress = ['sample2.pdf', 'sample3.jpeg', 'sample1.txt']  # Add your files here
# NOTE(review): Huffmancode only stores compression_level on the object; no
# method shown in this cell reads it, so 'Fast', 'Balanced' and 'Max' all
# produce the same output.
compression_level = 'Balanced'  # Options: 'Fast', 'Balanced', 'Max'
huffman = Huffmancode(files=files_to_compress, compression_level=compression_level)
# compression() returns the compressed paths in the same order as the inputs.
compressed_file_paths = huffman.compression()

# Decompress the files
# The same object must be used: the code tables live on the instance and are
# not written into the compressed files.
huffman.decompress(compressed_file_paths)


Starting compression...
File 'sample2.pdf' compressed to 'sample2_compressed.bin'
Original Size: 2393865 bytes, Compressed Size: 2392305 bytes
Compression Ratio: 0.07%
File 'sample3.jpeg' compressed to 'sample3_compressed.bin'
Original Size: 247696 bytes, Compressed Size: 246979 bytes
Compression Ratio: 0.29%
File 'sample1.txt' compressed to 'sample1_compressed.bin'
Original Size: 5762 bytes, Compressed Size: 3344 bytes
Compression Ratio: 41.96%

Total Compression Time: 0.59 seconds
Overall Compression Ratio: 0.18%
Starting decompression...


KeyboardInterrupt: 