In [1]:
import os
import heapq

class BinaryTreeNode:
    def __init__(self, value, freq):
        self.value = value
        self.freq = freq
        self.left = None
        self.right = None
    
    def __lt__(self, other):
        return self.freq < other.freq
    
    def __eq__(self, other):
        return self.freq == other.freq

class HuffmanCoding:
    def __init__(self, path):
        self.path = path
        self.__heap = []
        self.__codes = {}
        self.__reverseCodes = {}
    
    def __make_frequency_dictionary(self, text):
        frequency_dict = {}
        for char in text:
            if char not in frequency_dict:
                frequency_dict[char] = 0
            frequency_dict[char] += 1
        return frequency_dict
    
    def __build_heap(self, freq_dict):
        for key in freq_dict:
            frequency = freq_dict[key]
            binary_tree_node = BinaryTreeNode(key, frequency)
            heapq.heappush(self.__heap, binary_tree_node)
    
    def __build_tree(self):
        while len(self.__heap) > 1:
            binary_tree_node_1 = heapq.heappop(self.__heap)
            binary_tree_node_2 = heapq.heappop(self.__heap)
            freq_sum = binary_tree_node_1.freq + binary_tree_node_2.freq
            new_node = BinaryTreeNode(None, freq_sum)
            new_node.left = binary_tree_node_1
            new_node.right = binary_tree_node_2
            heapq.heappush(self.__heap, new_node)
    
    def __build_code_helper(self, root, curr_bits):
        if root is None:
            return
        if root.value is not None:
            self.__codes[root.value] = curr_bits
            self.__reverseCodes[curr_bits] = root.value
            return
        self.__build_code_helper(root.left, curr_bits + "0")
        self.__build_code_helper(root.right, curr_bits + "1")
    
    def __build_code(self):
        root = heapq.heappop(self.__heap)
        self.__build_code_helper(root, "")
    
    def __get_encoded_text(self, text):
        encoded_text = ""
        for char in text:
            encoded_text += self.__codes[char]
        return encoded_text
    
    def __get_padded_encoded_text(self, encoded_text):
        padded_amount = 8 - (len(encoded_text) % 8)
        for i in range(padded_amount):
            encoded_text += "0"
        padded_info = "{0:08b}".format(padded_amount)
        encoded_text = padded_info + encoded_text
        return encoded_text
    
    def __get_bytes_array(self, padded_encoded_text):
        array = []
        for i in range(0, len(padded_encoded_text), 8):
            byte = padded_encoded_text[i:i+8]
            array.append(int(byte, 2))
        return array
    
    def compress(self):
        # Get file from path
        file_name, file_extension = os.path.splitext(self.path)
        output_path = file_name + ".bin"
        # Read text from file
        with open(self.path, "r") as file, open(output_path, 'wb') as output:
            text = file.read()
            text = text.rstrip()
        # Make frequency dictionary using the text
        freq_dict = self.__make_frequency_dictionary(text)
        # Construct the heap from freq dict
        self.__build_heap(freq_dict)
        # Construct the binary tree from heap
        self.__build_tree()
        # Construct the codes from binary tree
        self.__build_code()
        # Creating the encoded text using the codes
        encoded_text = self.__get_encoded_text(text)
        # Padding the encoded Text
        padded_encoded_text = self.__get_padded_encoded_text(encoded_text)
        # Put the encoded text into the binary file
        bytes_array = self.__get_bytes_array(padded_encoded_text)
        # Return this binary file as output
        final_bytes = bytes(bytes_array)
        output.write(final_bytes)
        print("Compressed!")
    
    def __remove_padding(self, text):
        padded_info = text[:8]
        extra_padding = int(padded_info, 2)
        text = text[8:]
        text_after_padding_removed = text[:-extra_padding]
        return text_after_padding_removed
    
    def __decode_text(self, text):
        decoded_text = ""
        curr_bits = ""
        for bit in text:
            curr_bits += bit
            if curr_bits in self.__reverseCodes:
                character = self.__reverseCodes[curr_bits]
                decoded_text += character
                curr_bits = ""
        return decoded_text
    
    def decompress(self, input_path):
        file_name, file_extension = os.path.splitext(self.path)
        output_path = file_name + "_decompressed" + ".txt"
        with open(input_path, 'rb') as file, open(output_path, 'w') as output:
            bit_string = ""
            byte = file.read(1)
            while byte:
                byte = ord(byte)
                bits = bin(byte)[2:].rjust(8, '0')
                bit_string += bits
                byte = file.read(1)
            actual_text = self.__remove_padding(bit_string)
            decompressed_text = self.__decode_text(actual_text)
            output.write(decompressed_text)
        print("Decompressed!")

# Example usage:
# huffman = HuffmanCoding("input.txt")
# huffman.compress()
# huffman.decompress("input.bin")

In [2]:
path = "C:/Users/Nishant/Desktop/sample/sample1.txt"
h = HuffmanCoding(path)
output_path = h.compress()
h.decompress(output_path)

ValueError: write to closed file