In [16]:
import heapq
import os

class Binarytree:
    """Node of a Huffman tree, ordered by frequency so it can sit in a heap.

    ``value`` holds the symbol for a leaf; Huffmancode creates merged
    (internal) nodes with ``value=None`` and attaches their children to
    ``left`` and ``right``.
    """

    def __init__(self, value, frequ):
        self.value = value    # symbol at a leaf, None for a merged node
        self.frequ = frequ    # occurrence count used for ordering
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Order nodes by frequency; this is the comparison heapq relies on."""
        if not isinstance(other, Binarytree):
            # Let Python fall back / raise TypeError instead of AttributeError.
            return NotImplemented
        return self.frequ < other.frequ

    def __eq__(self, other):
        """Nodes compare equal when their frequencies match."""
        if not isinstance(other, Binarytree):
            # Comparing with None or any foreign object is simply "not equal".
            return NotImplemented
        return self.frequ == other.frequ

    # Overriding __eq__ already makes instances unhashable; say so explicitly.
    __hash__ = None


class Huffmancode:
    """Huffman-encode a text file into a ``.bin`` file and decode it again.

    The code table lives only in memory on the instance (it is not written
    to the ``.bin`` file), so ``decompress`` can only decode data produced
    by an earlier ``compression`` call on the same object.
    """

    def __init__(self, path):
        self.path = path
        self.__heap = []            # min-heap of Binarytree nodes
        self.__code = {}            # symbol -> bit string
        self.__reversecode = {}     # bit string -> symbol

    def __frequency_from_text(self, text):
        """Return a dict mapping each character of ``text`` to its count."""
        freq_dict = {}
        for char in text:
            freq_dict[char] = freq_dict.get(char, 0) + 1
        return freq_dict

    def __Bulid_heap(self, frequency_dict):
        """Push one leaf node per symbol onto the min-heap."""
        for key, frequency in frequency_dict.items():
            heapq.heappush(self.__heap, Binarytree(key, frequency))

    def __build_binary_tree(self):
        """Repeatedly merge the two rarest nodes until at most one remains."""
        while len(self.__heap) > 1:
            first = heapq.heappop(self.__heap)
            second = heapq.heappop(self.__heap)
            merged = Binarytree(None, first.frequ + second.frequ)
            merged.left = first
            merged.right = second
            heapq.heappush(self.__heap, merged)

    def __build_tree_code_helper(self, root, curr_bits):
        """Walk the tree, recording the bit path to every leaf ('0' = left)."""
        if root is None:
            return
        if root.value is not None:
            # A leaf reached with no bits is the root of a one-symbol tree;
            # give it a real 1-bit code so the text is not encoded as ''.
            bits = curr_bits or '0'
            self.__code[root.value] = bits
            self.__reversecode[bits] = root.value
            return
        self.__build_tree_code_helper(root.left, curr_bits + '0')
        self.__build_tree_code_helper(root.right, curr_bits + '1')

    def __build_tree_code(self):
        """Pop the tree root off the heap and fill both code tables."""
        if not self.__heap:
            # Empty input: there are no symbols, hence no codes to build.
            return
        root = heapq.heappop(self.__heap)
        self.__build_tree_code_helper(root, '')

    def __build_encoded_text(self, text):
        """Return ``text`` as one string of '0'/'1' characters."""
        return ''.join(self.__code[char] for char in text)

    def __build_padded_text(self, encoded_text):
        """Pad to a whole number of bytes and prefix an 8-bit padding count.

        An already byte-aligned payload gets a count of 0 and no filler.
        """
        padding_value = (8 - len(encoded_text) % 8) % 8
        padded_info = "{:08b}".format(padding_value)
        return padded_info + encoded_text + '0' * padding_value

    def __build_byte_array(self, padded_text):
        """Convert each 8-character slice of the bit string to an int."""
        return [int(padded_text[i:i + 8], 2)
                for i in range(0, len(padded_text), 8)]

    def compression(self):
        """Compress ``self.path`` into a sibling ``.bin`` file.

        Returns:
            The path of the written ``.bin`` file.
        """
        print("**************************COMPRESSION STARTED*********************************")
        filename, _ = os.path.splitext(self.path)
        output_path = filename + '.bin'

        # Read the whole input first; the file is only read, never written.
        with open(self.path, 'r') as file:
            text = file.read()
        # NOTE: trailing whitespace is dropped before encoding, so it is not
        # restored by decompress().
        text = text.rstrip()

        # Start from a clean slate so a repeated call cannot keep stale codes.
        self.__heap = []
        self.__code = {}
        self.__reversecode = {}

        # Count how often each character occurs.
        frequency_dict = self.__frequency_from_text(text)

        # Min-heap so the two least frequent nodes are always on top.
        self.__Bulid_heap(frequency_dict)

        # Construct the binary tree from the heap.
        self.__build_binary_tree()

        # Derive the bit code of every symbol from the tree.
        self.__build_tree_code()

        # Encode, pad to whole bytes, and pack into bytes.
        encoded_text = self.__build_encoded_text(text)
        padded_text = self.__build_padded_text(encoded_text)
        final_bytes = bytes(self.__build_byte_array(padded_text))

        # Open the output only once there is something valid to write.
        with open(output_path, 'wb') as output:
            output.write(final_bytes)
        print('***************************COMPRESSED SUCCESFULLY****************************')
        return output_path

    def __remove_padding(self, text):
        """Strip the 8-bit padding header and the filler bits it announces.

        Raises:
            ValueError: if ``text`` is empty, so there is no header to read.
        """
        if not text:
            raise ValueError("compressed data is empty: missing padding header")
        padding_value = int(text[:8], 2)
        text = text[8:]
        if padding_value:
            # Guarded because text[:-0] would wrongly yield an empty string.
            text = text[:-padding_value]
        return text

    def __decoded_text(self, text):
        """Translate a bit string back to characters using the code table."""
        current_bits = ''
        decoded_chars = []
        for bit in text:
            current_bits += bit
            if current_bits in self.__reversecode:
                decoded_chars.append(self.__reversecode[current_bits])
                current_bits = ''
        return ''.join(decoded_chars)

    def decompress(self, input_path):
        """Decode ``input_path`` into ``<name>_Decompressed.txt``.

        Uses the code table built by ``compression`` on this instance.

        Returns:
            The path of the written text file.
        """
        filename, _ = os.path.splitext(input_path)
        output_path = filename + '_Decompressed' + '.txt'

        with open(input_path, 'rb') as file:
            data = file.read()
        # Each byte becomes its zero-padded 8-bit representation.
        bit_string = ''.join("{:08b}".format(byte) for byte in data)

        text_after_removing_padding = self.__remove_padding(bit_string)
        actual_text = self.__decoded_text(text_after_removing_padding)

        with open(output_path, 'w') as output:
            output.write(actual_text)
        return output_path

# Driver: ask for a text file, compress it, then decompress the result with
# the same Huffmancode instance (decoding uses the code table that
# compression() stored on that instance).
path =input("Enter the text file name : ")
h = Huffmancode(path)
# compression() returns the path of the .bin file it wrote.
compressed_file = h.compression()
# decompress() returns the path of the text file it wrote.
h.decompress(compressed_file)

**************************COMPRESSION STARTED*********************************
***************************COMPRESSED SUCCESFULLY****************************


'text_file_decompressed.txt'