In [10]:
import heapq
import os
class BinaryTree:
    """Node of a Huffman tree that orders and compares by frequency.

    Attributes:
        value: The symbol stored in the node, or ``None`` for an internal
            (merged) node.
        freq: Frequency of the symbol, or the summed frequency of the
            subtree for an internal node.
        left: Left child node, ``None`` until assigned.
        right: Right child node, ``None`` until assigned.

    Only ``freq`` takes part in comparisons, so two nodes holding different
    symbols compare equal when their frequencies match.  Because ``__eq__``
    is defined without ``__hash__``, instances are unhashable.
    """

    def __init__(self, value, freq):
        self.value = value
        self.freq = freq
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Return True when this node's frequency is lower than ``other``'s."""
        # Let Python raise its normal TypeError for operands without a
        # frequency instead of leaking an AttributeError.
        if not hasattr(other, 'freq'):
            return NotImplemented
        return self.freq < other.freq

    def __eq__(self, other):
        """Return True when both nodes carry the same frequency."""
        # Comparing against None or any unrelated object must yield False
        # rather than crash on the missing attribute.
        if not hasattr(other, 'freq'):
            return NotImplemented
        return self.freq == other.freq
        
class Huffman:
    """Huffman-code compressor for a text file.

    ``Compress`` reads the file at ``path``, derives a prefix code from the
    character frequencies and writes the bit-packed result to a file with
    the same name and a ``.bin`` extension.

    Output layout: the first byte holds the number of padding bits (1-8)
    appended to the encoded bit stream; the remaining bytes are the encoded
    bits followed by that many ``0`` bits.  The code table is not written to
    the output; it is only kept on the instance.
    """

    def __init__(self, path):
        self.path = path
        self.__heap = []  # min-heap of tree nodes, ordered by frequency
        self.__code = {}  # character -> bit string

    def __text_frequency(self, text):
        """Return a dict mapping each character of ``text`` to its count."""
        f_dict = {}
        for ch in text:
            f_dict[ch] = f_dict.get(ch, 0) + 1
        return f_dict

    def __Build_heap(self, freq_dict):
        """Push one leaf node per (character, frequency) pair onto the heap."""
        for ch, freq in freq_dict.items():
            heapq.heappush(self.__heap, BinaryTree(ch, freq))

    def __Build_Binary_Tree(self):
        """Merge the two lightest nodes until a single root remains."""
        while len(self.__heap) > 1:
            first = heapq.heappop(self.__heap)
            second = heapq.heappop(self.__heap)
            # Internal nodes carry no symbol (value is None).
            parent = BinaryTree(None, first.freq + second.freq)
            parent.left = first
            parent.right = second
            heapq.heappush(self.__heap, parent)

    def __Build_Tree_Code_Helper(self, root, curr_bits):
        """Walk the tree, recording the bit path that leads to each leaf."""
        if root is None:
            return

        if root.value is not None:
            # An empty prefix can only reach a leaf when the whole tree is
            # that single leaf (text with one distinct character).  Give it
            # a one-bit code so the text is not encoded to nothing.
            self.__code[root.value] = curr_bits or '0'
            return

        self.__Build_Tree_Code_Helper(root.left, curr_bits + '0')
        self.__Build_Tree_Code_Helper(root.right, curr_bits + '1')

    def __Build_Tree_Code(self):
        """Pop the tree root off the heap and fill the code table from it."""
        if not self.__heap:
            # Empty input: there is no tree and therefore nothing to encode.
            return
        root = heapq.heappop(self.__heap)
        self.__Build_Tree_Code_Helper(root, '')

    def __Build_Encoded_Text(self, text):
        """Return ``text`` with every character replaced by its code."""
        code = self.__code
        return ''.join(code[ch] for ch in text)

    def __Build_Padded_Text(self, encoded_text):
        """Pad the bit string to whole bytes and prefix the padding length.

        The padding is ``8 - len % 8`` zero bits, i.e. a full extra byte
        when the input is already byte-aligned.  That rule is kept as-is so
        the output format does not change for existing readers.
        """
        padding_value = 8 - (len(encoded_text) % 8)
        padded_info = "{0:08b}".format(padding_value)
        return padded_info + encoded_text + '0' * padding_value

    def __Build_Byte_Array(self, padded_text):
        """Split the bit string into 8-bit groups and return them as ints."""
        return [
            int(padded_text[i:i + 8], 2)
            for i in range(0, len(padded_text), 8)
        ]

    def Compress(self):
        """Compress the file at ``self.path`` and return the output path.

        Trailing whitespace of the input text is stripped before encoding.

        Returns:
            Path of the written file: the input path with its extension
            replaced by ``.bin``.

        Raises:
            OSError: If the input cannot be read or the output cannot be
                written.
        """
        print("Compression for your file starts....")
        filename, _ = os.path.splitext(self.path)
        output_path = filename + '.bin'

        # Start from a clean state so a repeated call never mixes in nodes
        # or codes left over from an earlier run.
        self.__heap = []
        self.__code = {}

        # The input is only read, so do not demand write permission on it.
        with open(self.path, 'r') as file:
            txt = file.read().rstrip()

        # frequencies -> min-heap -> tree -> code table -> bits -> bytes
        freq_dict = self.__text_frequency(txt)
        self.__Build_heap(freq_dict)
        self.__Build_Binary_Tree()
        self.__Build_Tree_Code()
        encoded_text = self.__Build_Encoded_Text(txt)
        padded_text = self.__Build_Padded_Text(encoded_text)
        bytes_array = self.__Build_Byte_Array(padded_text)

        # Open the output only once the data is ready, so a failure above
        # does not leave an empty .bin file behind.
        with open(output_path, 'wb') as output:
            output.write(bytes(bytes_array))
        print('successful compression')
        return output_path
    
# Interactive driver: ask for the file to compress, then run the compressor.
# Compress() returns the path of the written file (input name with a .bin
# extension); as the last expression of the cell it is what gets displayed.
path = input("Enter path of your file which you need to compress....")
h = Huffman(path)
h.Compress()

Enter path of your file which you need to compress....paragraph.txt
Compression for your file starts....
successful compression


'paragraph.bin'