In [3]:
import os
import heapq

class BinaryTreeNode:
    def __init__(self, value, freq):
        self.value = value
        self.freq = freq
        self.left = None
        self.right = None
    def __lt__(self, other):
        return self.freq < other.freq
    def __eq__(self, other):
        return self.freq == other.freq

class HuffmanCoding:
    
    def __init__(self, path):
        self.path = path
        self.__heap = []
        self.codes = {}
        
    
    def __make_ferquency_dictionary(self, text):
        
        frequency_dict = {}
        
        for char in text:
            if char not in frequency_dict:
                frequency_dict[char] = 0
            frequency_dict[char] += 1
            
    def __build_heap(self,freq_dict):
        
        for key in freq_dict:
            frequency = freq_dict[key]
            binary_tree_node = BinaryTreeNode(key, frequency)
            heapq.heappush(self.__heap,binary_tree_node)
        
    def __buildTree(self):
        while len(self.__heap) > 1:
            binary_tree_node_1 = heapq.heappop(self.__heap)
            binary_tree_node_2 = heapq.heappop(self.__heap)
            freq_sum = binary_tree_node_1.freq + binary_tree_node_2.freq
            newNode = BinaryTreeNode(None,freq_sum)
            newNode.left = binary_tree_node_1
            newNode.right = binary_tree_node_2
            heapq.heappush(self.__heap, newNode)
        return
    
    def __build_code_helper(self,root,curr_bits):
        if root is None:
            return
        if root.value is not None:
            self.__codes[root.value] = curr_bits
            return
                
        self.__build_code_helper(root.left, curr_bits + "0")
        self.__build_code_helper(root.right, curr_bits + "1")
        
           
    def __build_code(self):
        root = heapq.heappop(self.__heap)
        self.__build_code_helper(root,"")
    
    def __getEncodedText(self,text):
        encoded_text = ""
        for char in self.__codes:
            encoded_text += self.__codes[char]
        return encoded_text
        
    def __getPaddedEncodedText(self, text):
        
        padded_amount = 8 - (len(encoded_text)%8)
        
        for i in range(padded_amount):
            encoded_text += "0"
        padded_info = "{0:08b}".format(padded_amount)
        padded_encoded_text = padded_info + encoded_text
        return padded_encoded_text
    
    
    
    def __getBytesArray(self, padded_encoded_text):
        
        array = []
        for i in range(0,len(padded_encoded_text),8):
            byte = padded_encoded_text[i:i+8]
            array.append(int(byte,2))
        
        return array
    
        
    
    def compress(self):
        #get file from path
        file_name, file_extension = os.path.splitext(self.path)
        output_path = file_name + ".bin"
        
        
        #read text from file
        with open(self.path,"r+") as file, open(output_path, 'wb') as output:
            text = file.read()
            text = text.rstrip()
        
        
            #make frequency dictionary using the text
            freq_dict = self.__make_ferquency_dictionary(text)

            #Construct the heap from freq dict

            self.__build_heap(freq_dict)

            #Construct the binary tree from heap

            self.__buildTree()

            #Construct the codes from binary tree

            self.__buildCode()

            #Creating the encoded text using the codes

            encodedText = self.__getPaddedEncodedText(text)

            #Padding the encoded Text

            padded_encoded_text = self.__getEncodedText(text)




            #Put the encoded text into the binary file
            bytes_array = self.__getBytesArray(padded_encoded_text)

            #Return this binary file as output

            final_bytes = bytes(bytes_array)
            output.write(final_bytes)
        print("Compressed!")
        return output_path
        
        
        
        

In [4]:
path = "C:\Users\Nishant\Desktop\Nixxant_python\03_Data_Structures_and_Algorithm\24_Huffman_Coding\sample1.txt"
h = HuffmanCoding(path)
output_path = h.compress()

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (Temp/ipykernel_13700/393277441.py, line 1)