In [21]:
import heapq, os
class BTNode:
    """Node of a binary tree ordered by frequency.

    Attributes:
        value: payload stored in the node (any object; may be None).
        freq:  weight used for ordering and equality.
        left:  left child, None until assigned by the caller.
        right: right child, None until assigned by the caller.

    Ordering (``<``) and equality (``==``) look only at ``freq``, which is what
    lets instances be pushed onto a ``heapq`` heap directly.  Because ``__eq__``
    is overridden without ``__hash__``, instances are not hashable.
    """

    def __init__(self,value,freq):
        self.value = value
        self.freq = freq
        self.left = None
        self.right = None

    def __lt__(self,other):
        """Return True when this node's frequency is smaller than ``other``'s."""
        # Defer to Python's comparison protocol for foreign types instead of
        # failing with AttributeError on ``other.freq``.
        if not isinstance(other, BTNode):
            return NotImplemented
        return self.freq < other.freq

    def __eq__(self,other):
        """Return True when both nodes carry the same frequency."""
        # ``node == None`` / ``node == 'x'`` must evaluate to False, not raise.
        if not isinstance(other, BTNode):
            return NotImplemented
        return self.freq == other.freq

In [22]:
class HuffmanCoding:
    """Huffman-code compressor / decompressor for a text file.

    ``compress`` reads ``self.path``, builds a Huffman code for its characters
    and writes the packed bit stream to ``<name>.bin``.  The code table is kept
    only on the instance (it is not stored in the output file), so
    ``decompress`` must be called on the same instance that ran ``compress``.

    Output layout: one header byte holding the number of padding bits (0-7),
    followed by the encoded bits, zero-padded at the end to a whole byte.
    """

    def __init__(self,path):
        """Remember the input ``path``; no file is touched until ``compress``."""
        self.path = path
        self.__heap = []        # min-heap of BTNode, ordered by frequency
        self.__codes = {}       # character -> bit string
        self.__revCodes = {}    # bit string -> character

    ## Construct Frequency Dictionary
    def __make_fdict(self,text):
        """Return a dict mapping each character of ``text`` to its count."""
        freqD = {}
        for char in text:
            freqD[char] = freqD.get(char,0) + 1
        return freqD

    ## Construct Heap
    def __buildHeap(self, freq_dict):
        """Push one leaf node per (character, frequency) pair onto the heap."""
        for key, frequency in freq_dict.items():
            heapq.heappush(self.__heap, BTNode(key, frequency))
        return

    ## Construct Binary Tree
    def __buildTree(self):
        """Merge the two lowest-frequency nodes until one root remains.

        Internal nodes are created with ``value=None``; that is how
        ``__buildCodesHelper`` tells them apart from leaves.
        """
        while len(self.__heap) > 1:
            BTN1 = heapq.heappop(self.__heap)
            BTN2 = heapq.heappop(self.__heap)

            new_node = BTNode(None, BTN1.freq + BTN2.freq)
            new_node.left = BTN1
            new_node.right = BTN2
            heapq.heappush(self.__heap,new_node)
        return

    ## Build Codes
    def __buildCodesHelper(self,root,curr_bits):
        """Walk the tree, appending '0' for left and '1' for right edges.

        On reaching a leaf (``value`` is not None) the accumulated bit string
        is recorded in both the forward and the reverse code table.
        """
        if root is None:
            return
        if root.value is not None:
            self.__codes[root.value] = curr_bits
            self.__revCodes[curr_bits] = root.value
            return
        self.__buildCodesHelper(root.left,curr_bits+'0')
        self.__buildCodesHelper(root.right,curr_bits+'1')
        return

    def __buildCodes(self):
        """Pop the tree root off the heap and fill the code tables from it."""
        if not self.__heap:
            # Empty input: there is nothing to encode, leave the tables empty.
            return
        root = heapq.heappop(self.__heap)
        if root.value is not None:
            # Only one distinct symbol: the root is itself a leaf.  Starting
            # from '' would give it a zero-length code and the text could not
            # be recovered, so give it the one-bit code '0'.
            self.__buildCodesHelper(root,'0')
            return
        self.__buildCodesHelper(root,'')
        return

    ## Encoding Text
    def __getEncodedText(self,text):
        """Return the concatenated bit strings for every character of ``text``."""
        # join() avoids repeated string re-allocation on large inputs.
        return ''.join(self.__codes[char] for char in text)

    ## Padding Encoded Text
    def __getPadded(self,encoded_text):
        """Zero-pad ``encoded_text`` to a multiple of 8 bits and prepend a header.

        The header is the pad length as an 8-bit binary number.  A payload that
        is already byte-aligned gets a pad length of 0 (no extra byte).
        """
        padded_amt = (8 - len(encoded_text) % 8) % 8
        padded_info = '{0:08b}'.format(padded_amt)
        return padded_info + encoded_text + '0' * padded_amt

    ## Convert Into Bytes
    def __getBytesArr(self,padded_text):
        """Convert a bit string into a list of ints, 8 bits per element."""
        return [int(padded_text[i:i+8],2) for i in range(0,len(padded_text),8)]

    ## Compressing File
    def compress(self):
        """Compress ``self.path`` into ``<name>.bin`` and return that path.

        Prints ``Compressed`` on success.

        Raises:
            ValueError: if the output path would be the input file itself.
            OSError: if the input cannot be read or the output cannot be written.
        """
        file_name, _ = os.path.splitext(self.path)
        output_path = file_name + '.bin'
        if os.path.abspath(output_path) == os.path.abspath(self.path):
            raise ValueError(
                "input file already has the '.bin' extension; "
                "compressing it would overwrite the input")

        # Start from a clean slate so a second compress() on the same instance
        # does not inherit nodes or codes from the previous run.
        self.__heap = []
        self.__codes = {}
        self.__revCodes = {}

        with open(self.path) as file:
            # NOTE: trailing whitespace is stripped before encoding (kept for
            # compatibility), so it is not reproduced by decompress().
            text = file.read().rstrip()

        freq_dict = self.__make_fdict(text)
        self.__buildHeap(freq_dict)
        self.__buildTree()
        self.__buildCodes()

        encoded_text = self.__getEncodedText(text)
        padded_encoded_text = self.__getPadded(encoded_text)
        bytes_arr = self.__getBytesArr(padded_encoded_text)

        # Open the output only once the data is ready, so a failure above does
        # not leave an empty .bin file behind.
        with open(output_path,'wb') as output:
            output.write(bytes(bytes_arr))

        print('Compressed')
        return output_path

    ## Remove Padding
    def __remPadding(self,text):
        """Strip the 8-bit pad-length header and the trailing pad bits.

        Raises:
            ValueError: if ``text`` is too short to contain the header.
        """
        if len(text) < 8:
            raise ValueError('compressed data is missing the padding header')
        extra_padding = int(text[:8],2)
        text = text[8:]
        # Slice by explicit end index: ``text[:-0]`` would yield '' when the
        # pad length is 0.
        return text[:len(text) - extra_padding]

    ## Decoding Text
    def __decodeText(self,text):
        """Translate a bit string back to characters using the reverse table."""
        curr_bits = ''
        decoded = []
        for bit in text:
            curr_bits += bit
            character = self.__revCodes.get(curr_bits)
            if character is not None:
                decoded.append(character)
                curr_bits = ''
        return ''.join(decoded)

    ## Decompressing File
    def decompress(self,input_path):
        """Decode ``input_path`` and write ``<name>_decompressed.txt``.

        Must be called on the instance whose ``compress`` produced the file,
        because the code table lives only in memory.

        Raises:
            ValueError: if the file lacks the header byte, or if it contains
                encoded bits but this instance holds no code table.
            OSError: if the input cannot be read or the output cannot be written.
        """
        file_name, _ = os.path.splitext(input_path)
        output_path = file_name + '_decompressed.txt'

        with open(input_path,'rb') as file:
            data = file.read()

        # Iterating over bytes yields ints; render each as 8 binary digits.
        bit_string = ''.join('{0:08b}'.format(byte) for byte in data)
        actual_text = self.__remPadding(bit_string)
        if actual_text and not self.__revCodes:
            raise ValueError(
                'no code table available; call compress() on this instance first')
        decomp_text = self.__decodeText(actual_text)

        with open(output_path,'w') as output:
            output.write(decomp_text)
        return

In [24]:
# Round-trip demo: compress a text file, then decompress the result with the
# SAME HuffmanCoding instance (decompress uses the code table that compress
# stored on the instance). The relative path is resolved against the kernel's
# current working directory.
path = 'New Text Document.txt'
hc = HuffmanCoding(path)
# compress() prints 'Compressed' and returns the path of the .bin file it wrote.
output_path = hc.compress()
# Writes '<name>_decompressed.txt' alongside the .bin file; returns nothing.
hc.decompress(output_path)

Compressed
