In [13]:
import heapq
import os

class BinaryTree:
    def __init__(self, value, frequency) :
        self.value = value
        self.frequency = frequency
        self.left = None
        self.right = None
        
    def __lt__(self, other) :
        return self.frequency < other.frequency
        
    def __eq__(self, other) :
        return self.frequency == other.frequency
        
# Huffman encoding gives us prefix free code

class HuffmanCode :

    def __init__(self, path) :
        self.path = path
        self.__heap = []
        self.__code = {}
        self.__reverseCode = {}

        
    def __frequencyFromText__(self, text) :
        frequencyDictionary = {}
        for char in text :
            if char not in frequencyDictionary :
                frequencyDictionary[char] = 0
            frequencyDictionary[char] += 1
        return frequencyDictionary
        
    def __buildHeap(self, frequencyDictionary) :
        for key in frequencyDictionary :
            frequency = frequencyDictionary[key]
            binaryTreeNode = BinaryTree(key,frequency)
            heapq.heappush(self.__heap, binaryTreeNode)

    def __buildBinaryTree(self) :
        while len(self.__heap) > 1:
            binaryTreeNode1 = heapq.heappop(self.__heap)
            binaryTreeNode2 = heapq.heappop(self.__heap)
            sumOfFrequency = binaryTreeNode1.frequency + binaryTreeNode2.frequency
            newNode = BinaryTree(None, sumOfFrequency)
            newNode.left = binaryTreeNode1
            newNode.right = binaryTreeNode2
            heapq.heappush(self.__heap, newNode)
        return 

    def __buildTreeCodeHelper(self, root, currBits) :
        if root is None :
            return 
        if root.value is not None :
            self.__code[root.value] = currBits
            self .__reverseCode[currBits] = root.value
            return
        self.__buildTreeCodeHelper(root.left, currBits + '0')
        self.__buildTreeCodeHelper(root.right, currBits + '1')
    
    def __buildTreeCode(self) :
        root = heapq.heappop(self.__heap)
        self.__buildTreeCodeHelper(root, '')

    def __buildEncodedText(self, text) :
        encodedText = ''
        for char in text :
            encodedText += self.__code[char]
        return encodedText

    def __buildPaddedText(self, encodedText) :
        paddingValue = 8 - len(encodedText) % 8
        for i in range (paddingValue) :
            encodedText += '0'
            
        # String formatting
        
        paddedInfo = "{0:08b}".format(paddingValue) 
        paddedText = paddedInfo + encodedText
        return paddedText

    def __buildByteArray(self, paddedText) :
        array = []
        for i in range (0, len(paddedText), 8) :
            byte = paddedText[i : i+8]
            array.append(int(byte, 2))
        return array

    
    def compression(self) :
        
        print('COMPRESSING.....')
        
        # Steps
        
        # 1 - To access file and extract text from file

        fileName,fileExtension = os.path.splitext(self.path)
        outputPath = fileName + '.bin'
        with open(self.path, 'rb') as file, open(outputPath, 'wb') as output :
            text = file.read()
            text = text.rstrip() # to remove all the extra spaces
        
        # 2 - Calculate frequency of each text and store it into frequency dictionary
            
            frequency_dictionary = self.__frequencyFromText__(text)
    
        # 3 - Create min heap to extract two minimun element from it
    
            build_heap = self.__buildHeap(frequency_dictionary)
            
        # 4 - Construct binary tree from heap
    
            self.__buildBinaryTree()
    
        # 5 - Construct code form binary tree and store it into dictionary
            
            self.__buildTreeCode()
            
        # 6 - Calculate encoded text
            
            encodedText = self.__buildEncodedText(text)
        # 7 - Padding of encoded text
            
            paddedText = self.__buildPaddedText(encodedText)
           
        # 8 - Return binary file as output
            
            bytesArray = self.__buildByteArray(paddedText)
            finalBytes = bytes(bytesArray)
            output.write(finalBytes)
        return outputPath

    def __removePadding(self, text) :
        paddedInfo = text[:8] # slicing
        paddingValue = int(paddedInfo, 2) # base 2
        text = text[8:]
        # text = text[-1 * paddingValue]
        # return text
        return text[:-paddingValue]
    
    def __decodedText(self, text):
        currentBits = ''
        decodedText = bytearray()
        for char in text:
            currentBits += char
            if currentBits in self.__reverseCode:
                decodedText.append(self.__reverseCode[currentBits])
                currentBits = ''
        return bytes(decodedText)
                
    def decompress(self, inputPath) :
        filename, fileExtension = os.path.splitext(inputPath)
        outputPath = filename + '_decompressed' + fileExtension
        with open (inputPath, 'rb') as file, open(outputPath, 'wb') as output :
            bitString = ''
            byte = file.read(1) # reading one by one 
            while byte :
                byte = ord(byte) # ord is converting into integer
                bits = bin(byte)[2:].rjust(8, '0') # bin is converting into binary and rjust to convet into 8 bit format 
                bitString += bits
                byte = file.read(1)
            textAfterRemovingPadding = self.__removePadding(bitString)
            actualText = self.__decodedText(textAfterRemovingPadding)
            output.write(actualText.encode())
        return outputPath

path = input("ENTER PATH : -> ")
h = HuffmanCode(path)
compressedFile = h.compression()
h.decompress(compressedFile)
print('Compression Successfull')

COMPRESSING.....


TypeError: ord() expected string of length 1, but int found