# In [None]:
import heapq

import os


class Binary_tree_node:
    """Node of a binary tree that is ordered by its ``freq`` attribute.

    Attributes are plain instance fields:
      value -- payload stored in the node (``None`` is used by callers for
               internal nodes that only carry a combined frequency).
      freq  -- numeric weight used for ordering.
      left / right -- child nodes, initialised to ``None``.
    """

    def __init__(self, value, freq):
        self.value = value
        self.freq = freq
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Return True when this node's frequency is strictly smaller."""
        return self.freq < other.freq

    def __gt__(self, other):
        """Return True when this node's frequency is strictly larger.

        The previous implementation compared with ``==``, which made
        ``a > b`` true for equal frequencies and false for larger ones.
        ``__eq__`` is intentionally left as the default identity comparison
        so that ``node == None`` checks and hashing keep working.
        """
        return self.freq > other.freq


class Haufmann:
    """Huffman-coding compressor/decompressor for a single text file.

    ``compress()`` reads ``self.path``, builds a Huffman code table from the
    character frequencies and writes the packed bit stream to a ``.bin`` file
    next to the input.  ``decompress()`` turns such a file back into text.

    Limitations that follow directly from the implementation:
      * The code table lives only in memory on this instance; it is not
        written to the ``.bin`` file.  ``decompress()`` therefore only
        produces text when called on the same instance after ``compress()``.
      * ``compress()`` strips trailing whitespace from the input before
        encoding, so that whitespace is not restored by ``decompress()``.
    """

    def __init__(self, path):
        """Remember the input file *path* and start with empty code tables."""
        self.path = path
        self.__heap = []
        self.__codes = {}
        self.__reverse_codes = {}

    def make_frequency_dict(self, text):
        """Return a dict mapping each character of *text* to its count."""
        frequency_dict = {}
        for x in text:
            frequency_dict[x] = frequency_dict.get(x, 0) + 1

        return frequency_dict

    def __buildHeap(self, fre):
        """Push one leaf node per ``symbol -> frequency`` entry onto the heap."""
        for key, frequency in fre.items():
            heapq.heappush(self.__heap, Binary_tree_node(key, frequency))

    def __buildTree(self):
        """Merge the two lightest nodes until a single root remains."""
        while len(self.__heap) > 1:
            smallest_1 = heapq.heappop(self.__heap)
            smallest_2 = heapq.heappop(self.__heap)
            # Internal nodes carry no symbol, only the combined weight.
            node = Binary_tree_node(None, smallest_1.freq + smallest_2.freq)
            node.left = smallest_1
            node.right = smallest_2
            heapq.heappush(self.__heap, node)

    def __build_code_helper(self, root, curr):
        """Record the bit string of every leaf below *root*.

        *curr* is the prefix accumulated so far; going left appends ``'0'``
        and going right appends ``'1'``.  Both ``symbol -> code`` and
        ``code -> symbol`` tables are filled.
        """
        if root is None:
            return

        # A tree consisting of one leaf (input with a single distinct symbol)
        # would otherwise receive the empty code, encode to nothing and be
        # impossible to decode.  Give that lone symbol a one-bit code.
        if root.value is not None and curr == '':
            curr = '0'

        # Explicit stack instead of recursion; the right child is pushed
        # first so leaves are still visited left-to-right.
        pending = [(root, curr)]
        while pending:
            node, prefix = pending.pop()
            if node is None:
                continue
            if node.value is not None:
                self.__codes[node.value] = prefix
                self.__reverse_codes[prefix] = node.value
                continue
            pending.append((node.right, prefix + '1'))
            pending.append((node.left, prefix + '0'))

    def __build_Codes(self):
        """Pop the tree root off the heap and derive the code tables from it."""
        if not self.__heap:
            # Empty input: there is no tree and nothing to encode.
            return
        root = heapq.heappop(self.__heap)
        self.__build_code_helper(root, '')

    def __build_encoded_text(self, text):
        """Return *text* as a string of ``'0'``/``'1'`` using the code table.

        Raises ``KeyError`` for a character that has no code.
        """
        return ''.join(self.__codes[char] for char in text)

    def __build_paded_encoded_text(self, encoded_text):
        """Pad the bit string to a multiple of 8 and prefix the pad length.

        Between 1 and 8 ``'0'`` bits are appended (a full byte when the
        length is already a multiple of 8), and the amount is stored in an
        8-bit header at the front so it can be removed again.
        """
        padding_amt = 8 - (len(encoded_text) % 8)
        header = "{0:08b}".format(padding_amt)

        return header + encoded_text + '0' * padding_amt

    def __build_encoded_byte(self, paded_text):
        """Convert a bit string into a list of integers, one per 8 bits."""
        return [
            int(paded_text[start:start + 8], 2)
            for start in range(0, len(paded_text), 8)
        ]

    def compress(self):
        """Compress ``self.path`` into ``<name>.bin`` and return that path.

        Prints ``'compressed'`` when done.  The code tables built here are
        kept on the instance for a later ``decompress()`` call.
        """
        output_path = os.path.splitext(self.path)[0] + '.bin'

        # Read-only access is sufficient for the input file.
        with open(self.path, 'r') as file:
            text = file.read()
        # NOTE: trailing whitespace is dropped before encoding (kept from the
        # original behaviour), so it is not part of the compressed data.
        text = text.rstrip()

        # Start from a clean state so a repeated call does not mix in codes
        # or heap entries left over from a previous run.
        self.__heap = []
        self.__codes = {}
        self.__reverse_codes = {}

        self.__buildHeap(self.make_frequency_dict(text))
        self.__buildTree()
        self.__build_Codes()

        encoded_text = self.__build_encoded_text(text)
        paded_encoded_text = self.__build_paded_encoded_text(encoded_text)
        final = bytes(self.__build_encoded_byte(paded_encoded_text))

        # The output is opened only once the payload is ready, so a failure
        # above does not leave a truncated file behind.
        with open(output_path, 'wb') as output:
            output.write(final)

        print('compressed')
        return output_path

    def __removepadding(self, text):
        """Strip the 8-bit header and the padding bits it announces.

        Raises ``ValueError`` when *text* is too short to hold the header.
        """
        if len(text) < 8:
            raise ValueError('compressed data is missing the padding header')

        extra_padding = int(text[:8], 2)
        payload = text[8:]
        # Slice by explicit end index: ``payload[:-0]`` would wrongly yield
        # an empty string when the header says no padding was added.
        return payload[:len(payload) - extra_padding]

    def __last_decompression(self, text):
        """Decode a bit string back to text using the reverse code table.

        Bits are accumulated until they match a known code; trailing bits
        that never complete a code are ignored.
        """
        pieces = []
        current_bits = ''

        for bit in text:
            current_bits += bit
            if current_bits in self.__reverse_codes:
                pieces.append(self.__reverse_codes[current_bits])
                current_bits = ''

        return ''.join(pieces)

    def decompress(self, input_path):
        """Decode *input_path* into ``<name>_decompressed.txt``.

        The output name is derived from ``self.path``.  Decoding uses the
        code table currently held by this instance (filled by
        ``compress()``).
        """
        output_path = os.path.splitext(self.path)[0] + "_decompressed" + ".txt"

        # Read the whole file at once instead of one byte per call.
        with open(input_path, 'rb') as file:
            data = file.read()

        bit_string = ''.join(format(byte, '08b') for byte in data)
        actual_text = self.__removepadding(bit_string)
        decompressed_text = self.__last_decompression(actual_text)

        with open(output_path, 'w') as output:
            output.write(decompressed_text)



# Demo driver: compress a text file and immediately decompress it again.
# The original line `path=` had no right-hand side, which is a syntax error
# that prevented the whole file from being parsed.
if __name__ == "__main__":
    path = "sample.txt"  # add your own path
    h = Haufmann(path)
    output_path = h.compress()
    # Must reuse the same instance: the code table built by compress() is
    # what decompress() decodes with.
    h.decompress(output_path)
