In [None]:
import heapq
import os


class BinaryTree:
    """Node of a Huffman tree that orders itself by frequency.

    The ordering methods compare ``frequ`` only, which is what lets ``heapq``
    keep a list of nodes as a min-heap keyed on frequency.

    Attributes are set in ``__init__``:
      value -- payload stored on the node (whatever the caller passes in).
      frequ -- weight used for every comparison.
      left / right -- child links, initialised to ``None``.

    Because ``__eq__`` is overridden without ``__hash__``, instances are
    unhashable (Python sets ``__hash__`` to ``None`` in that case).
    """

    def __init__(self, value, frequ):
        self.value = value
        self.frequ = frequ
        self.left = None
        self.right = None

    def __lt__(self, other):
        """Return True when this node's frequency is strictly smaller."""
        # Defer to Python's comparison protocol for foreign operands instead
        # of failing with AttributeError on ``other.frequ``.
        if not isinstance(other, BinaryTree):
            return NotImplemented
        return self.frequ < other.frequ

    def __eq__(self, other):
        """Return True when both nodes carry the same frequency."""
        # ``node == None`` (or any non-node) now evaluates to False rather
        # than raising.
        if not isinstance(other, BinaryTree):
            return NotImplemented
        return self.frequ == other.frequ


class HuffamanCode:
    """Huffman compressor/decompressor for the text file at ``path``.

    ``Compression`` derives a prefix code from the character frequencies of
    the file and writes the packed bit stream to ``<stem>.bin``.
    ``decompression`` turns such a file back into text using the code table
    that the most recent ``Compression`` call left on this instance; the
    table is NOT written into the ``.bin`` file, so a file can only be
    decoded by the instance that produced it.

    File layout: one header byte holding the number of zero bits appended
    to the end of the stream, followed by the encoded bits packed 8 per byte.
    """

    def __init__(self, path):
        self.path = path
        self.__heap = []          # min-heap of BinaryTree nodes
        self.__code = {}          # character -> bit string
        self.__reversecode = {}   # bit string -> character

    def __frequency_from_text(self, text):
        """Return a dict mapping each character of ``text`` to its count."""
        freq_dict = {}
        for char in text:
            freq_dict[char] = freq_dict.get(char, 0) + 1
        return freq_dict

    def __Build_heap(self, frequency_dict):
        """Push one leaf node per (character, frequency) pair onto the heap."""
        # Nodes are pushed one by one (not heapify'd) so that ties between
        # equal frequencies resolve exactly as they always have.
        for key, frequency in frequency_dict.items():
            heapq.heappush(self.__heap, BinaryTree(key, frequency))

    def __Build_binary_tree(self):
        """Merge the two lightest nodes repeatedly until one root remains."""
        while len(self.__heap) > 1:
            lighter = heapq.heappop(self.__heap)
            heavier = heapq.heappop(self.__heap)
            # Internal nodes carry value=None; that is how the code builder
            # tells them apart from leaves.
            parent = BinaryTree(None, lighter.frequ + heavier.frequ)
            parent.left = lighter
            parent.right = heavier
            heapq.heappush(self.__heap, parent)

    def __Build_Tree_Code_Helper(self, root, curbits):
        """Walk the tree, recording the bit path ('0' left, '1' right) to each leaf."""
        if root is None:
            return
        if root.value is not None:
            # When the whole tree is a single leaf the path is empty; an empty
            # code would encode the text to nothing, so give it one bit.
            code = curbits or '0'
            self.__code[root.value] = code
            self.__reversecode[code] = root.value
            return

        self.__Build_Tree_Code_Helper(root.left, curbits + '0')
        self.__Build_Tree_Code_Helper(root.right, curbits + '1')

    def __Build_Tree_Code(self):
        """Pop the tree root off the heap and fill both code tables from it."""
        if not self.__heap:
            # Empty input: there is no tree and nothing to encode.
            return
        root = heapq.heappop(self.__heap)
        self.__Build_Tree_Code_Helper(root, '')

    def __Build_Encoded_Text(self, text):
        """Return ``text`` with every character replaced by its bit string."""
        return ''.join(self.__code[char] for char in text)

    def __Build_padded_text(self, encoded_text):
        """Pad ``encoded_text`` to a multiple of 8 bits and prepend the pad count.

        The first 8 characters of the result are the pad count in binary
        (0-7); the zero padding itself goes at the end.
        """
        # The outer modulo keeps an already aligned stream from gaining a
        # whole extra byte of padding.
        padding_value = (8 - len(encoded_text) % 8) % 8
        padded_info = "{0:08b}".format(padding_value)
        return padded_info + encoded_text + '0' * padding_value

    def __Build_bytes_array(self, padded_text):
        """Convert a string of '0'/'1' characters into a list of byte values."""
        return [int(padded_text[i:i + 8], 2)
                for i in range(0, len(padded_text), 8)]

    def Compression(self):
        """Compress ``self.path`` into ``<stem>.bin`` and return that path.

        Prints a start and a success message. Trailing whitespace of the
        input is stripped before encoding, so it is not restored by
        ``decompression``.
        """
        print("starting compression")
        filename, _ = os.path.splitext(self.path)
        output_path = filename + '.bin'

        # Start from clean tables so codes from an earlier call on this
        # instance cannot leak into this run.
        self.__heap = []
        self.__code = {}
        self.__reversecode = {}

        # Read-only access is all that is needed for the source file.
        with open(self.path, 'r') as file:
            text = file.read()
        # NOTE: kept from the original behaviour; this makes the round trip
        # lossy for trailing whitespace.
        text = text.rstrip()

        frequency_dict = self.__frequency_from_text(text)
        # Min-heap of leaves, lightest first.
        self.__Build_heap(frequency_dict)
        # Collapse the heap into a single Huffman tree.
        self.__Build_binary_tree()
        # Derive the per-character bit strings from the tree.
        self.__Build_Tree_Code()
        encoded_text = self.__Build_Encoded_Text(text)
        padded_text = self.__Build_padded_text(encoded_text)
        bytes_array = self.__Build_bytes_array(padded_text)

        with open(output_path, 'wb') as output:
            output.write(bytes(bytes_array))
        print("compressed_success")
        return output_path

    def __Remove_Padding(self, text):
        """Strip the 8-bit pad-count header and the trailing pad bits.

        Raises ValueError when ``text`` is too short to contain the header.
        """
        if len(text) < 8:
            raise ValueError("compressed data is missing the padding header")
        extra_padding = int(text[:8], 2)
        payload = text[8:]
        if extra_padding == 0:
            # payload[:-0] would be the empty string, not the whole payload.
            return payload
        return payload[:-extra_padding]

    def __Decompress_Text(self, text):
        """Decode a bit string by matching growing prefixes against the code table."""
        decoded = []
        current_bits = ''
        for bit in text:
            current_bits += bit
            if current_bits in self.__reversecode:
                decoded.append(self.__reversecode[current_bits])
                current_bits = ''
        return ''.join(decoded)

    def decompression(self, input_path):
        """Decode ``input_path`` into ``<stem>_decompressed.txt``.

        Relies on the code table built by a previous ``Compression`` call on
        this same instance. Returns None.
        """
        filename, _ = os.path.splitext(input_path)
        output_path = filename + '_decompressed' + '.txt'

        with open(input_path, 'rb') as file:
            data = file.read()
        # Iterating bytes yields ints; render each as exactly 8 bits.
        bit_string = ''.join("{0:08b}".format(byte) for byte in data)

        actual_text = self.__Remove_Padding(bit_string)
        decompressed_text = self.__Decompress_Text(actual_text)

        with open(output_path, 'w') as output:
            output.write(decompressed_text)
        return



# Ask for the file to compress, write the .bin next to it, then expand that
# .bin again with the same instance (it still holds the code table).
path = input("enter the path of file to compress")
h = HuffamanCode(path)
compressed_file = h.Compression()
h.decompression(compressed_file)




enter the path of file to compressritvik.txt
starting compression
compressed_success
