In [1]:
import numpy as np
import heapq
import pandas as pd
import sys
import os
import time
import binascii
import pickle

from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import QApplication, QWidget, QInputDialog, QLineEdit, QFileDialog
from PyQt5.QtGui import QIcon
from PyQt5.QtWidgets import QMessageBox

In [2]:
class fileClass:
    """Loads a text file and tallies how often each character occurs.

    Attributes set per instance:
        f           -- the text-mode file object opened in ``__init__``.
        array       -- every character of the file in order, followed by a
                       single ``''`` end-of-file sentinel.
        data_dict   -- ``{character: count}`` in first-occurrence order; the
                       ``''`` sentinel is counted once (callers pop it).
        sorted_dict -- ``data_dict`` re-ordered by ascending count.
        tree        -- placeholder for the Huffman tree, assigned by callers.
    """
    # Class-level defaults; __init__ replaces all of them on every instance.
    f = None
    array = []
    data_dict = {}
    sorted_dict = {}
    tree = None

    def __init__(self, path):
        """Open ``path`` for reading in text mode and reset all tallies."""
        self.f = open(path, "r")

        self.array = []
        self.data_dict = {}
        self.sorted_dict = {}
        self.tree = None

    def readFile(self):
        """Read the whole file, fill ``array``/``data_dict`` and sort the counts.

        The file handle is closed once the content has been read, so the
        object no longer keeps the file locked. A repeated call on an already
        consumed file only re-sorts the existing counts.
        """
        if self.f.closed:
            self.sort_dict()
            return

        # One bulk read instead of one read(1) call per character; the
        # `with` block guarantees the handle is released even on a read error.
        with self.f:
            text = self.f.read()

        for char in text:
            self.array.append(char)
            self.data_dict[char] = self.data_dict.get(char, 0) + 1

        # Keep the historical end-of-file sentinel: callers stop at '' in
        # `array` and pop the '' key from both dictionaries.
        self.array.append('')
        self.data_dict[''] = self.data_dict.get('', 0) + 1

        self.sort_dict()

    def sort_dict(self):
        """Rebuild ``sorted_dict`` from ``data_dict`` ordered by ascending count."""
        self.sorted_dict = dict(sorted(self.data_dict.items(), key = lambda x : x[1]))

In [3]:
class Node:
    """One vertex of the Huffman tree.

    ``val`` is the weight used for ordering, ``let`` is the character stored
    in a leaf (``None`` otherwise), ``parent``/``left``/``right`` link the
    tree, and ``isRight`` holds the branch label assigned by the tree builder.
    """
    # Class-level fallbacks: `left` and `right` are never set in __init__, so
    # a fresh node reads these values until a builder assigns children.
    left = None
    right = None
    parent = None
    val = 0
    let = None
    isRight = ''

    def __init__(self, val, parent = None, isRight = 0, let = None):
        self.val, self.let = val, let
        self.parent, self.isRight = parent, isRight

    def __lt__(self, other):
        # NOTE: deliberately non-strict -- two nodes of equal weight each
        # compare as "less than" the other. Kept as-is because it decides how
        # ties are broken when nodes are ordered.
        return other.val >= self.val

    def __str__(self):
        description = "Node with value = " + str(self.val)
        if self.let is None:
            return description
        return description + " and letter = " + self.let

In [4]:
class Tree:
    """Wrapper around the root of a node tree that derives per-symbol bit codes.

    Nodes are expected to expose ``left``, ``right``, ``parent``, ``let`` and
    ``isRight`` attributes.
    """
    root = None

    def __init__(self, root):
        self.root = root
        # The code table must be per instance: a class-level dict would be
        # shared by every Tree, so codes of a previously processed file would
        # leak into the table of the next one.
        self.new_codes = {}

    def dfs(self, goal):
        """Return the first node whose ``let`` equals ``goal``, or ``None``.

        Iterative depth-first search; the right child is pushed last and is
        therefore visited before the left one.
        """
        if self.root is None:
            return None

        frontier = [self.root]
        explored = set()  # ids of visited nodes, O(1) membership test

        while frontier:
            node = frontier.pop()
            explored.add(id(node))
            if node.let == goal:
                return node
            if node.left is not None and id(node.left) not in explored:
                frontier.append(node.left)
            if node.right is not None and id(node.right) not in explored:
                frontier.append(node.right)

        return None

    def get_ancestral_chain(self, node):
        """Return ``[node, parent, grandparent, ..., root]``."""
        current = node
        chain = [current]
        while current.parent is not None:
            current = current.parent
            chain.append(current)
        return chain

    def construct_new_codes(self, old_codes):
        """Fill ``self.new_codes`` with a bit string for every key of ``old_codes``.

        The code of a symbol is the concatenation of the ``isRight`` labels on
        the path from the root down to the symbol's node; nodes whose label is
        ``None`` (the root) contribute nothing.

        Raises:
            ValueError: if a key of ``old_codes`` has no node in the tree.
        """
        for symbol in list(old_codes.keys()):
            node = self.dfs(goal = symbol)
            if node is None:
                raise ValueError("symbol %r is not present in the tree" % (symbol,))
            # The chain runs leaf -> root, the code is read root -> leaf.
            path = reversed(self.get_ancestral_chain(node))
            self.new_codes[symbol] = "".join(
                str(step.isRight) for step in path if step.isRight is not None
            )

In [5]:
class Huffman:
    """Builds the Huffman tree for a file object exposing ``sorted_dict``."""
    file = None

    def __init__(self, File):
        self.file = File

    def construct_file_tree(self):
        """Build the tree from ``self.file.sorted_dict`` and store it in ``self.file.tree``.

        Every ``{symbol: count}`` entry becomes a leaf. The two lightest nodes
        are merged repeatedly under a new parent until one root remains; the
        lighter child gets branch label ``'0'`` and the other ``'1'``.

        Raises:
            ValueError: if ``sorted_dict`` is empty (there is nothing to encode).
        """
        min_heap = []
        for symbol, count in self.file.sorted_dict.items():
            heapq.heappush(min_heap, Node(val = count, let = symbol))

        if not min_heap:
            raise ValueError("cannot build a Huffman tree: the file has no characters")

        if len(min_heap) == 1:
            # A single distinct symbol would otherwise become the root itself
            # and end up with an empty code. Hang it under an artificial root
            # so it receives the 1-bit code '0'.
            only_leaf = heapq.heappop(min_heap)
            root = Node(val = only_leaf.val)
            only_leaf.parent = root
            only_leaf.isRight = '0'
            root.left = only_leaf
            heapq.heappush(min_heap, root)

        while len(min_heap) > 1:
            node1 = heapq.heappop(min_heap)
            node2 = heapq.heappop(min_heap)
            parent = Node(val = node1.val + node2.val)
            node1.parent = node2.parent = parent
            if node1 < node2:
                node1.isRight = '0'
                node2.isRight = '1'
                parent.left = node1
                parent.right = node2
            else:
                node1.isRight = '1'
                node2.isRight = '0'
                parent.left = node2
                parent.right = node1
            heapq.heappush(min_heap, parent)

        # The last remaining node is the root; it carries no branch label.
        root = min_heap[0]
        root.isRight = None
        self.file.tree = Tree(root)

In [6]:
def compress(file, compressed_name):
    """Huffman-encode ``file.array`` and write it to ``compressed_name + ".bin"``.

    Parameters:
        file            -- object exposing ``array`` (characters, terminated by
                           ``''``), ``data_dict`` (symbol counts) and ``tree``
                           (with ``construct_new_codes`` / ``new_codes``).
        compressed_name -- output path without the ``.bin`` extension.

    Layout of the written file:
        1. 4 bytes, big-endian: number of payload bytes that follow.
        2. payload: the code bits padded with '0' to a byte boundary, followed
           by one byte holding the number of padding bits.
        3. the pickled ``{symbol: code}`` table.

    Also prints a table of every distinct character with its plain binary
    code and its new code.

    Raises:
        KeyError: if a character of ``file.array`` has no entry in the table.
        ValueError: if the payload length does not fit the 4-byte header.
    """
    file.tree.construct_new_codes(file.data_dict)
    codes = file.tree.new_codes
    compressed_name += ".bin"

    code_chunks = []
    table_rows = []
    listed = set()  # characters already present in the printed table

    for character in file.array:
        if character == '':
            break  # end-of-file sentinel

        code = codes[character]
        code_chunks.append(str(code))

        if character not in listed:
            listed.add(character)
            table_rows.append({"Byte": character,
                               "Code": bin(ord(character))[2:],
                               "New Code": code})

    # Build the frame once: DataFrame.append no longer exists in pandas 2.x
    # and appending row by row was quadratic anyway.
    character_frame = pd.DataFrame(table_rows, columns = ["Byte", "Code", "New Code"])
    print(character_frame)

    encoded_bits = ''.join(code_chunks)

    # Pad to a whole number of bytes and record how many bits were added.
    no_of_pads = (8 - len(encoded_bits) % 8) % 8
    encoded_bits += '0' * no_of_pads
    encoded_bits += '{0:08b}'.format(no_of_pads)

    payload_size = len(encoded_bits) // 8
    if payload_size >= 1 << 32:
        raise ValueError("encoded data is too large for the 32-bit length header")

    payload = bytes(int(encoded_bits[start:start + 8], 2)
                    for start in range(0, len(encoded_bits), 8))

    with open(compressed_name, 'wb') as compressed_file:
        compressed_file.write(payload_size.to_bytes(4, 'big'))
        compressed_file.write(payload)
        compressed_file.write(pickle.dumps(codes))

In [7]:
def decompress(compressed_file, decompressed_name):
    """Decode a Huffman archive into ``decompressed_name + ".txt"``.

    Parameters:
        compressed_file   -- binary file object positioned at the start of an
                             archive: 4-byte big-endian payload length, the
                             payload (code bits, then one byte with the number
                             of padding bits), then a pickled
                             ``{symbol: code}`` table.
        decompressed_name -- output path without the ``.txt`` extension.

    Raises:
        ValueError: if the header or the payload is truncated or empty.
    """
    decompressed_name += ".txt"

    header = compressed_file.read(4)
    if len(header) != 4:
        raise ValueError("compressed file is truncated: missing length header")
    offset = int.from_bytes(header, 'big')

    payload = compressed_file.read(offset)
    if offset == 0 or len(payload) != offset:
        raise ValueError("compressed file is truncated: incomplete payload")

    # The last payload byte stores how many '0' bits were appended as padding.
    padding_info = payload[-1]
    encoded_bits = ''.join('{:08b}'.format(byte) for byte in payload[:-1])
    if padding_info > 0:
        encoded_bits = encoded_bits[:-padding_info]

    # SECURITY: pickle can execute arbitrary code while loading. Only open
    # archives that come from a trusted source.
    bit_codes_dict = pickle.loads( compressed_file.read() )

    # Invert the table once so each lookup is O(1). setdefault keeps the first
    # symbol if two symbols share a code.
    symbol_by_code = {}
    for symbol, code in bit_codes_dict.items():
        symbol_by_code.setdefault(code, symbol)

    with open(decompressed_name, 'w') as decompressed_file:
        bit_string = ''
        for bit in encoded_bits:
            bit_string += bit
            if bit_string in symbol_by_code:
                decompressed_file.write(symbol_by_code[bit_string])
                bit_string = ''

In [8]:
def compressFolder(folderPath):
    """Bundle every ``.txt`` file of a folder into one Huffman archive.

    The files are concatenated into ``folderPath + ".txt"``; each one is
    preceded by ``"C1000" + filename + "C1000"`` as a separator. That combined
    file is then compressed to ``folderPath + ".bin"``. The combined text file
    is left on disk for the caller.

    NOTE: the ``C1000`` marker is not escaped, so a file whose name or content
    contains that exact text cannot be split apart again on extraction.

    Raises:
        ValueError: if the folder contains no ``.txt`` file.
    """
    fileNames = [name for name in os.listdir(folderPath) if name.endswith(".txt")]
    if not fileNames:
        raise ValueError("no .txt files to compress in " + folderPath)

    compressedPath = folderPath + '.txt'

    # Open the combined file once instead of re-opening it for every member.
    with open(compressedPath, 'w') as combined:
        for filename in fileNames:
            # os.path.join instead of a hard-coded "\\" keeps this portable.
            tempFile = os.path.join(folderPath, filename)
            print(tempFile + " and " + compressedPath)
            with open(tempFile, 'r') as member:
                combined.write("C1000" + filename + "C1000")
                for line in member:
                    combined.write(line)

    file = fileClass(compressedPath)
    file.readFile()
    # Release the handle so the caller can delete the combined file afterwards.
    file.f.close()
    # Drop the end-of-file sentinel so it does not get a code of its own.
    file.data_dict.pop('', None)
    file.sorted_dict.pop('', None)

    huf = Huffman(file)
    huf.construct_file_tree()

    print("\nHuffman File Sorted Dic:")

    compress(huf.file, compressedPath[:-4])
    print(compressedPath[:-4])


In [9]:
def decompressFolder(compressed_file, decompressed_name):
    """Decode a folder archive and recreate its files in a new directory.

    The archive is first decoded into ``decompressed_name + ".txt"`` by
    ``decompress``. That text is split on the ``"C1000"`` marker into
    alternating file names and file contents, each file is written into the
    directory ``decompressed_name``, and the intermediate text file is
    removed.

    Parameters:
        compressed_file   -- binary file object positioned at the archive start.
        decompressed_name -- output directory path (also the stem of the
                             temporary text file).
    """
    # Same decoding as for a single file; reuse it instead of duplicating it.
    decompress(compressed_file, decompressed_name)
    decompressed_name += ".txt"

    with open(decompressed_name, "r") as combined:
        contents = combined.read().split("C1000")

    target_dir = os.path.join(os.getcwd(), decompressed_name[:-4])
    os.makedirs(target_dir, exist_ok = True)

    # contents = ['', name1, body1, name2, body2, ...]; stopping one short of
    # the end ignores a dangling name that has no body.
    for index in range(1, len(contents) - 1, 2):
        # Keep only the base name so an entry cannot escape the target folder.
        member_name = os.path.basename(contents[index])
        with open(os.path.join(target_dir, member_name), "w") as member:
            member.write(contents[index + 1])

    os.remove(decompressed_name)

In [10]:
class Ui_MainWindow(object):
    
    def browseFiles(self): 
        filename = QFileDialog.getOpenFileName()
        path = filename[0]

        self.textField_path.setText(path)
    
    def show_popup(self, flag):
        if flag == 0:
            msg = QMessageBox()
            msg.setWindowTitle("No File Selected")
            msg.setText("Please choose a file to compress!")
            msg.exec_()
        elif flag == 1:
            msg = QMessageBox()
            msg.setWindowTitle("Unavailable File")
            msg.setText("Please choose a (.bin) file to decompress!")
            msg.exec_()
        elif flag == 2:
            msg = QMessageBox()
            msg.setWindowTitle("Unavailable File")
            msg.setText("Please choose a (.txt) file to compress!")
            msg.exec_()
        else:
            msg = QMessageBox()
            msg.setWindowTitle("Unavailable Folder")
            msg.setText("Please choose a folder to compress!")
            msg.exec_()
            
    def showTimeAndRatio(self, flag, time, ratio):
        if(flag):
            msg = QMessageBox()
            msg.setWindowTitle("Compression Successful")
            Text = 'Compression Execution time : ' + str(time) + 'ms\t\n' + 'Compression ratio : ' + str('{:.2f}'.format(ratio)) + '%'
            msg.setText(Text)
            msg.exec_()
        else:
            msg = QMessageBox()
            msg.setWindowTitle("Decompression Successful")
            Text = 'Decompression Execution time : ' + str(time) + 'ms\t\n'
            msg.setText(Text)
            msg.exec_()
        
    def compressFile(self):
        fileName = self.textField_path.toPlainText()
        if fileName == '':
            self.show_popup(0)
            return
        elif fileName[-4:] != ".txt":
            self.show_popup(2)
            return
        
        file = fileClass(fileName)
        file.readFile()
        file.data_dict.pop('')
        file.sorted_dict.pop('')
        
        huf = Huffman(file)
        huf.construct_file_tree()
        n = file.tree.dfs(goal = 'a')
        chain = file.tree.get_ancestral_chain(n)
        
        print(chain[1])
        
        print("\nHuffman File Sorted Dic:")
        huf.file.sorted_dict
        
        start_time = time.time()
        compress(huf.file, fileName[:-4])
        execTime = int((time.time() - start_time) * 1000)
        
        original_file_size = os.path.getsize(fileName) 
        compr_file_size = os.path.getsize(fileName[:-4] + '.bin')
        compRatio = compr_file_size/original_file_size * 100
        
        self.showTimeAndRatio(True, execTime, compRatio)
        
    def decompressFile(self):
        fileName = self.textField_path.toPlainText()
        if fileName == '':
            self.show_popup(0)
            return
        
        elif fileName[-4:] != ".bin":
            self.show_popup(1)
            return
        
        comp_f = open(fileName, 'rb')

        start_time = time.time()
        decompress(comp_f, fileName[:-4] + '_decompressed')
        execTime = int((time.time() - start_time) * 1000)
        comp_f.close()
        
        self.showTimeAndRatio(False, execTime, 0)
        
    def compressFolder(self):
        fileName = self.textField_path.toPlainText()
        if fileName == '':
            self.show_popup(0)
            return
        elif fileName[-4:-3] == ".":
            self.show_popup(3)
            return
        
        start_time = time.time()
        compressFolder(fileName)
        execTime = int((time.time() - start_time) * 1000)
        
        fileName += ".txt"
        
        original_file_size = os.path.getsize(fileName) 
        compr_file_size = os.path.getsize(fileName[:-4] + '.bin')
        compRatio = compr_file_size/original_file_size * 100
        
        self.showTimeAndRatio(True, execTime, compRatio)
        
        os.remove(fileName)
        
    def decompressFolder(self):
        fileName = self.textField_path.toPlainText()
        if fileName == '':
            self.show_popup(0)
            return
        
        elif fileName[-4:] != ".bin":
            self.show_popup(1)
            return
        
        comp_f = open(fileName, 'rb')

        start_time = time.time()
        decompressFolder(comp_f, fileName[:-4] + '_decompressed')
        execTime = int((time.time() - start_time) * 1000)
        comp_f.close()
        
        self.showTimeAndRatio(False, execTime, 0)
    
    def setupUi(self, MainWindow):
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(394, 300)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.textField_path = QtWidgets.QTextEdit(self.centralwidget)
        self.textField_path.setGeometry(QtCore.QRect(20, 70, 351, 31))
        font = QtGui.QFont()
        font.setPointSize(8)
        self.textField_path.setFont(font)
        self.textField_path.setObjectName("textField_path")
        self.button_compress = QtWidgets.QPushButton(self.centralwidget)
        self.button_compress.setGeometry(QtCore.QRect(230, 120, 141, 31))
        self.button_compressFolder = QtWidgets.QPushButton(self.centralwidget)
        self.button_compressFolder.setGeometry(QtCore.QRect(230, 200, 141, 31))
        self.button_decompressFolder = QtWidgets.QPushButton(self.centralwidget)
        self.button_decompressFolder.setGeometry(QtCore.QRect(230, 240, 141, 31))
        font = QtGui.QFont()
        font.setPointSize(9)
        self.button_compress.setFont(font)
        self.button_compress.setObjectName("button_compress")
        self.button_compressFolder.setFont(font)
        self.button_compressFolder.setObjectName("button_compressFolder")
        self.button_decompressFolder.setFont(font)
        self.button_decompressFolder.setObjectName("button_decompressFolder")
        self.button_decompress = QtWidgets.QPushButton(self.centralwidget)
        self.button_decompress.setGeometry(QtCore.QRect(230, 160, 141, 31))
        font = QtGui.QFont()
        font.setPointSize(9)
        self.button_decompress.setFont(font)
        self.button_decompress.setObjectName("button_decompress")
        self.button_browse = QtWidgets.QPushButton(self.centralwidget)
        self.button_browse.setGeometry(QtCore.QRect(20, 120, 101, 31))
        self.button_exit = QtWidgets.QPushButton(self.centralwidget)
        self.button_exit.setGeometry(QtCore.QRect(20, 160, 101, 31))
        font = QtGui.QFont()
        font.setPointSize(9)
        self.button_browse.setFont(font)
        self.button_browse.setObjectName("button_browse")
        self.button_exit.setFont(font)
        self.button_exit.setObjectName("button_exit")
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setGeometry(QtCore.QRect(40, 20, 301, 31))
        font = QtGui.QFont()
        font.setPointSize(10)
        self.label.setFont(font)
        self.label.setObjectName("label")
        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 394, 26))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)
        
        self.button_browse.clicked.connect(self.browseFiles)
        self.button_compress.clicked.connect(self.compressFile)
        self.button_decompress.clicked.connect(self.decompressFile)
        self.button_compressFolder.clicked.connect(self.compressFolder)
        self.button_decompressFolder.clicked.connect(self.decompressFolder)
        
        self.button_exit.clicked.connect(sys.exit)


    def retranslateUi(self, MainWindow):
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
        self.textField_path.setHtml(_translate("MainWindow", "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\" \"http://www.w3.org/TR/REC-html40/strict.dtd\">\n"
"<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css\">\n"
"p, li { white-space: pre-wrap; }\n"
"</style></head><body style=\" font-family:\'MS Shell Dlg 2\'; font-size:8pt; font-weight:400; font-style:normal;\">\n"
"<p style=\"-qt-paragraph-type:empty; margin-top:0px; margin-bottom:0px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;\"><br /></p></body></html>"))
        self.button_compress.setText(_translate("MainWindow", "Compress"))
        self.button_compressFolder.setText(_translate("MainWindow", "Compress Folder"))
        self.button_decompressFolder.setText(_translate("MainWindow", "Decompress Folder"))
        self.button_decompress.setText(_translate("MainWindow", "Decompress"))
        self.button_browse.setText(_translate("MainWindow", "Browse"))
        self.button_exit.setText(_translate("MainWindow", "Exit"))
        self.label.setText(_translate("MainWindow", "Huffman Compression & Decompression"))


if __name__ == "__main__":
    import sys

    # Create the application object first, then the window that hosts the UI.
    app = QtWidgets.QApplication(sys.argv)

    MainWindow = QtWidgets.QMainWindow()
    ui = Ui_MainWindow()
    ui.setupUi(MainWindow)
    MainWindow.show()

    # Run the event loop until the window closes and pass its status on.
    # sys.exit raises SystemExit, which a notebook kernel reports as output.
    exit_status = app.exec_()
    sys.exit(exit_status)

C:/Users/Mostafa's PC/Desktop/Algo/Test\test_huffman_one.txt and C:/Users/Mostafa's PC/Desktop/Algo/Test.txt
C:/Users/Mostafa's PC/Desktop/Algo/Test\test_huffman_two.txt and C:/Users/Mostafa's PC/Desktop/Algo/Test.txt

Huffman File Sorted Dic:
   Byte     Code              New Code
0     C  1000011           01100000101
1     1   110001        01100011011110
2     0   110000       111100000001110
3     t  1110100                  1010
4     e  1100101                  1110
..  ...      ...                   ...
79    %   100101  01100011011111001101
80    @  1000000   0110001101111100101
81    [  1011011   0110001101111100111
82    #   100011  01100011011111001100
83    ]  1011101   0110001101111100100

[84 rows x 3 columns]
C:/Users/Mostafa's PC/Desktop/Algo/Test


SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
