## Laboratorium 2


### Huffman's static algorithm

In [318]:
import time
import heapq
from bitarray import bitarray
from bitarray.util import ba2int, int2ba

Algorytm budowy drzewa kodowego do kompresji danych – statyczny algorytm Huffmana

In [None]:
# Number of bits in one byte, and the width of a single flag bit.
BITS_IN_BYTE, BIT = 8, 1

# Two-bit tags written in front of a serialized character; the tag value is
# the character's UTF-8 length in bytes minus one (ONE -> 1 byte ... FOUR -> 4).
ONE, TWO, THREE, FOUR = (bitarray(tag) for tag in ("00", "01", "10", "11"))

class Node():
    """Vertex of a binary tree that is ordered by its ``value``.

    Attributes:
        value: weight used when two nodes are compared with ``<``.
        char: optional character carried by the node (``None`` by default).
        left, right: child nodes, ``None`` when absent.
        huff: label rendered by ``__str__``; starts as an empty string.
        parent: always initialised to ``None`` here.
    """

    def __init__(self, value, char = None, left = None, right = None):
        self.value, self.char = value, char
        self.left, self.right = left, right
        self.huff, self.parent = '', None

    def __lt__(self, nxt):
        """Return True when this node's value is smaller than ``nxt``'s."""
        is_smaller = self.value < nxt.value
        return is_smaller

    def __str__(self) -> str:
        """Render the node as ``"<value> <huff>"``."""
        return f"{self.value!s} {self.huff!s}"


class StaticHuffmanTree():
    """Static Huffman coder: compresses a text file to a bit stream and back.

    Layout of a compressed file, bit by bit:

    * one header byte holding the number of zero padding bits (0-7) that were
      appended at the very end to reach a byte boundary,
    * the code tree in pre-order: a leaf is ``1`` + a 2-bit tag
      (UTF-8 length - 1) + the UTF-8 bytes of its character; an inner node is
      ``0`` + left subtree + ``0`` + right subtree,
    * the payload: the prefix codes of all characters of the text,
    * the zero padding.

    An empty text is stored as the header byte alone.  A text with a single
    distinct character gets the one-bit code ``0`` so that the number of
    occurrences survives the round trip.

    NOTE(review): the text files are opened in text mode with the platform's
    default encoding and newline translation, exactly as before; pass an
    explicit ``encoding``/``newline`` if byte-exact round trips are required.
    """

    def __init__(self, filename):
        """Remember *filename*; it is the text to compress or the archive to decompress."""
        self.root = None
        self.filename = filename
        self.text = ""
        self.char_freq = dict()
        self.prefix_code = dict()
        # First byte is a placeholder for the padding header.
        self.bits_to_save = bitarray("10000000")
        # Read cursor (in bits) into the data being decompressed.
        self.bit_counter = 0

    def createHuffmanTree(self):
        """Read the input file and build the Huffman tree and the code table.

        For an empty file ``root`` stays ``None`` and ``prefix_code`` is empty.
        """
        self.__readAllFile()
        self.__loadChars()

        self.root = None
        self.prefix_code = dict()
        if not self.char_freq:
            # Nothing to encode; popping from an empty heap would raise.
            return

        Q = []
        for char, freq in self.char_freq.items():
            heapq.heappush(Q, (freq, Node(freq, char)))

        # Repeatedly merge the two lightest subtrees until one tree remains.
        while len(Q) > 1:
            left_weight, left = heapq.heappop(Q)
            left.huff = 0b0
            right_weight, right = heapq.heappop(Q)
            right.huff = 0b1
            value = left_weight + right_weight
            heapq.heappush(Q, (value, Node(value, left=left, right=right)))

        self.root = heapq.heappop(Q)[1]
        self.__createHuffmanCode(self.root, bitarray())

    def compressData(self, file_to_write):
        """Serialize header, tree and encoded text, and write them to *file_to_write*.

        ``createHuffmanTree`` must have been called first; when ``root`` is
        ``None`` (empty input) only the header byte is written.
        """
        # Start from a fresh buffer so repeated calls do not accumulate bits.
        self.bits_to_save = bitarray("10000000")
        if self.root is not None:
            self.__saveHuffmanCode(self.root)
            for char in self.text:
                self.bits_to_save.extend(self.prefix_code[char])
        self.__lastBits(len(self.bits_to_save))
        with open(file_to_write, "wb") as f:
            self.bits_to_save.tofile(f)

    def __createHuffmanCode(self, node : Node, huff_code : bitarray):
        """Fill ``prefix_code`` with the root-to-leaf path of every character.

        *huff_code* is the path accumulated so far (0 = left, 1 = right).
        """
        if node.char is not None:
            if len(huff_code) == 0:
                # The root itself is a leaf (one distinct character): an empty
                # code would encode nothing, so use a single ``0`` bit.
                huff_code = bitarray("0")
            self.prefix_code[node.char] = huff_code
            return

        if node.left is not None:
            code = huff_code.copy()
            code.append(0)
            self.__createHuffmanCode(node.left, code)

        if node.right is not None:
            code = huff_code.copy()
            code.append(1)
            self.__createHuffmanCode(node.right, code)

    def __saveHuffmanCode(self, node: Node):
        """Append the pre-order serialization of the subtree at *node* to ``bits_to_save``."""
        if node.char is not None:
            encoded = bitarray()
            encoded.frombytes(node.char.encode("utf-8"))
            utf_len = len(encoded) // BITS_IN_BYTE
            self.bits_to_save.append(1)
            # A single character occupies 1-4 UTF-8 bytes; store length - 1.
            self.bits_to_save.extend((ONE, TWO, THREE, FOUR)[utf_len - 1])
            self.bits_to_save.extend(encoded)
            return

        # Each child of an inner node is announced by a 0 bit.
        if node.left is not None:
            self.bits_to_save.append(0)
            self.__saveHuffmanCode(node.left)

        if node.right is not None:
            self.bits_to_save.append(0)
            self.__saveHuffmanCode(node.right)

    def __lastBits(self, num : int):
        """Pad ``bits_to_save`` to a whole byte and store the pad size in the header byte.

        *num* is unused; it is kept so the signature stays unchanged.
        """
        padding = self.bits_to_save.fill()  # number of zero bits added, 0-7
        self.bits_to_save[:BITS_IN_BYTE] = int2ba(padding, length=BITS_IN_BYTE)

    def __readAllFile(self):
        """Load the whole input file into ``self.text``."""
        with open(self.filename, "r") as f:
            self.text = f.read()

    # decompressing

    def decompressData(self, file_to_write):
        """Decode the archive at ``self.filename`` and write the text to *file_to_write*.

        Raises:
            ValueError: if the archive is empty, truncated or has a bad header.
        """
        data = bitarray()
        with open(self.filename, "rb") as f:
            data.fromfile(f)
        self.__restoreHuffman(data)

        with open(file_to_write, "w") as f:
            f.write(self.text)

    def __restoreHuffman(self, data: bitarray):
        """Parse header and tree from *data*, then decode the payload into ``self.text``."""
        # Reset the decoding state so the method can be called more than once.
        self.bit_counter = 0
        self.root = None
        self.prefix_code = dict()
        self.text = ""

        last_bits = ba2int(self.__getNBytes(data, 1))
        if last_bits >= BITS_IN_BYTE:
            raise ValueError("invalid padding header: {}".format(last_bits))
        n = len(data) - last_bits  # index one past the last meaningful bit

        if self.bit_counter >= n:
            # Header only: the archive encodes an empty text.
            return

        self.root = Node(0)
        self.__recHuffTree(data, self.root)
        self.__createHuffmanCode(self.root, bitarray())
        self.__ba2Txt(data, n)

    def __recHuffTree(self, data: bitarray, vert: Node):
        """Rebuild the subtree rooted at *vert* from the serialized tree in *data*."""
        if self.__getBit(data) == 1:
            # Leaf: 2-bit length tag followed by the UTF-8 bytes of the character.
            len_bytes = ba2int(self.__getNBit(data, 2)) + 1
            vert.char = self.__getNBytes(data, len_bytes).tobytes().decode("utf-8")
            return

        # Inner node: the bit just read announced the left child.
        vert.left = Node(0)
        self.__recHuffTree(data, vert.left)

        self.__getBit(data)  # the 0 bit announcing the right child
        vert.right = Node(0)
        self.__recHuffTree(data, vert.right)

    def __ba2Txt(self, data: bitarray, length: int):
        """Decode the payload ``data[bit_counter:length]`` into ``self.text``."""
        root = self.root
        chars = []
        if root.char is not None:
            # Single-character alphabet: every payload bit is one occurrence.
            # Archives written before the one-bit code existed carry no payload
            # and decoded to exactly one character; keep that behaviour.
            chars = [root.char] * max(length - self.bit_counter, 1)
        else:
            vert = root
            for bit in data[self.bit_counter:length]:
                vert = vert.right if bit else vert.left
                if vert.char is not None:
                    chars.append(vert.char)
                    vert = root
        self.bit_counter = max(self.bit_counter, length)
        # Join once instead of growing the string character by character.
        self.text = "".join(chars)

    def __take(self, tab : bitarray, nbits : int):
        """Return the next *nbits* bits of *tab* and advance the read cursor.

        The cursor replaces shifting the whole array on every read, which made
        decoding quadratic in the size of the archive.
        """
        start = self.bit_counter
        stop = start + nbits
        if stop > len(tab):
            raise ValueError("compressed data is truncated")
        self.bit_counter = stop
        return tab[start:stop]

    def __getNBytes(self, tab : bitarray, n: int):
        """Read the next *n* bytes of *tab* as a bitarray."""
        return self.__take(tab, n*BITS_IN_BYTE)

    def __getBit(self, tab: bitarray):
        """Read the next bit of *tab* as an int (0 or 1)."""
        return self.__take(tab, BIT)[0]

    def __getNBit(self, tab: bitarray, n : int):
        """Read the next *n* bits of *tab* as a bitarray."""
        return self.__take(tab, n*BIT)

    def __loadChars(self):
        """Count how often each character occurs in ``self.text``."""
        # Rebuilt from scratch so a second call does not double the counts.
        self.char_freq = dict()
        for char in self.text:
            self.char_freq[char] = self.char_freq.get(char, 0) + 1

    def __printResult(self, tmp):
        """Print the subtree rooted at *tmp* in pre-order, one node per line."""
        print(tmp)

        if tmp.left is not None:
            self.__printResult(tmp.left)
        if tmp.right is not None:
            self.__printResult(tmp.right)

    def __str__(self):
        """Return the pre-order listing of the tree, one node per line.

        Returns an empty string when no tree has been built.
        """
        lines = []
        stack = [] if self.root is None else [self.root]
        while stack:
            node = stack.pop()
            lines.append(str(node))
            # Push right first so the left subtree is listed first.
            if node.right is not None:
                stack.append(node.right)
            if node.left is not None:
                stack.append(node.left)
        return "\n".join(lines)




In [458]:
def compressFile(file_to_read, file_to_compress):
    """Compress the text file *file_to_read* into *file_to_compress*.

    Prints the length of the written bit stream in bits (everything that was
    stored in ``bits_to_save``).
    """
    huff = StaticHuffmanTree(file_to_read)
    huff.createHuffmanTree()
    huff.compressData(file_to_compress)
    # StaticHuffmanTree defines no __len__, so len(huff) raised TypeError;
    # report the size of the emitted bit stream instead.
    print(len(huff.bits_to_save))


In [459]:
def decompressFile(file_compressed, file_to_write):
    """Decode the archive *file_compressed* and store the text in *file_to_write*."""
    StaticHuffmanTree(file_compressed).decompressData(file_to_write)



In [460]:
# Compress the sample text file into a binary archive.
compressFile("input_files/1MB.txt", "bin_files/1MB.bin")

In [461]:
# Decode the archive written by the compression cell back into a text file.
decompressFile("bin_files/1MB.bin", "output_files/1MB.txt")

KeyboardInterrupt: 