# Compress and decompress text file

In [152]:
import os
import struct
import csv
import time
import shutil

def findRangePow(x):
    i = 0
    while pow(2,i) - 1 < x:i+=1
    return i

def packZip(outputFolderName, inputParentPath):
    shutil.make_archive(outputFolderName, 'zip', inputParentPath)

def unpackZip(inputPath, inputParentPath):
    shutil.unpack_archive(inputPath ,inputParentPath, 'zip')

def setUpPathToCom(inputPath):
    inputName = os.path.basename(inputPath).split('.')[0]
    inputParentPath = os.path.dirname(inputPath)

    outputFolderName = inputName + '.comTxt'
    outputFolderPath = os.path.join(inputParentPath,outputFolderName)

    outputFolderRazName = inputName + '.comTxt.raz'
    outputFolderRazPath = os.path.join(inputParentPath,outputFolderRazName)

    dicPath = os.path.join(outputFolderPath, 'dictonary.csv')
    compressedPath = os.path.join(outputFolderPath, '{}.comTxt.bin'.format(inputName))


    if not os.path.exists(outputFolderPath):
        os.mkdir(outputFolderPath)

    return outputFolderPath, outputFolderName, dicPath, compressedPath

def setUpPathToDecom(inputPath, opt='txt'):
    # Check valid
    if inputPath.endswith('.comTxt.zip'):
        message = "Invalid file"
    
    inputName = os.path.basename(inputPath).split('.')[0]
    inputParentPath = os.path.dirname(inputPath)


    outputFolderName = inputName + '.decomTxt'
    outputFolderPath = os.path.join(inputParentPath,outputFolderName)

    toDecompressedPath = os.path.join(outputFolderPath, '{}.comTxt.bin'.format(inputName))
    if opt == 'txt':
        decompressedPath = os.path.join(outputFolderPath, '{}.decom.txt'.format(inputName))
    else:
        decompressedPath = os.path.join(outputFolderPath, '{}.decom.png'.format(inputName))


    

    if not os.path.exists(outputFolderPath):
        os.mkdir(outputFolderPath)
    
    return toDecompressedPath, decompressedPath, outputFolderPath
    


def saveDicttoCSV(dicPath, dictionary):
    headers = ['CodeWord', 'Code']
    with open(dicPath, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(headers)
        for key, value in dictionary.items():
            # print(key, value)
            writer.writerow([key,value])

def compress_txt(inputPath, outputPath, type='H'):
    # Set up parameter
    code = 256
    dictionary = {bytes([i]): i for i in range(code)}
    compressed_data = bytearray()
    current_sequence = b""

    # Read input file
    with open(inputPath, "r", encoding="utf-8") as input_file:
        data = input_file.read().encode("utf-8")

    inputSize = len(data)*8
    compressedSize = 0

    # Compress data using LZW
    for byte in data:
        sequence = current_sequence + bytes([byte])
        if sequence in dictionary:
            current_sequence = sequence
        else:
            compressedSize += findRangePow(dictionary[current_sequence])
            compressed_data += struct.pack(type, dictionary[current_sequence])
            dictionary[sequence] = code
            code += 1
            current_sequence = bytes([byte])

    if current_sequence:
        compressedSize += findRangePow(dictionary[current_sequence])
        compressed_data += struct.pack(type, dictionary[current_sequence])

    #Save type in compressed data
    compressed_data = compressed_data + bytes('<.>{}'.format(type), 'utf-8')
    
    # Write compressed data to output file
    with open(outputPath, "wb") as output_file:
        output_file.write(compressed_data)

    return inputSize, compressedSize, dictionary

def compress_png(inputPath, outputPath, type='H'):
    # Set up parameter
    code = 256
    dictionary = {bytes([i]): i for i in range(code)}
    compressed_data = bytearray()
    current_sequence = b""

    # Open image and convert to raw pixel data
    with Image.open(inputPath) as img:
        data = img.convert("RGB").tobytes()
        width, height = img.size
        print(width, height)

    inputSize = len(data)*8
    compressedSize = 0

    # Compress data using LZW
    for byte in data:
        sequence = current_sequence + bytes([byte])
        if sequence in dictionary:
            current_sequence = sequence
        else:
            compressedSize += findRangePow(dictionary[current_sequence])
            compressed_data += struct.pack(type, dictionary[current_sequence])
            dictionary[sequence] = code
            code += 1
            current_sequence = bytes([byte])

    if current_sequence:
        compressedSize += findRangePow(dictionary[current_sequence])
        compressed_data += struct.pack(type, dictionary[current_sequence])

    #Save type in parameter image and type
    compressed_data = compressed_data + bytes('<.>{}'.format(width), 'utf-8')
    compressed_data = compressed_data + bytes('<.>{}'.format(height), 'utf-8')
    compressed_data = compressed_data + bytes('<.>{}'.format(type), 'utf-8')

    print(compressed_data)
    
    # Write compressed data to output file
    with open(outputPath, "wb") as output_file:
        output_file.write(compressed_data)

    return inputSize, compressedSize, dictionary

def decompress_txt(inputPath, outputPath):
    # Read compressed data from input file
    with open(inputPath, "rb") as input_file:
        compressed_data = input_file.read()
    print(compressed_data)

    compressed_data, type = compressed_data.split(bytes('<.>', 'utf-8'))[:]
    step = 2
    match type:
        case 'H':
            step = 2
        case 'I':
            step = 4
        case 'Q':
            step = 8

    # Decompress data using LZW
    dictionary = {i: bytes([i]) for i in range(256)}
    code = 256
    decompressed_data = bytearray()
    current_sequence = bytes()
    

    for i in range(0, len(compressed_data), step):
        value = struct.unpack(type, compressed_data[i:i+step])[0]
        if value in dictionary:
            sequence = dictionary[value]
            # print("In dic", sequence[0])
            # print("sequence", sequence)
        elif value == code:
            sequence = current_sequence + bytes([current_sequence[0]])
        else:
            raise ValueError("Invalid compressed data")
        decompressed_data += sequence
        # print("Decompress", decompressed_data)
        if current_sequence:
            print("in if current", sequence[0])
            dictionary[code] = current_sequence + bytes([sequence[0]])
            code += 1
            # print("Code", code)
        current_sequence = sequence

    # Write decompressed data to output file
    with open(outputPath, "w", encoding="utf-8") as output_file:
        output_file.write(decompressed_data.decode("utf-8"))


def decompress_png(inputPath, outputPath):
    # Read compressed data from input file
    with open(inputPath, "rb") as input_file:
        compressed_data = input_file.read()
    print(compressed_data)

    compressed_data, width, height, type = compressed_data.split(bytes('<.>', 'utf-8'))[:]
    print(type)
    step = 2
    match type:
        case 'H':
            step = 2
        case 'I':
            step = 4
        case 'Q':
            step = 8

    # Decompress data using LZW
    dictionary = {i: bytes([i]) for i in range(256)}
    code = 256
    decompressed_data = bytearray()
    current_sequence = bytes()
    

    for i in range(0, len(compressed_data), step):
        value = struct.unpack(type, compressed_data[i:i+step])[0]
        if value in dictionary:
            sequence = dictionary[value]
            # print("In dic", sequence[0])
            # print("sequence", sequence)
        elif value == code:
            sequence = current_sequence + bytes([current_sequence[0]])
        else:
            raise ValueError("Invalid compressed data")
        decompressed_data += sequence
        # print("Decompress", decompressed_data)
        if current_sequence:
            # print("in if current", sequence[0])
            dictionary[code] = current_sequence + bytes([sequence[0]])
            code += 1
            # print("Code", code)
        current_sequence = sequence

    width = int(width)
    height = int(height)

    # width, height = original_image.size
    print('Width', width, 'Height', height)
    img = Image.frombytes("RGB", (width, height), decompressed_data)
    img.save(outputPath, format="PNG")

def compress_lzw_utf8(inputPath, type="H"):

    startTime = time.time()
    outputFolderPath, outputFolderName, dicPath, compressedPath = setUpPathToCom(inputPath)

    inputSize, compressedSize, dictionary = compress_txt(inputPath,compressedPath, type=type)

    saveDicttoCSV(dicPath,dictionary)
    packZip(outputFolderName, outputFolderPath)

    # Check if compressed file is smaller than input file
    elapsedTime = '{:.5}s'.format(time.time() - startTime)
    print("Eslapsed time: ", elapsedTime)
    compression_ratio = compressedSize / inputSize
    if compression_ratio >= 1:
        message = "WARNING: Compressed file is not smaller than input file"
    else:
        message = "Compression successful: {:.2%} reduction in file size".format(1 - compression_ratio)
    print(message)

def decompress_lzw_utf8(inputPath):

    startTime = time.time()
    toDecompressedPath, decompressedPath, outputFolderPath = setUpPathToDecom(inputPath, opt='png')
    unpackZip(inputPath ,outputFolderPath)
    decompress_png(toDecompressedPath, decompressedPath)
    print("Decompression successful")



# Compress and decompress png file

In [153]:
from PIL import Image
import struct
import os    

def compress_lzw_png(inputPath, type="H"):

    startTime = time.time()
    outputFolderPath, outputFolderName, dicPath, compressedPath = setUpPathToCom(inputPath)

    inputSize, compressedSize, dictionary = compress_png(inputPath,compressedPath, type=type)

    saveDicttoCSV(dicPath,dictionary)
    packZip(outputFolderName, outputFolderPath)

    # Check if compressed file is smaller than input file
    elapsedTime = '{:.5}s'.format(time.time() - startTime)
    print("Eslapsed time: ", elapsedTime)
    compression_ratio = compressedSize / inputSize
    if compression_ratio >= 1:
        message = "WARNING: Compressed file is not smaller than input file"
    else:
        message = "Compression successful: {:.2%} reduction in file size".format(1 - compression_ratio)
    print(message)


def decompress_lzw_png(inputPath):

    startTime = time.time()
    toDecompressedPath, decompressedPath, outputFolderPath = setUpPathToDecom(inputPath)
    unpackZip(inputPath ,outputFolderPath)
    decompress_png(toDecompressedPath, decompressedPath)
    print("Decompression png successful")


In [52]:
print(bytes('.', 'utf-8'))

b'.'


In [154]:
inputPath = 'D:/Project/DPT/LzwCompressor/heeh.png'

compress_lzw_png(inputPath)

15 13
bytearray(b'\xff\x00\x00\x01\x01\x01\x02\x01\x03\x01\x04\x01\x05\x01\x06\x01\x07\x01\x08\x01\t\x01\n\x01\x0b\x01\x0c\x01\r\x01\x0e\x01\x0f\x01\x10\x01\x07\x01\xfb\x00\xe6\x00\xe5\x00\x08\x01\xfe\x00\xee\x00\x13\x01\xe5\x00\x19\x01\x1b\x01\x14\x01\x1c\x01\xe5\x00\xf3\x00\xf1\x00\xf0\x00\x04\x01\x19\x01\xda\x00"\x00\x19\x00\xf7\x00\xd8\x00\xd8\x00\x04\x01\xe4\x00\x83\x00\x82\x00\xd4\x00\x06\x00\x02\x00%\x01\x19\x002\x014\x01&\x01/\x01\x02\x00\xde\x00Z\x00W\x00\x01\x01$\x016\x010\x01\xf1\x00\xba\x00\xb8\x00\x04\x019\x01W\x007\x015\x01\x19\x00F\x01>\x018\x01:\x01\x01\x01(\x01\xd8\x007\x017\x01\xea\x00\x98\x00\x96\x00C\x01<\x004\x00I\x013\x01&\x01G\x017\x01D\x01<\x01\x14\x012\x017\x01\xe4\x00p\x00k\x00^\x01\xe5\x00G\x01g\x01Z\x01J\x01]\x01\x00\x01=\x01H\x010\x01k\x01\xff\x00\xf3\x00\xc8\x00\xc7\x00X\x01h\x01Y\x01n\x01K\x01;\x01l\x01_\x01j\x01V\x01\x01\x01\xee\x00\xac\x00\xaa\x00u\x01i\x01w\x01`\x01o\x01L\x01{\x01f\x01J\x012\x01\x01\x01R\x01\x96\x00X\x01D\x01\xde\x00V\x01[\x01\x87\x01z

In [155]:
inputPath = 'D:\Project\DPT\LzwCompressor\heeh.comTxt.zip'
decompress_lzw_png(inputPath)

b'\xff\x00\x00\x01\x01\x01\x02\x01\x03\x01\x04\x01\x05\x01\x06\x01\x07\x01\x08\x01\t\x01\n\x01\x0b\x01\x0c\x01\r\x01\x0e\x01\x0f\x01\x10\x01\x07\x01\xfb\x00\xe6\x00\xe5\x00\x08\x01\xfe\x00\xee\x00\x13\x01\xe5\x00\x19\x01\x1b\x01\x14\x01\x1c\x01\xe5\x00\xf3\x00\xf1\x00\xf0\x00\x04\x01\x19\x01\xda\x00"\x00\x19\x00\xf7\x00\xd8\x00\xd8\x00\x04\x01\xe4\x00\x83\x00\x82\x00\xd4\x00\x06\x00\x02\x00%\x01\x19\x002\x014\x01&\x01/\x01\x02\x00\xde\x00Z\x00W\x00\x01\x01$\x016\x010\x01\xf1\x00\xba\x00\xb8\x00\x04\x019\x01W\x007\x015\x01\x19\x00F\x01>\x018\x01:\x01\x01\x01(\x01\xd8\x007\x017\x01\xea\x00\x98\x00\x96\x00C\x01<\x004\x00I\x013\x01&\x01G\x017\x01D\x01<\x01\x14\x012\x017\x01\xe4\x00p\x00k\x00^\x01\xe5\x00G\x01g\x01Z\x01J\x01]\x01\x00\x01=\x01H\x010\x01k\x01\xff\x00\xf3\x00\xc8\x00\xc7\x00X\x01h\x01Y\x01n\x01K\x01;\x01l\x01_\x01j\x01V\x01\x01\x01\xee\x00\xac\x00\xaa\x00u\x01i\x01w\x01`\x01o\x01L\x01{\x01f\x01J\x012\x01\x01\x01R\x01\x96\x00X\x01D\x01\xde\x00V\x01[\x01\x87\x01z\x01\xff\x00m\x0

TypeError: argument 1 must be read-only bytes-like object, not bytearray

In [1]:
from tkinter import *
from tkinter import ttk
from tkinter import filedialog as fd
import os
import time
import threading



from PIL import Image
import struct
import os

def compress_lzw_png(input_path, output_path):
    # Open image and convert to raw pixel data
    with Image.open(input_path) as img:
        raw_data = img.convert("RGB").tobytes()
        original_size = len(raw_data)

    # Initialize dictionary with all possible byte values
    dictionary = {bytes([i]): i for i in range(256)}

    # Compress data using LZW algorithm
    compressed_data = bytearray()
    current_sequence = bytes()
    code = 256
    for byte in raw_data:
        symbol_sequence = current_sequence + bytes([byte])
        if symbol_sequence in dictionary:
            current_sequence = symbol_sequence
        else:
            compressed_data += struct.pack(">H", dictionary[current_sequence])
            dictionary[symbol_sequence] = code
            code += 1
            current_sequence = bytes([byte])

    # Pack final code and write compressed data to output file
    if current_sequence:
        compressed_data += struct.pack(">H", dictionary[current_sequence])
    with open(output_path, "wb") as output_file:
        output_file.write(compressed_data)

    # Compute and print compression ratio
    # original_size = os.path.getsize(input_path)
    compressed_size = os.path.getsize(output_path)
    print('This is compressed_size', compressed_size)
    compression_ratio = original_size / compressed_size
    print("Compression ratio:", compression_ratio)

def getWidthHeightFromPNG(pathToDecompress):
    original_image = Image.open(pathToDecompress)

    width, height = original_image.size
    return width, height

def getWidthHeightFromFileName(pathToDecompress):
    # D:\Project\DPT\LzwCompressor\LZW-Text-File-Compression\input_compressed_1366_768.bin
    print(pathToDecompress)
    width, height = pathToDecompress.split("\\")[-1].split(".")[0].split("_")[2:4]
    return int(width), int(height)


def decompress_lzw_png(compressed_path, output_path, width, height):
    # Read the compressed data from the file
    with open(compressed_path, "rb") as f:
        compressed_data = f.read()

    # Decompress the data using LZW
    table = {i: bytes([i]) for i in range(256)}
    next_code = 256
    string = b""
    output_data = b""
    for i in range(0, len(compressed_data), 2):
        code = struct.unpack(">H", compressed_data[i:i+2])[0]
        if code not in table:
            new_string = string + bytes([string[0]])
        else:
            new_string = table[code]
        output_data += new_string
        if string:
            table[next_code] = string + bytes([new_string[0]])
            next_code += 1
        string = new_string

    # Convert the raw image data to a PNG image and save it

    print('Width', width, 'Height', height)
    img = Image.frombytes("RGB", (width, height), output_data)
    img.save(output_path, format="PNG")
    print('Decompress image successfull')
    

root = Tk()
root.title("Compress and Decompress Text File/ Image")

# Example text
filePath = 'Nhập đường dẫn ở đây ...'


# Define function
def step(opt='com'):
    my_progress.start(1)
    root.update_idletasks()

    time.sleep(1)
    pathToCompress = feet.get()
    parentPath = os.path.abspath(os.path.join(pathToCompress, os.pardir))

    if(opt=='com'):

        outName, typeName = pathToCompress.split('/')[-1].split('.')[:]


        outFile = ''
        if pathToCompress.endswith('png'):
            width, height = getWidthHeightFromPNG(pathToCompress)
            outFile = os.path.join(os.path.abspath(os.path.join(pathToCompress, os.pardir)), outName + '_compressed_' +str(width) + '_' +str(height) + '.img')
            compress_lzw_png(pathToCompress, outFile)
        else:
            outFile = os.path.join(os.path.abspath(os.path.join(pathToCompress, os.pardir)), outName + '_compressed.' + 'bin')
            compress_lzw_utf8(pathToCompress, outFile)


        out.set(outFile)
        infoFileOut.set(file_size(outFile))


        print("Finish")

        my_progress.stop()
        my_progress['value'] = 0
    else:
        

        outName, typeName = pathToCompress.split('/')[-1].split('.')[:]
        outFile  = ""

        if pathToCompress.endswith('img'):
            width, height = getWidthHeightFromFileName(pathToCompress)

            print('This is width, height', width, height)

            outFile = os.path.join(os.path.abspath(os.path.join(pathToCompress, os.pardir)), outName + '_decompressed' + '.png')
            decompress_lzw_png(pathToCompress, outFile, width, height)

        else:
            outFile = os.path.join(os.path.abspath(os.path.join(pathToCompress, os.pardir)), outName + '_decompressed' +'.txt')
            decompress_lzw_utf8(pathToCompress, outFile)

        out.set(outFile)
        infoFileOut.set(file_size(outFile))
        print("Finish decompressed")

        my_progress.stop()
        my_progress['value'] = 0


def stop():
    my_progress.stop()
    my_progress['value'] = 0

def convert_bytes(num):
    """
    this function will convert bytes to MB.... GB... etc
    """
    for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if num < 1024.0:
            return "%3.1f %s" % (num, x)
        num /= 1024.0
 
def file_size(file_path):
    """
    this function will return the file size
    """
    if os.path.isfile(file_path):
        file_info = os.stat(file_path)
        return convert_bytes(file_info.st_size)

def select_file():
    filetypes = (
        ('text files', '*.txt'),
        ('text files', '*.docx'),
        ('All files', '*.*')
    )

    filename = fd.askopenfilename(
        title='Open a file',
        initialdir='/',
        filetypes=filetypes)
    
    feet.set(filename)
    infoFile.set(file_size(filename))

    print(infoFile)

def select_image():
    filetypes = (
    ('png images', '*.png'),
    ('jpg images', '*.jpg'),
    ('All files', '*.*')
    )
    filename = fd.askopenfilename(
    title='Open a file',
    initialdir='/',
    filetypes=filetypes)
    
    feet.set(filename)
    infoFile.set(file_size(filename))

    print(infoFile)


def compress():
    threading.Thread(target=step('com')).start()


def decompress():
    threading.Thread(target=step(opt='des')).start()


mainframe = ttk.Frame(root, padding="3 3 12 12")
mainframe.grid(column=0, row=0, sticky=(N, W, E, S))
root.columnconfigure(0, weight=1)
root.rowconfigure(0, weight=1)

feet = StringVar()
feet.set(filePath)
feet_entry = ttk.Entry(mainframe, width=50, textvariable=feet)
feet_entry.grid(column=1, row=1, sticky=(W, E))

out = StringVar()
out_entry = ttk.Entry(mainframe, width=50, textvariable=out)
out_entry.grid(column=3, row=1, sticky=(W, E))


infoFile = StringVar()
ttk.Label(mainframe, textvariable=infoFile).grid(column=1, row=2, sticky=(W,E))

infoFileOut = StringVar()
ttk.Label(mainframe, textvariable=infoFileOut).grid(column=3, row=2, sticky=(W,E))

my_progress = ttk.Progressbar(mainframe, orient=HORIZONTAL,length=100, mode='indeterminate')
my_progress.grid(column=2, row=5)




meters = StringVar()
ttk.Label(mainframe, textvariable=meters).grid(column=2, row=2, sticky=(N, S))

ttk.Button(mainframe, text="Compress", command=compress).grid(column=2, row=3, sticky=W)
ttk.Button(mainframe, text="Decompress", command=decompress).grid(column=2, row=4, sticky=W)

ttk.Button(mainframe, text="Open file", command=select_file).grid(column=2, row=1, sticky=W)
ttk.Button(mainframe, text="Open PNG", command=select_image).grid(column=2, row=2, sticky=W)
# ttk.Label(mainframe, text="is equivalent to").grid(column=1, row=2, sticky=E)
# ttk.Label(mainframe, text="meters").grid(column=3, row=2, sticky=W)

for child in mainframe.winfo_children(): 
    child.grid_configure(padx=5, pady=5)

feet_entry.focus()

# Short cut
# root.bind("<Return>", calculate)

root.mainloop()

In [12]:
imagePath = 'input.png'

import cv2
import numpy as np
import struct


img = cv2.imread(imagePath)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
w,h,_ = img.shape

# print(np.array(img[:,:,0]).flatten())
for chanel in range(3):
    imgArr = np.array(img[:,:,chanel])

    imgFla = imgArr.flatten()

    print(max(imgFla))
    print(len(imgFla))


    dictionary = {str(i): i for i in range(256)}
    print("Dictionary", dictionary)

    code = 256
    compressed_data = bytearray()
    current_sequence = ""
    output_data = []

    for byte in imgFla:
        # print("Byte: ", byte)
        if current_sequence:
            sequence = current_sequence + "+" + str(byte)
        else:
            sequence = str(byte)
        # print("Current sequence:", current_sequence)
        # print("Sequence", sequence)
        if sequence in dictionary:
            # print("In dictionary")
            current_sequence = sequence
        else:
            # print('What is struct:', dictionary[current_sequence])
            compressed_data += struct.pack("H", dictionary[current_sequence])
            output_data.append(dictionary[current_sequence])
            dictionary[sequence] = code
            code += 1
            current_sequence = str(byte)

        # print(compressed_data)

    if current_sequence:
        compressed_data += struct.pack("H", dictionary[current_sequence])
        output_data.append(dictionary[current_sequence])

    if chanel == 2:
        compressed_data = compressed_data + bytes('<.>{}'.format(w), 'utf-8')
        compressed_data = compressed_data + bytes('<.>{}'.format(h), 'utf-8')
    # print("Final compressed data: ", compressed_data)
    # print("Final output data: ", output_data[:])
    # print("Final output data: ", imgFla[:])
    # print("Dictionary", dictionary)

    output_path = 'anhnen{}.bin'.format(chanel)
    with open(output_path, "wb") as output_file:
        output_file.write(compressed_data)

255
86768
Dictionary {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '10': 10, '11': 11, '12': 12, '13': 13, '14': 14, '15': 15, '16': 16, '17': 17, '18': 18, '19': 19, '20': 20, '21': 21, '22': 22, '23': 23, '24': 24, '25': 25, '26': 26, '27': 27, '28': 28, '29': 29, '30': 30, '31': 31, '32': 32, '33': 33, '34': 34, '35': 35, '36': 36, '37': 37, '38': 38, '39': 39, '40': 40, '41': 41, '42': 42, '43': 43, '44': 44, '45': 45, '46': 46, '47': 47, '48': 48, '49': 49, '50': 50, '51': 51, '52': 52, '53': 53, '54': 54, '55': 55, '56': 56, '57': 57, '58': 58, '59': 59, '60': 60, '61': 61, '62': 62, '63': 63, '64': 64, '65': 65, '66': 66, '67': 67, '68': 68, '69': 69, '70': 70, '71': 71, '72': 72, '73': 73, '74': 74, '75': 75, '76': 76, '77': 77, '78': 78, '79': 79, '80': 80, '81': 81, '82': 82, '83': 83, '84': 84, '85': 85, '86': 86, '87': 87, '88': 88, '89': 89, '90': 90, '91': 91, '92': 92, '93': 93, '94': 94, '95': 95, '96': 96, '97': 97, '98': 98, '99': 99

In [10]:
imgFla = np.array([1, 7, 7, 7, 5, 88, 88, 88, 88, 88, 88, 9])

print(max(imgFla))
print(len(imgFla))


dictionary = {str(i): i for i in range(256)}
print("Dictionary", dictionary)

code = 256
compressed_data = bytearray()
current_sequence = ""
output_data = []

for byte in imgFla:
    # print("Byte: ", byte)
    if current_sequence:
        sequence = current_sequence + "+" + str(byte)
    else:
        sequence = str(byte)
    # print("Current sequence:", current_sequence)
    # print("Sequence", sequence)
    if sequence in dictionary:
        # print("In dictionary")
        current_sequence = sequence
    else:
        # print('What is struct:', dictionary[current_sequence])
        compressed_data += struct.pack("H", dictionary[current_sequence])
        output_data.append(dictionary[current_sequence])
        dictionary[sequence] = code
        code += 1
        current_sequence = str(byte)

    # print(compressed_data)

if current_sequence:
    compressed_data += struct.pack("H", dictionary[current_sequence])
    output_data.append(dictionary[current_sequence])

print("Final compressed data: ", compressed_data)
print("Final output data: ", output_data[:])
print("Final output data: ", imgFla[:])
print("Dictionary", dictionary)

88
12
Dictionary {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '10': 10, '11': 11, '12': 12, '13': 13, '14': 14, '15': 15, '16': 16, '17': 17, '18': 18, '19': 19, '20': 20, '21': 21, '22': 22, '23': 23, '24': 24, '25': 25, '26': 26, '27': 27, '28': 28, '29': 29, '30': 30, '31': 31, '32': 32, '33': 33, '34': 34, '35': 35, '36': 36, '37': 37, '38': 38, '39': 39, '40': 40, '41': 41, '42': 42, '43': 43, '44': 44, '45': 45, '46': 46, '47': 47, '48': 48, '49': 49, '50': 50, '51': 51, '52': 52, '53': 53, '54': 54, '55': 55, '56': 56, '57': 57, '58': 58, '59': 59, '60': 60, '61': 61, '62': 62, '63': 63, '64': 64, '65': 65, '66': 66, '67': 67, '68': 68, '69': 69, '70': 70, '71': 71, '72': 72, '73': 73, '74': 74, '75': 75, '76': 76, '77': 77, '78': 78, '79': 79, '80': 80, '81': 81, '82': 82, '83': 83, '84': 84, '85': 85, '86': 86, '87': 87, '88': 88, '89': 89, '90': 90, '91': 91, '92': 92, '93': 93, '94': 94, '95': 95, '96': 96, '97': 97, '98': 98, '99': 99, '1

# PNG


In [13]:
image = []
for chanel in range(3):
    with open('anhnen{}.bin'.format(chanel), "rb") as input_file:
        compressed_data = input_file.read()
        print(compressed_data)
    if chanel == 2:
        compressed_data, width, height = compressed_data.split(bytes('<.>', 'utf-8'))[:]
        width = int(width)
        height = int(height)
    dictionary = {str(i): i for i in range(256)}
    code = 256
    decompressed_data = []

    def takeSequence(sequence):
        result = [int(i) for i in sequence.split('+')]
        return result


    current_sequence = ""
    for i in range(0, len(compressed_data), 2):
        value = str(struct.unpack("H", compressed_data[i:i+2])[0])
        # print("value", value)
        if value in dictionary:
            sequence = str(dictionary[value])
            # print('In dic', sequence)
        elif value == str(code):
            # print("Value = code")
            sequence = current_sequence + "+" + str(takeSequence(current_sequence)[0])
        else:
            raise ValueError("Invalid compressed data")
        for i in takeSequence(sequence):
            decompressed_data.append(i) 
        
        if current_sequence:
            # print("New", str(current_sequence + "+" +str(takeSequence(sequence)[0])))
            dictionary[str(code)] = str(current_sequence + "+" + str(takeSequence(sequence)[0]))
            code += 1
            # print('Code', code)
        current_sequence = sequence
        # print('Current', current_sequence)
        # print("Output",decompressed_data)
        # print("Dic",dictionary)
    image.append(decompressed_data)

print(np.array(image).shape)
image_merge = cv2.merge([np.array(image[2]).reshape(width,height).astype(np.uint8), np.array(image[1]).reshape(width,height).astype(np.uint8), np.array(image[0]).reshape(width,height).astype(np.uint8)])
print(np.array(image[0]).reshape(w,h).astype(np.uint8))
cv2.imwrite('hahah.png', image_merge)


b'J\x00J\x00\xff\x00\x02\x01\x03\x01\x04\x01\x05\x01\x06\x01\x07\x01\x08\x01\t\x01\n\x01\x0b\x01\x0c\x01\r\x01\x0e\x01\x0f\x01\x0e\x01L\x00\x01\x01\x10\x01\x14\x01\x15\x01\x16\x01\x17\x01\x18\x01\x19\x01L\x00L\x00\x19\x01\x1d\x01\x1e\x01\x1f\x01 \x01\x0e\x01J\x00K\x00!\x01%\x01&\x01\'\x01\x17\x01L\x00I\x00(\x01,\x01-\x01.\x01\x04\x01K\x00\x13\x01/\x013\x014\x01\x1e\x01\x00\x015\x018\x019\x01\x14\x01\x12\x01:\x01=\x01>\x01\x08\x011\x01?\x01B\x01B\x01#\x01C\x01F\x01:\x017\x01G\x01J\x013\x01I\x01K\x01N\x01(\x01A\x01O\x01R\x01%\x01J\x00\x1c\x01S\x01W\x01\x1e\x01K\x00V\x01X\x01\\\x01\x16\x01J\x00+\x01]\x01a\x01\x10\x01Z\x01b\x01e\x01\r\x01Q\x01f\x01i\x01\x07\x01h\x01j\x01m\x01\x02\x01E\x01n\x01q\x01l\x01q\x01i\x01K\x00$\x01t\x01m\x01M\x01x\x01f\x01z\x01{\x01b\x01d\x01~\x01f\x01I\x00[\x01\x81\x01a\x01}\x01\x85\x01X\x01s\x01\x88\x01W\x01\x87\x01\x8b\x01R\x01p\x01\x8e\x01X\x01\x1b\x01\x91\x01\\\x01\x8d\x01\x94\x01J\x01\x8a\x01\x97\x01J\x01\x96\x01\r\x01\xf3\x00\xfb\x00\x9f\x01\xa0\x01\xfb\x00\

True

ValueError: not enough values to unpack (expected 3, got 0)

In [125]:
total = 0
for index, value in enumerate(decompressed_data):
    if value != imgFla[index]:
        total +=1
        # print(index)

print(total)

95805


In [4]:
outImgFla = np.array(decompressed_data)
root = outImgFla.reshape(w,h)
root = root.astype(np.uint8)
print(root)
cv2.imshow('haha',root)

[[28 29 24 ... 24 24 24]
 [29 29 24 ... 24 24 24]
 [25 29 24 ... 24 24 24]
 ...
 [26 26  0 ... 24 24 24]
 [26 27  0 ... 24 24 24]
 [26 25  0 ... 24 24 24]]


: 

In [5]:
dictionary = {str(i): i for i in range(256)}
print("Dictionary:" ,dictionary)
for key, value in dictionary.items():
    print(key, value)
        

Dictionary: {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, '10': 10, '11': 11, '12': 12, '13': 13, '14': 14, '15': 15, '16': 16, '17': 17, '18': 18, '19': 19, '20': 20, '21': 21, '22': 22, '23': 23, '24': 24, '25': 25, '26': 26, '27': 27, '28': 28, '29': 29, '30': 30, '31': 31, '32': 32, '33': 33, '34': 34, '35': 35, '36': 36, '37': 37, '38': 38, '39': 39, '40': 40, '41': 41, '42': 42, '43': 43, '44': 44, '45': 45, '46': 46, '47': 47, '48': 48, '49': 49, '50': 50, '51': 51, '52': 52, '53': 53, '54': 54, '55': 55, '56': 56, '57': 57, '58': 58, '59': 59, '60': 60, '61': 61, '62': 62, '63': 63, '64': 64, '65': 65, '66': 66, '67': 67, '68': 68, '69': 69, '70': 70, '71': 71, '72': 72, '73': 73, '74': 74, '75': 75, '76': 76, '77': 77, '78': 78, '79': 79, '80': 80, '81': 81, '82': 82, '83': 83, '84': 84, '85': 85, '86': 86, '87': 87, '88': 88, '89': 89, '90': 90, '91': 91, '92': 92, '93': 93, '94': 94, '95': 95, '96': 96, '97': 97, '98': 98, '99': 99, '100': 