In [27]:
from scipy.fftpack import dct, idct
import scipy
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import matplotlib.pyplot as plt

# Quantization matrix
Z = [[16, 11, 10, 16, 24, 40, 51, 61],
     [12, 12, 14, 19, 26, 58, 60, 55],
     [14, 13, 16, 24, 40, 57, 69, 56],
     [14, 17, 22, 29, 51, 87, 80, 62],
     [18, 22, 37, 56, 68, 109, 103, 77],
     [24, 35, 55, 64, 81, 104, 113, 92],
     [49, 64, 78, 87, 103, 121, 120, 101],
     [72, 92, 95, 98, 112, 100, 103, 99]]

temporal = {}
frecuencia = {}

g = 0

def dct2(a):
    new_matrix = np.zeros((8, 8))
    b = np.zeros((8, 8))

    # Subtract 128 from each element
    for i in range(8):
        for j in range(8):
            b[i][j] = a[i][j] - 128

    # Apply 2D DCT
    c = scipy.fftpack.dct(scipy.fftpack.dct(b, axis=0, norm='ortho'), axis=1, norm='ortho')

    # Quantize the DCT coefficients
    for i in range(8):
        for j in range(8):
            new_matrix[i][j] = np.fix(c[i][j] / Z[i][j])

    return new_matrix

def zigzag(matrix):
    global g
    con = 0
    matrix = np.array(matrix)
    rows, columns = matrix.shape
    aux = np.zeros((1, 64))

    # Implementing zigzag ordering
    solution = [[] for _ in range(rows + columns - 1)]

    for i in range(rows):
        for j in range(columns):
            sum = i + j
            if sum % 2 == 0:
                solution[sum].insert(0, matrix[i][j])
            else:
                solution[sum].append(matrix[i][j])

    # Flatten the zigzag-ordered solution
    for i in solution:
        for j in i:
            aux[0, con] = j if j != -0.0 else abs(j)
            con += 1

    # Store the zigzag-ordered values and their frequencies
    for i in range(len(aux[0])):
        if aux[0, i] != 0:
            temporal[g] = aux[0, i]
            if aux[0, i] not in frecuencia:
                frecuencia[aux[0, i]] = aux[0, i]
            g += 1

def save_probabilities(dic, keys):
    with open("result.txt", "w") as file:
        for pos, key in enumerate(keys):
            file.write(f"{key}\t{dic[key]}\n")

# Load the image
image = Image.open("lena.jpg")
image.show()

# Convert to grayscale
gray_image = image.convert('L')
gray_image.save("gray.jpg")
gray_image.show()

# Get image dimensions
height, width = gray_image.size

# Convert image to numpy array
a = np.asarray(gray_image, dtype=np.float32)

# Subtract 128 from the image array
alternativo = a - 128
Image.fromarray(alternativo.astype(np.uint8)).save("restada.jpg")
restada_image = Image.open("restada.jpg")
restada_image.show()

# Initialize an empty matrix for the DCT results
im2 = np.zeros((height, width))

# Apply DCT to 8x8 blocks
for i in range(0, height, 8):
    for j in range(0, width, 8):
        im2[i:i+8, j:j+8] = dct2(a[i:i+8, j:j+8])

# Save the DCT result image
Image.fromarray(im2.astype(np.uint8)).save("dct.jpg")
dct_image = Image.open("dct.jpg")
dct_image.show()

# Apply zigzag ordering to the DCT coefficients
for i in range(0, height, 8):
    for j in range(0, width, 8):
        zigzag(im2[i:i+8, j:j+8])

# Save the DCT matrix to a text file
with open("dct.txt", "w") as f:
    for i in range(height):
        for j in range(width):
            value = im2[i, j]
            f.write(f"{abs(value) if value == -0.0 else value} ")
        f.write("\n")

# Calculate symbol probabilities
keys = list(frecuencia.keys())
elements = list(temporal.values())
probabilities = {key: elements.count(key) / len(elements) for key in keys}

# Save the probabilities to a text file
save_probabilities(probabilities, keys)

print("Image processing completed successfully.\nGenerated a probabilities file and a text file containing the processed JPEG matrix.")


Image processing completed successfully.
Generated a probabilities file and a text file containing the processed JPEG matrix.


In [28]:
from decimal import Decimal
import sys
import os
import operator

# Define the result file for probabilities
result_probabilities = "result.txt"
input_file = sys.argv[2]

def huffman_compressor():
    probabilities = {}
    code_table = {}

    # Read probabilities from the file
    with open(result_probabilities, 'r') as file:
        for line in file:
            line = line.split("\t")
            symbol = line[0].strip()
            probability = float(line[1].strip())
            probabilities[symbol] = probability

    # Generate Huffman codes
    code_table = huffman_code(probabilities)
    save_codes(code_table)

    with open(input_file, 'r') as txt, open("compressed.dat", 'wb') as output:
        txt = txt.read().rstrip().lower()

        encoded_text = text_encode(code_table, txt)
        padded_encoded = pad_encode(encoded_text)
        bit_array = generate_bit_array(padded_encoded)
        output.write(bytes(bit_array))

# Function to sort probabilities
def sort_probabilities(dic):
    sorted_probabilities = sorted(dic.items(), key=operator.itemgetter(1), reverse=False)
    return sorted_probabilities[0][0], sorted_probabilities[1][0]

# Generate Huffman codes
def huffman_code(dic):
    if len(dic) == 2:
        return dict(zip(dic.keys(), ['0', '1']))

    dic_copy = dic.copy()
    K1, K2 = sort_probabilities(dic)

    p1, p2 = dic_copy.pop(K1), dic_copy.pop(K2)
    dic_copy[K1 + K2] = p1 + p2

    c = huffman_code(dic_copy)
    ca1a2 = c.pop(K1 + K2)
    c[K1], c[K2] = ca1a2 + '0', ca1a2 + '1'

    return c

# Save the code table to a text file
def save_codes(dic):
    with open("codes.txt", "w") as file:
        for symbol in dic:
            file.write(f"{symbol}\t{dic[symbol]}\n")

# Encode the text using the Huffman codes
def text_encode(codes, text):
    encoded_text = ""
    for ch in text.split():
        if ch in codes:
            encoded_text += codes[ch]
    return encoded_text

# Pad the encoded text to make it a multiple of 8
def pad_encode(encoded):
    padding = 8 - len(encoded) % 8
    encoded += "0" * padding
    padded_info = "{0:08b}".format(padding)
    encoded = padded_info + encoded
    return encoded

# Generate a bit array from the binary string
def generate_bit_array(binary_string):
    if len(binary_string) % 8 != 0:
        exit(0)

    bit_array = bytearray()
    for i in range(0, len(binary_string), 8):
        byte = binary_string[i:i+8]
        bit_array.append(int(byte, 2))

    return bit_array

huffman_compressor()
print("\n\n")

# Get the size of the original and compressed files
original_size = os.path.getsize(input_file) / (1024 * 1024.0)
compressed_size = os.path.getsize("compressed.dat") / (1024 * 1024.0)
compression_percentage = ((compressed_size / original_size) * 100)

print(f"Original text: {input_file} Size: {original_size:.2f} MB")
print(f"Compressed file: compressed.dat Size: {compressed_size:.2f} MB")
print(f"File {input_file} compressed by {round(compression_percentage)}%")
print("Text file compressed successfully!\n\n")





Original text: /root/.local/share/jupyter/runtime/kernel-0dc3b3d7-0af3-4a50-a58d-c16999b4707d.json Size: 0.00 MB
Compressed file: compressed.dat Size: 0.00 MB
File /root/.local/share/jupyter/runtime/kernel-0dc3b3d7-0af3-4a50-a58d-c16999b4707d.json compressed by 1%
Text file compressed successfully!




In [29]:
import sys

# Get the name of the file to decompress from the command line arguments
file_name = sys.argv[2]

# Method that executes the main logic of the program, making calls to other methods that perform specific tasks.
def decompress():
    # Load the file to be decompressed. 'rb' indicates that the file is read in binary mode.
    with open(file_name, 'rb') as file:
        bit_string = ""

        # Read the binary file byte by byte
        byte = file.read(1)
        while byte:
            byte = ord(byte)
            bits = bin(byte)[2:].rjust(8, '0')
            bit_string += bits
            byte = file.read(1)

    text_encoded = remove_bit_padding(bit_string)
    decompressed_text = decode_text(text_encoded)
    write_file(decompressed_text)

# Write the decompressed text to a file
def write_file(decompressed_text):
    # Open the file in write mode. If it doesn't exist, it is created. If it exists, it is overwritten.
    with open("decompressed_matrix.txt", "w") as file:
        file.write(decompressed_text)

# Remove the extra zeros added for padding
def remove_bit_padding(encoded_text):
    padding = encoded_text[:8]
    extra_padding = int(padding, 2)
    encoded_text = encoded_text[8:]
    text = encoded_text[:-extra_padding]
    return text

# Decode the encoded text using the Huffman codes
def decode_text(text_encoded):
    code_table = {}

    # Open the codes file and load it into the dictionary
    with open("codes.txt", "r") as codes:
        for line in codes:
            symbol, code = line.strip().split("\t")
            code_table[code] = symbol if symbol != "salto" else "\n"

    current_code = ""
    decoded_text = ""
    con = 0

    # Iterate over the encoded text
    for bit in text_encoded:
        current_code += bit
        if current_code in code_table:
            con += 1
            char = code_table[current_code]
            if con == 256:
                decoded_text += char + "\n"
                con = 0
            else:
                decoded_text += char + " "
            current_code = ""
    return decoded_text

# Execute the decompression process
decompress()

print("\n\nThe file " + str(file_name) + " has been successfully decompressed!!\n")
print("The decompressed file is named 'decompressed_matrix.txt' and is saved in the folder where this executable is located.\n\n")




The file /root/.local/share/jupyter/runtime/kernel-0dc3b3d7-0af3-4a50-a58d-c16999b4707d.json has been successfully decompressed!!

The decompressed file is named 'decompressed_matrix.txt' and is saved in the folder where this executable is located.




In [34]:
from math import cos, sin, pi
from scipy.fftpack import dct, idct
import scipy
import numpy as np
from PIL import Image
import os

# Quantization matrix
Z = [[16, 11, 10, 16, 24, 40, 51, 61],
     [12, 12, 14, 19, 26, 58, 60, 55],
     [14, 13, 16, 24, 40, 57, 69, 56],
     [14, 17, 22, 29, 51, 87, 80, 62],
     [18, 22, 37, 56, 68, 109, 103, 77],
     [24, 35, 55, 64, 81, 104, 113, 92],
     [49, 64, 78, 87, 103, 121, 120, 101],
     [72, 92, 95, 98, 112, 100, 103, 99]]

# Function to perform the inverse DCT on an 8x8 block
def idct2(matrix):
    b = np.zeros((8, 8))
    new_matrix = np.zeros((8, 8))

    # Multiply the blocks with the quantization matrix
    for i in range(8):
        for j in range(8):
            b[i][j] = matrix[i][j] * Z[i][j]

    # Calculate the IDCT
    c = scipy.fftpack.idct(scipy.fftpack.idct(b, axis=0, norm='ortho'), axis=1, norm='ortho')

    # Add 128 to each element of the 8x8 block
    for i in range(8):
        for j in range(8):
            new_matrix[i][j] = c[i][j] + 128
    return new_matrix

# Load the matrix obtained from the JPEG process
matrix = np.loadtxt("dct.txt", dtype=float, usecols=range(256))
matrix = np.array(matrix)
image = np.zeros((256, 256))

# Apply the IDCT to each 8x8 block of the image
for i in range(0, 256, 8):
    for j in range(0, 256, 8):
        image[i:(i+8), j:(j+8)] = idct2(matrix[i:(i+8), j:(j+8)])

# Save and display the recovered image
Image.fromarray(image.astype(np.uint8)).save("decompressed.jpg")
I = Image.open("decompressed.jpg")
I.show()


In [17]:
matrix = np.loadtxt("matriz.txt",dtype=float, usecols=range(255))