In [2]:
%pip install numpy scipy torch torchvision tqdm scikit-image
%pip install numpy scipy torch torchvision tqdm scikit-image lpips


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [14]:
from skimage import io, color
from skimage.transform import resize
import os
import numpy as np

def load_images(image_folder, img_size=(256, 256)):
    images = []
    valid_extensions = ['.png', '.jpg', '.jpeg', '.bmp', '.tiff']
    for filename in os.listdir(image_folder):
        if not any(filename.lower().endswith(ext) for ext in valid_extensions):
            continue
        img = io.imread(os.path.join(image_folder, filename))
        if img is not None:
            img = resize(img, img_size)
            if len(img.shape) == 3:  # If it's a color image
                img = color.rgb2gray(img)
            images.append(img)
    return images

images = load_images('/Users/Behzad/Desktop/VCProject_Image-Compression-using-MDCT/colored')

In [15]:
from scipy.fftpack import dct, idct

def dct2(block):
    return dct(dct(block.T, norm='ortho').T, norm='ortho')

def idct2(block):
    return idct(idct(block.T, norm='ortho').T, norm='ortho')
    
def mdct2d(block):
    """Apply MDCT to each row and then each column of the block."""
    return np.apply_along_axis(mdct, 0, np.apply_along_axis(mdct, 1, block))

def imdct2d(block):
    """Apply IMDCT to each column and then each row of the block."""
    return np.apply_along_axis(imdct, 0, np.apply_along_axis(imdct, 1, block))

In [16]:
def compress_image_mdct(image, block_size=16, window_func='hann'):
    compressed_image = np.zeros_like(image)
    blocks_per_row = image.shape[0] // block_size
    blocks_per_col = image.shape[1] // block_size

    for i in range(blocks_per_row):
        for j in range(blocks_per_col):
            block = image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size]
            compressed_block = mdct2d(block.flatten(), window_func)
            compressed_image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size] = compressed_block.reshape(block_size, block_size)

    return compressed_image

def decompress_image_mdct(compressed_image, block_size=16, window_func='hann'):
    decompressed_image = np.zeros_like(compressed_image)
    blocks_per_row = compressed_image.shape[0] // block_size
    blocks_per_col = compressed_image.shape[1] // block_size

    for i in range(blocks_per_row):
        for j in range(blocks_per_col):
            block = compressed_image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size]
            decompressed_block = imdct2d(block.flatten(), window_func)
            decompressed_image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size] = decompressed_block.reshape(block_size, block_size)

    return decompressed_image

compressed_images_mdct = [compress_image_mdct(img) for img in images]
decompressed_images_mdct = [decompress_image_mdct(img) for img in compressed_images_mdct]


TypeError: mdct2d() takes 1 positional argument but 2 were given

In [None]:
def compress_image_dct(image, block_size=16):
    compressed_image = np.zeros_like(image)
    blocks_per_row = image.shape[0] // block_size
    blocks_per_col = image.shape[1] // block_size

    for i in range(blocks_per_row):
        for j in range(blocks_per_col):
            block = image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size]
            compressed_block = dct2(block)
            compressed_image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size] = compressed_block

    return compressed_image

def decompress_image_dct(compressed_image, block_size=16):
    decompressed_image = np.zeros_like(compressed_image)
    blocks_per_row = compressed_image.shape[0] // block_size
    blocks_per_col = compressed_image.shape[1] // block_size

    for i in range(blocks_per_row):
        for j in range(blocks_per_col):
            block = compressed_image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size]
            decompressed_block = idct2(block)
            decompressed_image[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size] = decompressed_block

    return decompressed_image

compressed_images_dct = [compress_image_dct(img) for img in images]
decompressed_images_dct = [decompress_image_dct(img) for img in compressed_images_dct]


In [None]:
!git clone https://github.com/richzhang/PerceptualSimilarity
%cd PerceptualSimilarity
!pip install -r requirements.txt
!python setup.py develop


In [None]:
import lpips
import torch
import numpy as np

loss_fn = lpips.LPIPS(net='alex')  # Can also use 'vgg'

def calculate_metrics(original_images, decompressed_images):
    bpp = []
    psnr = []
    perceptual_similarity = []

    for orig, decomp in zip(original_images, decompressed_images):
        orig = torch.from_numpy(orig).unsqueeze(0).unsqueeze(0).float()
        decomp = torch.from_numpy(decomp).unsqueeze(0).unsqueeze(0).float()

        # Calculate Perceptual Similarity
        perceptual_similarity.append(loss_fn(orig, decomp).item())

        # Calculate Bits Per Pixel
        compressed_size = np.count_nonzero(decomp.numpy()) * 16  # Assuming 16-bit compressed data
        bpp.append(compressed_size / orig.numel())

        # Calculate PSNR
        mse = np.mean((orig.numpy() - decomp.numpy()) ** 2)
        psnr.append(20 * np.log10(255.0 / np.sqrt(mse)))

    return np.mean(bpp), np.mean(psnr), np.mean(perceptual_similarity)




In [None]:
# Calculate metrics for MDCT
bpp_mdct, psnr_mdct, perceptual_similarity_mdct = calculate_metrics(images, decompressed_images_mdct)
print(f'MDCT - Bits Per Pixel: {bpp_mdct}, PSNR: {psnr_mdct}, Perceptual Similarity: {perceptual_similarity_mdct}')

# Calculate metrics for DCT
bpp_dct, psnr_dct, perceptual_similarity_dct = calculate_metrics(images, decompressed_images_dct)
print(f'DCT - Bits Per Pixel: {bpp_dct}, PSNR: {psnr_dct}, Perceptual Similarity: {perceptual_similarity_dct}')