In [None]:
!pip install pandas opencv-python-headless




In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [13]:
import zipfile
import os


zip_path = '/content/drive/MyDrive/Dataset_pro/Dataset.zip'
extract_path = './Dataset'

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)


all_numbers_path = os.path.join(extract_path, 'AllNumbers')


if os.path.exists(all_numbers_path):
    print("Files extracted to:", all_numbers_path)
    print(os.listdir(all_numbers_path))
else:
    print(f"'AllNumbers' folder not found inside {extract_path}")


Files extracted to: ./Dataset/AllNumbers
['Two_full (1077).jpg', 'Zero_full (687).jpg', 'Zero_full (665).jpg', 'Eight_full (45).jpg', 'Five_full (1416).jpg', 'Three_full (677).jpg', 'Eight_full (677).jpg', 'Six_full (954).jpg', 'Five_full (736).jpg', 'Nine_full (1294).jpg', 'Six_full (39).jpg', 'Three_full (37).jpg', 'Nine_full (45).jpg', 'Two_full (1454).jpg', 'Six_full (1394).jpg', 'Four_full (711).jpg', 'Zero_full (893).jpg', 'Two_full (1455).jpg', 'Seven_full (39).jpg', 'Two_full (916).jpg', 'Three_full (696).jpg', 'Three_full (691).jpg', 'Two_full (1176).jpg', 'Zero_full (172).jpg', 'Four_full (978).jpg', 'One_full (751).jpg', 'Six_full (718).jpg', 'Five_full (280).jpg', 'Four_full (989).jpg', 'Nine_full (271).jpg', 'Eight_full (682).jpg', 'One_full (692).jpg', 'Three_full (715).jpg', 'Eight_full (40).jpg', 'Seven_full (977).jpg', 'Eight_full (1368).jpg', 'Eight_full (43).jpg', 'Four_full (1005).jpg', 'One_full (34).jpg', 'Two_full (1444).jpg', 'Nine_full (651).jpg', 'Zero_full (8

In [15]:
import zipfile
import os
import numpy as np
import pandas as pd
import cv2
from numpy.linalg import svd
from statistics import mean


zip_path = '/content/drive/MyDrive/Dataset_pro/Dataset.zip'
extract_path = './Dataset'


with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)


input_folder = os.path.join(extract_path, 'AllNumbers')


if not os.path.exists(input_folder):
    raise FileNotFoundError(f"'AllNumbers' folder not found inside {extract_path}")

# Function to compress an image using SVD
def compress_image_svd(image, k):
    U, S, Vt = svd(image, full_matrices=False)
    S_k = np.diag(S[:k])
    U_k = U[:, :k]
    V_k = Vt[:k, :]
    return np.dot(U_k, np.dot(S_k, V_k))

# Function to evaluate compression quality
def evaluate_compression_quality(original, compressed):
    mse = np.mean((original - compressed) ** 2)
    psnr = 20 * np.log10(1.0 / np.sqrt(mse)) if mse != 0 else float('inf')
    return mse, psnr

# Define range of k values and corresponding output folders
k_values = [10, 20, 30, 40, 50]
output_folders = [f"K-{k}" for k in k_values]

# DataFrame to store overall results for all k values
overall_results = []

# Loop through each k value and corresponding output folder
for k, output_folder in zip(k_values, output_folders):
    os.makedirs(output_folder, exist_ok=True)

    # DataFrame to store results for this k
    results = []

    # Get list of image files
    image_files = [f for f in os.listdir(input_folder) if f.endswith(".jpg")]
    if not image_files:
        print(f"No images found in the input folder: {input_folder}")
        continue

    # Compress images in the specified folder
    for image_file in image_files:
        filepath = os.path.join(input_folder, image_file)
        img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
        img = img / 255.0  # Normalize pixel values to [0, 1]

        m, n = img.shape
        compressed_img = compress_image_svd(img, k)
        compressed_img = np.clip(compressed_img, 0, 1)

        # Evaluate quality
        mse, psnr = evaluate_compression_quality(img, compressed_img)
        original_size = m * n
        compressed_size = m * k + k * k + n * k
        compression_ratio = original_size / compressed_size

        # Store results
        results.append((image_file, mse, psnr, compression_ratio))

        # Save compressed image
        compressed_filepath = os.path.join(output_folder, image_file)
        cv2.imwrite(compressed_filepath, (compressed_img * 255).astype(np.uint8))

    # Save results for this k to an Excel file
    results_df = pd.DataFrame(results, columns=["ImageName", "MSE", "PSNR", "CompressionRatio"])
    excel_file = os.path.join(output_folder, f"results_k{k}.xlsx")
    results_df.to_excel(excel_file, index=False)

    # Calculate overall averages for this k
    mse_avg = mean(results_df["MSE"])
    psnr_avg = mean(results_df["PSNR"])
    compression_ratio_avg = mean(results_df["CompressionRatio"])

    print(f"\nResults for k = {k}:")
    print(f"MSE: {mse_avg}")
    print(f"PSNR: {psnr_avg} dB")
    print(f"Compression Ratio: {compression_ratio_avg}")

    overall_results.append((k, mse_avg, psnr_avg, compression_ratio_avg))

# Check if overall_results is populated
if not overall_results:
    print("No results were recorded. Please ensure images are present in the input folder.")
else:
    overall_results_df = pd.DataFrame(overall_results, columns=["K", "MSE", "PSNR", "CompressionRatio"])

    # Save overall results to a single Excel file
    overall_results_file = './overall_results.xlsx'
    overall_results_df.to_excel(overall_results_file, index=False)
    print(f"\nOverall results saved to {overall_results_file}")

    # Normalize metrics for combined score calculation
    mse_normalized = 1 - (overall_results_df["MSE"] - overall_results_df["MSE"].min()) / (
            overall_results_df["MSE"].max() - overall_results_df["MSE"].min())
    psnr_normalized = (overall_results_df["PSNR"] - overall_results_df["PSNR"].min()) / (
            overall_results_df["PSNR"].max() - overall_results_df["PSNR"].min())
    compression_ratio_normalized = (overall_results_df["CompressionRatio"] - overall_results_df[
        "CompressionRatio"].min()) / (
                                            overall_results_df["CompressionRatio"].max() - overall_results_df[
                                        "CompressionRatio"].min())

    # Weights for each metric
    w_mse = 0.3  # Lower MSE is more important
    w_psnr = 0.3  # Higher PSNR is important
    w_compression = 0.4  # Compression ratio is also important

    # Calculate combined score for each K
    overall_results_df["Score"] = w_mse * mse_normalized + w_psnr * psnr_normalized + w_compression * compression_ratio_normalized

    # Find the best K
    best_row = overall_results_df["Score"].idxmax()
    best_k = overall_results_df.loc[best_row, "K"]

    print(f"\nBest K: {best_k}")
    print(f"Metrics for Best K:")
    print(f"MSE: {overall_results_df.loc[best_row, 'MSE']}")
    print(f"PSNR: {overall_results_df.loc[best_row, 'PSNR']} dB")
    print(f"Compression Ratio: {overall_results_df.loc[best_row, 'CompressionRatio']}")



Results for k = 10:
MSE: 0.0014877845054030068
PSNR: 30.35760134604769 dB
Compression Ratio: 5.25

Results for k = 20:
MSE: 0.00016618244290246247
PSNR: 41.1479035266804 dB
Compression Ratio: 2.52

Results for k = 30:
MSE: 3.512260480260018e-05
PSNR: 48.683321610456915 dB
Compression Ratio: 1.6153846153846154

Results for k = 40:
MSE: 8.282267192745092e-06
PSNR: 55.10738575179384 dB
Compression Ratio: 1.1666666666666667

Results for k = 50:
MSE: 1.9043704659439984e-06
PSNR: 61.59105031008055 dB
Compression Ratio: 0.9

Overall results saved to ./overall_results.xlsx

Best K: 50
Metrics for Best K:
MSE: 1.9043704659439984e-06
PSNR: 61.59105031008055 dB
Compression Ratio: 0.9


In [14]:
from google.colab import drive
drive.mount('/content/drive')


!cp ./overall_results.xlsx "/content/drive/MyDrive/"
print("File copied to Google Drive: /content/drive/MyDrive/overall_results.xlsx")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
File copied to Google Drive: /content/drive/MyDrive/overall_results.xlsx
