In [1]:
from tensorflow.keras import backend as K
# Clear the Keras backend session before anything else in the notebook runs.
K.clear_session()

In [2]:
!pip install rarfile

Collecting rarfile
  Downloading rarfile-4.2-py3-none-any.whl.metadata (4.4 kB)
Downloading rarfile-4.2-py3-none-any.whl (29 kB)
Installing collected packages: rarfile
Successfully installed rarfile-4.2


In [3]:
import os
import pandas as pd
import gdown
import rarfile
import shutil
from PIL import Image

# Google Drive direct-download URL of the original dataset archive
url = 'https://drive.google.com/uc?id=1YGe4VubSw5Pt1FrutEK0VgV7eKjcQ4sA'

output_rar = '/content/rice.rar'

# The archive is large (the recorded run transferred 1.87G), so only download
# when a valid RAR file is not already present at the target path. A missing
# or corrupt file still triggers a fresh download.
if not (os.path.exists(output_rar) and rarfile.is_rarfile(output_rar)):
    gdown.download(url, output_rar, quiet=False, fuzzy=True)

# Extract the RAR file into /content
try:
    with rarfile.RarFile(output_rar, 'r') as rar_ref:
        rar_ref.extractall('/content')
except rarfile.NotRarFile:
    # Best-effort: report the problem and let the notebook continue.
    print("The downloaded file is not a valid RAR file.")


Downloading...
From (original): https://drive.google.com/uc?id=1YGe4VubSw5Pt1FrutEK0VgV7eKjcQ4sA
From (redirected): https://drive.google.com/uc?id=1YGe4VubSw5Pt1FrutEK0VgV7eKjcQ4sA&confirm=t&uuid=25e39c17-cd13-4d51-8dd1-9abec85b4fb8
To: /content/rice.rar
100%|██████████| 1.87G/1.87G [00:23<00:00, 80.4MB/s]


In [4]:
# Path to the original dataset folder (the archive above is extracted into /content)
dataset = '/content/rice'

In [22]:
import gdown
import rarfile

import os

# File ID from the provided Google Drive link
file_id = "1fOFS9igMw1Vo8n64IZBBJHp2NXFi_tT3"
file_url = f"https://drive.google.com/uc?id={file_id}"

output_rar = '/content/preprocessed_data.rar'

# The archive is large (the recorded run transferred 1.22G), so only download
# when a valid RAR file is not already present at the target path. A missing
# or corrupt file still triggers a fresh download.
if not (os.path.exists(output_rar) and rarfile.is_rarfile(output_rar)):
    gdown.download(file_url, output_rar, quiet=False)

# Extract the RAR file into /content
try:
    with rarfile.RarFile(output_rar, 'r') as rar_ref:
        rar_ref.extractall('/content')
    print("Extraction completed successfully.")
except rarfile.NotRarFile:
    # Best-effort: report the problem and let the notebook continue.
    print("The downloaded file is not a valid RAR file.")


Downloading...
From (original): https://drive.google.com/uc?id=1fOFS9igMw1Vo8n64IZBBJHp2NXFi_tT3
From (redirected): https://drive.google.com/uc?id=1fOFS9igMw1Vo8n64IZBBJHp2NXFi_tT3&confirm=t&uuid=382ea72d-06c5-4e03-a74c-77acb28fcd88
To: /content/preprocessed_data.rar
100%|██████████| 1.22G/1.22G [00:11<00:00, 111MB/s]


Extraction completed successfully.


In [23]:
# Path to the extracted *preprocessed* dataset folder.
# NOTE: this rebinds `dataset`, which an earlier cell set to the original
# dataset folder ('/content/rice').
dataset = '/content/preprocessed_data'

In [24]:
import cv2
import os
from skimage.metrics import structural_similarity as ssim
import numpy as np

# Root folders of the original dataset and of its preprocessed counterpart.
# The comparison loop below walks the first and looks for the same relative
# file path under the second.
original_dataset_path = '/content/rice'
preprocessed_dataset_path = '/content/preprocessed_data'

# Function to calculate PSNR
def calculate_psnr(original, processed):
    """Return the peak signal-to-noise ratio (in dB) between two images.

    Uses a peak value of 255, i.e. assumes 8-bit pixel intensities.

    Args:
        original: Reference image as an array.
        processed: Image to compare; must have the same shape as ``original``.

    Returns:
        ``10 * log10(255**2 / MSE)``, or the sentinel ``100`` when the two
        images are identical (MSE of zero).
    """
    # Do the arithmetic in float64. On unsigned 8-bit arrays (what image
    # loaders such as cv2.imread typically return) both the subtraction and the
    # squaring wrap modulo 256, which e.g. turns a uniform difference of 16
    # into an MSE of 0.
    diff = np.asarray(original, dtype=np.float64) - np.asarray(processed, dtype=np.float64)
    mse = np.mean(diff ** 2)
    if mse == 0:  # No noise
        return 100
    psnr = 10 * np.log10(255.0 ** 2 / mse)
    return psnr

# Function to calculate SSIM
def calculate_ssim(original, processed):
    """Return the structural similarity (SSIM) between two colour images.

    Args:
        original: Reference image; the last axis is treated as the channel axis.
        processed: Image to compare; must have the same shape as ``original``.

    Returns:
        The SSIM score computed by ``skimage.metrics.structural_similarity``.
    """
    # `channel_axis=-1` replaces the older `multichannel=True` keyword
    # (requires scikit-image >= 0.19). Without it the colour axis is treated as
    # a spatial dimension and the default window does not fit 3-channel images.
    # NOTE(review): data_range is taken from the observed min/max of the second
    # image rather than the nominal 8-bit range (255) - confirm this is intended.
    return ssim(
        original,
        processed,
        data_range=processed.max() - processed.min(),
        channel_axis=-1,
    )

# Compare every file of the original dataset with the file at the same
# relative path in the preprocessed dataset, collecting one record per pair.
results = []
for root, dirs, files in os.walk(original_dataset_path):
    for file_name in files:
        original_img_path = os.path.join(root, file_name)
        # Mirror the path below the preprocessed root. Working with the relative
        # path avoids rewriting any other occurrence of the root string.
        relative_path = os.path.relpath(original_img_path, original_dataset_path)
        processed_img_path = os.path.join(preprocessed_dataset_path, relative_path)

        if not os.path.exists(processed_img_path):
            continue

        original_img = cv2.imread(original_img_path)
        processed_img = cv2.imread(processed_img_path)

        # cv2.imread returns None instead of raising when a file cannot be
        # decoded (e.g. a non-image file); skip such pairs.
        if original_img is None or processed_img is None:
            continue

        # Resize images to match if needed (cv2.resize takes (width, height))
        if original_img.shape != processed_img.shape:
            processed_img = cv2.resize(processed_img, (original_img.shape[1], original_img.shape[0]))

        # Calculate PSNR and SSIM
        psnr_value = calculate_psnr(original_img, processed_img)
        ssim_value = calculate_ssim(original_img, processed_img)

        # Append results
        results.append({
            'file': file_name,
            'psnr': psnr_value,
            'ssim': ssim_value
        })
        print(f"File: {file_name}, PSNR: {psnr_value}, SSIM: {ssim_value}")


In [None]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.metrics import structural_similarity as ssim

# Root of the original dataset. The loop below reads the Train, Validation and
# Test subfolders and treats every directory inside them as one class.
dataset_path = '/content/rice'

# Function to calculate PSNR
def calculate_psnr(image1, image2):
    """Return the peak signal-to-noise ratio (in dB) between two images.

    Uses a peak value of 255, i.e. assumes 8-bit pixel intensities.

    Args:
        image1: First image as an array.
        image2: Second image; must have the same shape as ``image1``.

    Returns:
        ``10 * log10(255**2 / MSE)``, or the sentinel ``100`` when the two
        images are identical (MSE of zero).
    """
    # Do the arithmetic in float64. On unsigned 8-bit arrays (what image
    # loaders such as cv2.imread typically return) both the subtraction and the
    # squaring wrap modulo 256, which e.g. turns a uniform difference of 16
    # into an MSE of 0.
    diff = np.asarray(image1, dtype=np.float64) - np.asarray(image2, dtype=np.float64)
    mse = np.mean(diff ** 2)
    if mse == 0:  # Identical images
        return 100
    psnr = 10 * np.log10(255.0 ** 2 / mse)
    return psnr

# Function to calculate SSIM
def calculate_ssim(image1, image2):
    """Return the structural similarity (SSIM) between two colour images.

    Args:
        image1: First image; the last axis is treated as the channel axis.
        image2: Second image; must have the same shape as ``image1``.

    Returns:
        The SSIM score computed by ``skimage.metrics.structural_similarity``.
    """
    # `channel_axis=-1` replaces the older `multichannel=True` keyword
    # (requires scikit-image >= 0.19). Without it the colour axis is treated as
    # a spatial dimension and the default window does not fit 3-channel images.
    # NOTE(review): data_range is taken from the observed min/max of the second
    # image rather than the nominal 8-bit range (255) - confirm this is intended.
    return ssim(
        image1,
        image2,
        data_range=image2.max() - image2.min(),
        channel_axis=-1,
    )

# Lists to store PSNR and SSIM values (one entry per compared image pair)
psnr_values = []
ssim_values = []

# Baseline: inside every class folder of Train, Validation and Test, compare
# each readable image with the next readable image of the same folder.
for split in ["Train", "Validation", "Test"]:
    split_path = os.path.join(dataset_path, split)
    # sorted() makes the pairing (and therefore the plotted series) reproducible;
    # os.listdir returns entries in arbitrary order.
    for cls_folder in sorted(os.listdir(split_path)):
        class_path = os.path.join(split_path, cls_folder)
        if not os.path.isdir(class_path):
            continue

        # Only the previous image is kept in memory instead of decoding the
        # whole class folder up front.
        previous_img = None
        for img_name in sorted(os.listdir(class_path)):
            # Case-insensitive so files such as "IMG_1.JPG" are not skipped
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            current_img = cv2.imread(os.path.join(class_path, img_name))
            if current_img is None:  # unreadable file
                continue

            if previous_img is not None:
                img1, img2 = previous_img, current_img

                # Resize the second image to the first one's size if needed
                # (cv2.resize takes (width, height)); the un-resized image is
                # what gets carried over to the next pair.
                if img1.shape != img2.shape:
                    img2 = cv2.resize(img2, (img1.shape[1], img1.shape[0]))

                # Calculate PSNR and SSIM
                psnr = calculate_psnr(img1, img2)
                psnr_values.append(psnr)

                ssim_score = calculate_ssim(img1, img2)
                ssim_values.append(ssim_score)

            previous_img = current_img

# Two panels side by side: PSNR on the left, SSIM on the right
fig, (ax_psnr, ax_ssim) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: PSNR per image pair
ax_psnr.plot(range(len(psnr_values)), psnr_values, label="PSNR", color='blue', marker='o')
ax_psnr.set_xlabel("Image Pair Index")
ax_psnr.set_ylabel("PSNR (dB)")
ax_psnr.set_title("PSNR across Original Dataset")
ax_psnr.legend()

# Right panel: SSIM per image pair
ax_ssim.plot(range(len(ssim_values)), ssim_values, label="SSIM", color='green', marker='o')
ax_ssim.set_xlabel("Image Pair Index")
ax_ssim.set_ylabel("SSIM")
ax_ssim.set_title("SSIM across Original Dataset")
ax_ssim.legend()

fig.tight_layout()
plt.show()

# Summary: mean of each metric over all compared pairs
mean_psnr = np.mean(psnr_values)
mean_ssim = np.mean(ssim_values)
print(f"Average PSNR: {mean_psnr:.2f}")
print(f"Average SSIM: {mean_ssim:.4f}")


In [None]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.metrics import structural_similarity as ssim

# Root folders of the original images and of their preprocessed counterparts
original_dataset_path = '/content/rice'
preprocessed_dataset_path = '/content/preprocessed_data'

# Score tables keyed by the display name of each preprocessing type; every key
# starts with its own empty list.
psnr_values = {
    technique: []
    for technique in ("Gamma Correction", "Histogram Equalization", "Contrast Stretching", "CLAHE")
}
# Same keys in the same order as psnr_values, with independent lists
ssim_values = {technique: [] for technique in psnr_values}

# Define a function to calculate PSNR
def calculate_psnr(image1, image2):
    """Return the peak signal-to-noise ratio (in dB) between two images.

    Uses a peak value of 255, i.e. assumes 8-bit pixel intensities.

    Args:
        image1: First image as an array.
        image2: Second image; must have the same shape as ``image1``.

    Returns:
        ``10 * log10(255**2 / MSE)``, or the sentinel ``100`` when the two
        images are identical (MSE of zero).
    """
    # Do the arithmetic in float64. On unsigned 8-bit arrays (what image
    # loaders such as cv2.imread typically return) both the subtraction and the
    # squaring wrap modulo 256, which e.g. turns a uniform difference of 16
    # into an MSE of 0.
    diff = np.asarray(image1, dtype=np.float64) - np.asarray(image2, dtype=np.float64)
    mse = np.mean(diff ** 2)
    if mse == 0:  # Identical images
        return 100
    psnr = 10 * np.log10(255.0 ** 2 / mse)
    return psnr

# Define a function to calculate SSIM
def calculate_ssim(image1, image2):
    """Return the structural similarity (SSIM) between two colour images.

    Args:
        image1: First image; the last axis is treated as the channel axis.
        image2: Second image; must have the same shape as ``image1``.

    Returns:
        The SSIM score computed by ``skimage.metrics.structural_similarity``.
    """
    # `channel_axis=-1` replaces the older `multichannel=True` keyword
    # (requires scikit-image >= 0.19). Without it the colour axis is treated as
    # a spatial dimension and the default window does not fit 3-channel images.
    # NOTE(review): data_range is taken from the observed min/max of the second
    # image rather than the nominal 8-bit range (255) - confirm this is intended.
    return ssim(
        image1,
        image2,
        data_range=image2.max() - image2.min(),
        channel_axis=-1,
    )

# Substring of the processed image path -> preprocessing type it identifies.
# Checked in this order; the first match wins.
type_markers = (
    ("gamma_corrected", "Gamma Correction"),
    ("histogram_equalized", "Histogram Equalization"),
    ("contrast_stretched", "Contrast Stretching"),
    ("clahe_eq", "CLAHE"),
)

# Loop through each image in the original dataset and score it against the
# file of the same name in the matching preprocessed class folder.
# NOTE(review): the file-count output further down reports exactly four
# preprocessed files per original file, so the preprocessed files presumably
# carry a per-technique name. If so, this same-name lookup finds nothing and
# every list stays empty - verify the naming scheme of the preprocessed data.
for split in ["Train", "Validation", "Test"]:
    for cls_folder in os.listdir(os.path.join(original_dataset_path, split)):
        class_path_original = os.path.join(original_dataset_path, split, cls_folder)
        class_path_processed = os.path.join(preprocessed_dataset_path, split, cls_folder)

        # Only classes that exist as directories in both datasets are compared
        if not os.path.isdir(class_path_original) or not os.path.isdir(class_path_processed):
            continue

        for img_file in os.listdir(class_path_original):
            original_img_path = os.path.join(class_path_original, img_file)
            processed_img_path = os.path.join(class_path_processed, img_file)

            # Identify the preprocessing type from the path *before* touching
            # the disk, so unrecognized files cost no image decode or resize.
            preprocessing_type = next(
                (label for marker, label in type_markers if marker in processed_img_path),
                None,
            )
            if preprocessing_type is None:
                continue  # Skip unrecognized preprocessing

            # Load both original and processed images
            original_img = cv2.imread(original_img_path)
            processed_img = cv2.imread(processed_img_path)

            # Skip if either image is missing or unreadable
            if original_img is None or processed_img is None:
                continue

            # Resize processed image to match original if needed
            # (cv2.resize takes (width, height))
            if original_img.shape != processed_img.shape:
                processed_img = cv2.resize(processed_img, (original_img.shape[1], original_img.shape[0]))

            # Calculate PSNR and SSIM between original and processed image
            psnr = calculate_psnr(original_img, processed_img)
            ssim_score = calculate_ssim(original_img, processed_img)

            # Append the scores to the respective preprocessing lists
            psnr_values[preprocessing_type].append(psnr)
            ssim_values[preprocessing_type].append(ssim_score)

# Plotting PSNR and SSIM values for each preprocessing type
plt.figure(figsize=(14, 6))

# Left panel: one PSNR series per preprocessing type
plt.subplot(1, 2, 1)
for key in psnr_values:
    plt.plot(range(len(psnr_values[key])), psnr_values[key], label=f"{key} PSNR", marker='o')
plt.xlabel("Image Index")
plt.ylabel("PSNR (dB)")
plt.title("PSNR for Each Preprocessing Type")
plt.legend()

# Right panel: one SSIM series per preprocessing type
plt.subplot(1, 2, 2)
for key in ssim_values:
    plt.plot(range(len(ssim_values[key])), ssim_values[key], label=f"{key} SSIM", marker='o')
plt.xlabel("Image Index")
plt.ylabel("SSIM")
plt.title("SSIM for Each Preprocessing Type")
plt.legend()

plt.tight_layout()
plt.show()

# Print average PSNR and SSIM for each preprocessing type
for key in psnr_values:
    if not psnr_values[key]:
        # np.mean of an empty list is nan and emits a RuntimeWarning; report
        # the missing data explicitly instead of printing "nan".
        print(f"{key} - no image pairs were evaluated")
        continue
    print(f"{key} - Average PSNR: {np.mean(psnr_values[key]):.2f}")
    print(f"{key} - Average SSIM: {np.mean(ssim_values[key]):.4f}")


In [32]:
import os

# Root folders of the two datasets whose per-folder file counts are reported below
original_dataset_path = '/content/rice'
preprocessed_dataset_path = '/content/preprocessed_data'

# Function to count files in each class subfolder
def count_files(base_path, dataset_name):
    """Print per-folder file counts for the split folders of a dataset.

    Walks ``base_path`` and, for every directory whose path relative to
    ``base_path`` has a component named ``Train``, ``Validation`` or ``Test``,
    prints ``<folder name>: <n> files``. The split folders themselves are
    listed as well, with the number of files that sit directly in them.
    A grand total is printed at the end.

    Args:
        base_path: Root directory of the dataset.
        dataset_name: Label used in the printed header and total line.

    Returns:
        None. All results are printed.
    """
    split_names = {'Train', 'Validation', 'Test'}

    print(f"\nFile count for {dataset_name}:")
    total_files = 0
    for root, _, files in os.walk(base_path):
        # Match whole path components *below* base_path. A substring test on the
        # full path would also fire when base_path itself (or one of its
        # parents) merely contains "Test" or "Train" in its name.
        relative_parts = os.path.relpath(root, base_path).split(os.sep)
        if split_names.intersection(relative_parts):
            num_files = len(files)
            print(f"{os.path.basename(root)}: {num_files} files")
            total_files += num_files
    print(f"Total files in {dataset_name}: {total_files}")

# Report the counts for the original dataset first, then for the preprocessed one
for dataset_root, dataset_label in (
    (original_dataset_path, "Original Dataset"),
    (preprocessed_dataset_path, "Preprocessed Dataset"),
):
    count_files(dataset_root, dataset_label)



File count for Original Dataset:
Train: 0 files
healthy: 1491 files
brown_spot: 1480 files
rice_hispa: 1461 files
bacterial_leaf_blight: 1386 files
leaf_blast: 1801 files
Validation: 0 files
healthy: 110 files
brown_spot: 110 files
Rice Hispa: 110 files
bacterial_leaf_blight: 110 files
leaf_blast: 110 files
Test: 0 files
healthy: 281 files
brown_spot: 270 files
Rice Hispa: 115 files
bacterial_leaf_blight: 266 files
leaf_blast: 252 files
Total files in Original Dataset: 9353

File count for Preprocessed Dataset:
Train: 0 files
healthy: 5964 files
brown_spot: 5920 files
rice_hispa: 5844 files
bacterial_leaf_blight: 5544 files
leaf_blast: 7204 files
Validation: 0 files
healthy: 440 files
brown_spot: 440 files
Rice Hispa: 440 files
bacterial_leaf_blight: 440 files
leaf_blast: 440 files
Test: 0 files
healthy: 1124 files
brown_spot: 1080 files
Rice Hispa: 460 files
bacterial_leaf_blight: 1064 files
leaf_blast: 1008 files
Total files in Preprocessed Dataset: 37412
