In [1]:
import os
import glob
import numpy as np
from PIL import Image
from google.colab import drive

In [2]:
# Attach Google Drive to the Colab filesystem so the dataset folders are reachable
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Dataset layout: one sub-folder per split directly under base_path
base_path = '/content/drive/MyDrive/DATASET'
splits = {
    split_name: os.path.join(base_path, split_name)
    for split_name in ('Train', 'Validate', 'Test')
}

# Noise standard deviation, on the 0-255 pixel scale
sigma = 15
# Target (width, height) every image is resized to
image_size = (128, 128)

In [4]:
# Verify that every split folder exists before any processing starts
for split, path in splits.items():
    # isdir rather than exists: a regular file that happens to sit at the
    # split path must be rejected here, not later when no images are found.
    if not os.path.isdir(path):
        raise FileNotFoundError(f"Folder not found: {path}. Run the path listing code to confirm the correct path.")
    print(f"Found {split} folder: {path}")

Found Train folder: /content/drive/MyDrive/DATASET/Train
Found Validate folder: /content/drive/MyDrive/DATASET/Validate
Found Test folder: /content/drive/MyDrive/DATASET/Test


In [5]:
# Function to add Gaussian noise
def add_gaussian_noise(image, sigma, rng=None):
    """Return a copy of ``image`` with additive zero-mean Gaussian noise.

    Args:
        image: A PIL image (or anything ``np.array`` can convert). Pixel
            values are assumed to lie on the 0-255 scale, since they are
            normalised by dividing by 255.
        sigma: Standard deviation of the noise on the 0-255 pixel scale.
        rng: Optional randomness source exposing ``normal(loc, scale, size)``,
            e.g. ``np.random.default_rng(seed)``. When omitted, NumPy's global
            ``np.random`` state is used, so ``np.random.seed`` still applies.

    Returns:
        A PIL image built from a ``uint8`` array with the input's shape.
    """
    sampler = np.random if rng is None else rng

    # Normalise to [0, 1], add noise scaled the same way, clip, scale back.
    img_array = np.array(image).astype(np.float32) / 255.0
    noise = sampler.normal(0, sigma / 255.0, img_array.shape)
    noisy_img = np.clip(img_array + noise, 0, 1) * 255.0

    # Round to nearest before the integer cast: astype(np.uint8) on its own
    # truncates toward zero, darkening the result by ~0.5 level on average.
    return Image.fromarray(np.rint(noisy_img).astype(np.uint8))

In [6]:
# Loop through dataset splits
def _find_clean_images(split_root):
    """List the clean source images under ``split_root``, one per output stem.

    Collects ``*.jpg`` and ``*.png`` files recursively, drops anything whose
    path relative to ``split_root`` contains ``noisy_sigma`` (previously
    generated noisy output), and keeps only the first file, in sorted order,
    for each relative path without its extension.

    The de-duplication makes re-runs stable: the processing loop writes a
    resized ``<stem>.png`` next to every source, so after one run a ``.jpg``
    source has a ``.png`` sibling that would otherwise be picked up as a
    second input mapping to the same output files.
    """
    candidates = sorted(
        glob.glob(os.path.join(split_root, '**', '*.jpg'), recursive=True) +
        glob.glob(os.path.join(split_root, '**', '*.png'), recursive=True)
    )

    selected = []
    seen_stems = set()
    for candidate in candidates:
        # Test the relative path so a 'noisy_sigma' substring in the dataset
        # root itself cannot filter out every image.
        rel = os.path.relpath(candidate, split_root)
        if 'noisy_sigma' in rel:
            continue
        stem = os.path.splitext(rel)[0]
        if stem in seen_stems:
            continue
        seen_stems.add(stem)
        selected.append(candidate)
    return selected


for split in splits:
    clean_path = splits[split]
    clean_files = _find_clean_images(clean_path)

    if not clean_files:
        raise ValueError(f"No images found in {clean_path}")

    print(f"Processing {split} with sigma={sigma} ({len(clean_files)} images)...")

    # Root of the noisy mirror tree for this split (loop-invariant).
    noisy_root = os.path.join(clean_path, f'noisy_sigma{sigma}')

    for clean_file in clean_files:
        # Load and resize to image_size in RGB. The context manager closes
        # the source file before it may be overwritten by the clean PNG below.
        with Image.open(clean_file) as source_img:
            clean_img = source_img.convert('RGB').resize(image_size, Image.Resampling.LANCZOS)

        # Add Gaussian noise
        noisy_img = add_gaussian_noise(clean_img, sigma)

        # Both outputs keep the source's relative location, as PNG
        rel_path = os.path.relpath(clean_file, clean_path)
        rel_stem = os.path.splitext(rel_path)[0]

        # Save noisy image under <split>/noisy_sigma<sigma>/...
        output_file = os.path.join(noisy_root, rel_stem + '.png')
        noisy_dir = os.path.dirname(output_file)
        os.makedirs(noisy_dir, exist_ok=True)
        noisy_img.save(output_file, 'PNG')

        # Save the resized clean image next to the source (a .png source is
        # overwritten by its resized version)
        clean_output_file = os.path.join(clean_path, rel_stem + '.png')
        os.makedirs(os.path.dirname(clean_output_file), exist_ok=True)
        clean_img.save(clean_output_file, 'PNG')

    print(f"Saved {len(clean_files)} noisy (sigma={sigma}) and clean images to {clean_path}")

# Report the configured size and sigma rather than hard-coded values
print(f"All done! Noisy and clean RGB images ({image_size[0]}x{image_size[1]}, sigma={sigma}) are saved in each split.")

Processing Train with sigma=15 (7998 images)...
Saved 7998 noisy (sigma=15) and clean images to /content/drive/MyDrive/DATASET/Train
Processing Validate with sigma=15 (1001 images)...
Saved 1001 noisy (sigma=15) and clean images to /content/drive/MyDrive/DATASET/Validate
Processing Test with sigma=15 (1003 images)...
Saved 1003 noisy (sigma=15) and clean images to /content/drive/MyDrive/DATASET/Test
All done! Noisy and clean RGB images (128x128, sigma=15) are saved in each split.
