In [1]:
# Imports
import os
import shutil
from torchvision import transforms
from PIL import Image, ImageEnhance, ImageOps
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision.utils import save_image
from torchvision import datasets
from torch.utils.data import DataLoader
import warnings
import numpy as np
import cv2

In [2]:
# Configuration
# Directories scanned for class sub-folders (original images and augmented images)
SOURCE_DIRS = ['./images', './augmented_images']
# Directory that receives the preprocessed images
OUTPUT_DIR = './preprocessed_images'

# Number of images per DataLoader batch
batch_size = 64

# Registries that later cells fill in
labels = {}
images_by_label = {}

# Keep Pillow's DecompressionBombWarning out of the cell output
warnings.simplefilter('ignore', Image.DecompressionBombWarning)

In [3]:
# Locate all classes in the source directories
# Every sub-directory of a source directory counts as a class. Each class name is
# registered once in `labels` (with an empty list), however many sources contain it.
for SOURCE_DIR in SOURCE_DIRS:
    for class_name in os.listdir(SOURCE_DIR):
        class_path = os.path.join(SOURCE_DIR, class_name)
        if not os.path.isdir(class_path):
            continue  # Plain files next to the class folders are not classes

        labels.setdefault(class_name, [])
        print(f'Found class {class_name} in {SOURCE_DIR}')

Found class Paper in ./images
Found class Rock in ./images
Found class Scissor in ./images
Found class Paper in ./augmented_images
Found class Rock in ./augmented_images
Found class Scissor in ./augmented_images


In [4]:
# Load the data
# Images are grouped under one key per (class, source directory) pair,
# e.g. 'Paper_./images'.
for SOURCE_DIR in SOURCE_DIRS:
    for class_name in os.listdir(SOURCE_DIR):
        class_path = os.path.join(SOURCE_DIR, class_name)
        if not os.path.isdir(class_path):
            continue  # Skip if not a directory

        loader_name = class_name + "_" + SOURCE_DIR

        # Always start from an empty list for this key. The dict is keyed by
        # loader_name, so testing `class_name` for membership never matched anyway;
        # resetting explicitly also keeps the cell idempotent when it is re-run.
        images_by_label[loader_name] = []

        # Traverse images in the class folder
        for file_name in os.listdir(class_path):
            # Compare the lower-cased name so 'IMG.JPG' is treated like 'img.jpg'
            if file_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                file_path = os.path.join(class_path, file_name)

                # NOTE(review): Image.open defers decoding, and the opened image is
                # kept in the list for the rest of the session. If only the counts
                # are needed, consider storing file paths instead.
                image = Image.open(file_path)

                # Append the image to the corresponding label's list
                images_by_label[loader_name].append(image)

# Print summary
for label, images in images_by_label.items():
    print(f"Loaded {len(images)} images for label '{label}'.")


Loaded 40 images for label 'Paper_./images'.
Loaded 54 images for label 'Rock_./images'.
Loaded 39 images for label 'Scissor_./images'.
Loaded 280 images for label 'Paper_./augmented_images'.
Loaded 378 images for label 'Rock_./augmented_images'.
Loaded 273 images for label 'Scissor_./augmented_images'.


In [5]:
def resize_with_aspect_ratio(image, size):
    """Scale `image` so that its width equals `size`, keeping the aspect ratio.

    Args:
        image: PIL image to resize.
        size: Target width in pixels.

    Returns:
        A new image of width `size`. Its height is the original height multiplied
        by the same scale factor, truncated to an integer but never below 1 pixel.
    """
    original_width, original_height = image.size
    scale = size / float(original_width)

    # int() truncates towards zero; clamp to 1 so that a very wide, flat image
    # does not produce a zero height, which resize() rejects.
    new_height = max(1, int(float(original_height) * scale))

    return image.resize((size, new_height), Image.Resampling.LANCZOS)

In [6]:
def auto_exposure(img, clip_limit=25.0, tile_grid_size=(8, 8)):
    """Equalise local contrast by applying CLAHE to each colour channel separately.

    Args:
        img: PIL image. Images in a mode other than RGB (e.g. L, RGBA, P) are
            converted to RGB first. Any other array-like input is passed to
            np.array unchanged and is assumed to be an H x W x 3 8-bit array;
            this is not validated.
        clip_limit: CLAHE contrast clip limit (default keeps the previous value).
        tile_grid_size: CLAHE tile grid as (columns, rows) (default keeps the
            previous value).

    Returns:
        A PIL image built from the equalised channels.
    """
    # Splitting into exactly three planes only works for RGB data, so normalise
    # the mode up front instead of failing on grayscale or RGBA images.
    if isinstance(img, Image.Image) and img.mode != 'RGB':
        img = img.convert('RGB')

    # Convert the image to a numpy array
    img_array = np.array(img)

    # One CLAHE object is reused for every channel
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    equalised_channels = [clahe.apply(channel) for channel in cv2.split(img_array)]

    # Merge the channels back together and convert back to a PIL image
    return Image.fromarray(cv2.merge(equalised_channels))

In [7]:
# Define preprocessing pipeline
def _build_preprocess_pipeline(augment):
    """Assemble the preprocessing transform chain.

    With augment=True a random ColorJitter step is inserted between the centre
    crop and the auto-exposure step; apart from that both variants are identical.
    """
    steps = [
        # Resize the image while maintaining the aspect ratio, then centre crop
        transforms.Lambda(lambda img: resize_with_aspect_ratio(img, 256)),
        transforms.CenterCrop(256),
    ]

    if augment:
        # Random colour jitter for generalization
        steps.append(transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1))

    steps.extend([
        transforms.Lambda(lambda img: auto_exposure(img)),  # Apply auto exposure
        transforms.ToTensor(),  # Convert the image to a PyTorch tensor
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize the image
    ])
    return transforms.Compose(steps)


preprocess_pipeline_training = _build_preprocess_pipeline(augment=True)

preprocess_pipeline_testing = _build_preprocess_pipeline(augment=False)



In [8]:
# Dictionary to store the per-class datasets of every source directory
datasets_by_source_and_class = {SOURCE_DIR: {class_name: [] for class_name in labels.keys()} for SOURCE_DIR in SOURCE_DIRS}

# Accumulate datasets for each class
for SOURCE_DIR in SOURCE_DIRS:
    # The ImageFolder for a source directory is the same for all of its classes,
    # so it is built at most once (on first use) instead of once per class.
    dataset = None

    for class_name in labels.keys():
        class_path = os.path.join(SOURCE_DIR, class_name)
        print(class_path)
        if not os.path.isdir(class_path):
            continue  # Skip if not a directory

        if dataset is None:
            # './images' gets the deterministic testing pipeline; every other
            # source gets the training pipeline with colour jitter.
            dataset = ImageFolder(
                root=SOURCE_DIR,
                transform=preprocess_pipeline_testing if SOURCE_DIR == './images' else preprocess_pipeline_training
            )

        # Filter dataset to include only images belonging to the current class.
        # class_to_idx maps a class name to the integer stored in dataset.samples;
        # an unknown name gives None, which matches nothing (empty subset).
        class_index = dataset.class_to_idx.get(class_name)
        class_indices = [i for i, (_, target) in enumerate(dataset.samples) if target == class_index]
        filtered_dataset = torch.utils.data.Subset(dataset, class_indices)

        datasets_by_source_and_class[SOURCE_DIR][class_name].append(filtered_dataset)

        print(f"Preprocessed {len(filtered_dataset)} images for class '{SOURCE_DIR} {class_name}'.")

./images\Paper
Preprocessed 40 images for class './images Paper'.
./images\Rock
Preprocessed 54 images for class './images Rock'.
./images\Scissor
Preprocessed 39 images for class './images Scissor'.
./augmented_images\Paper
Preprocessed 280 images for class './augmented_images Paper'.
./augmented_images\Rock
Preprocessed 378 images for class './augmented_images Rock'.
./augmented_images\Scissor
Preprocessed 273 images for class './augmented_images Scissor'.


In [9]:
# One DataLoader per (class, source directory) pair
data_loaders_by_class_and_source = {}

for SOURCE_DIR, class_datasets in datasets_by_source_and_class.items():
    for class_name, dataset_list in class_datasets.items():
        # A class that has no folder in this source directory keeps an empty list;
        # indexing it would raise IndexError, so skip it instead.
        if not dataset_list:
            print(f'No datasets for class {class_name} from {SOURCE_DIR}; skipping.')
            continue

        # Combine all filtered datasets for this class and source directory
        if len(dataset_list) > 1:
            combined_dataset = torch.utils.data.ConcatDataset(dataset_list)
        else:
            combined_dataset = dataset_list[0]  # If only one dataset, no need to concatenate

        # Create a DataLoader for this class and source directory
        dataloader = DataLoader(combined_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

        # Store the dataloader by class and source directory
        data_loaders_by_class_and_source[(class_name, SOURCE_DIR)] = dataloader

        print(f'Loaded {len(combined_dataset)} images for class {class_name} from {SOURCE_DIR}.')


Loaded 40 images for class Paper from ./images.
Loaded 54 images for class Rock from ./images.
Loaded 39 images for class Scissor from ./images.
Loaded 280 images for class Paper from ./augmented_images.
Loaded 378 images for class Rock from ./augmented_images.
Loaded 273 images for class Scissor from ./augmented_images.


In [10]:
# Start from an empty output directory: remove a previous one, then create it anew
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)
os.makedirs(OUTPUT_DIR)

print(f"Created/checked directory: {OUTPUT_DIR}")

# Create training and test directories
train_dir = os.path.join(OUTPUT_DIR, 'train')
test_dir = os.path.join(OUTPUT_DIR, 'test')

# No exist_ok here: the parent was just recreated, so both must be new
os.makedirs(train_dir)
os.makedirs(test_dir)

print(f"Created directory: {train_dir}")
print(f"Created directory: {test_dir}")


Created/checked directory: ./preprocessed_images
Created directory: ./preprocessed_images\train
Created directory: ./preprocessed_images\test


In [11]:
# Run every DataLoader once and write each transformed image to
# OUTPUT_DIR/<train|test>/<class_name>/<class_name>_<NNN>.jpg
print("Processing and saving images...")

train_dir = os.path.join(OUTPUT_DIR, 'train')
test_dir = os.path.join(OUTPUT_DIR, 'test')

# Ensure the directories exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

for (class_name, SOURCE_DIR), dataloader in data_loaders_by_class_and_source.items():
    print(f"Processing and saving images for class '{SOURCE_DIR} {class_name}'...")

    # Images from './images' form the test split; every other source is training data
    if SOURCE_DIR == './images':
        label = 'test'
        class_dir = os.path.join(test_dir, class_name)
    else:
        label = 'train'
        class_dir = os.path.join(train_dir, class_name)

    # Create a subdirectory for the class
    os.makedirs(class_dir, exist_ok=True)

    # Running index over all batches of this class. The previous formula
    # img_idx + batch_idx * img_idx repeated indices across batches
    # (batch 1 gave 0, 2, 4, ...), so later images overwrote earlier files.
    image_number = 0

    # Iterate through the DataLoader batches.
    # The loop variables must not be called `images`/`labels`: that would rebind
    # the notebook-level `labels` dict to a tensor and break re-running earlier cells.
    for batch_idx, (batch_images, batch_labels) in enumerate(dataloader):
        print(f"Processing batch {batch_idx} of {label}...")
        for image in batch_images:
            print(f"Processing image {image_number}...")

            # Generate a unique filename
            save_path = os.path.join(
                class_dir,
                f"{class_name}_{image_number:03d}.jpg"
            )

            # Save the preprocessed image.
            # NOTE(review): these tensors have already been through Normalize, and
            # save_image appears to clamp values to the displayable range when
            # writing. Confirm whether the files should be de-normalised first.
            save_image(image, save_path)
            print(f"Saved: {save_path}")

            image_number += 1

        print(f"Batch {batch_idx} of {label} complete!")

print("Preprocessing complete!")

Processing and saving images...
Processing and saving images for class './images Paper'...
Processing batch 0 of test...
Processing image 0...
Saved: ./preprocessed_images\test\Paper\Paper_000.jpg
Processing image 1...
Saved: ./preprocessed_images\test\Paper\Paper_001.jpg
Processing image 2...
Saved: ./preprocessed_images\test\Paper\Paper_002.jpg
Processing image 3...
Saved: ./preprocessed_images\test\Paper\Paper_003.jpg
Processing image 4...
Saved: ./preprocessed_images\test\Paper\Paper_004.jpg
Processing image 5...
Saved: ./preprocessed_images\test\Paper\Paper_005.jpg
Processing image 6...
Saved: ./preprocessed_images\test\Paper\Paper_006.jpg
Processing image 7...
Saved: ./preprocessed_images\test\Paper\Paper_007.jpg
Processing image 8...
Saved: ./preprocessed_images\test\Paper\Paper_008.jpg
Processing image 9...
Saved: ./preprocessed_images\test\Paper\Paper_009.jpg
Processing image 10...
Saved: ./preprocessed_images\test\Paper\Paper_010.jpg
Processing image 11...
Saved: ./preprocess