# The goal of this notebook is to preprocess the images

#### This cell deletes the folders that have fewer than 2000 images.

In [5]:
import os
import shutil

def delete_folders_with_few_images(path, min_images=2000):
    """Delete each immediate subfolder of `path` that holds too few images.

    A file counts as an image when it is a regular file whose name ends
    (case-insensitively) in .png, .jpg, .jpeg, .bmp, .gif or .tiff. Only the
    top level of each subfolder is inspected.

    Args:
        path: Directory whose subfolders are examined.
        min_images: Subfolders with fewer images than this are removed.

    Warning:
        Removal uses shutil.rmtree, so the whole subfolder tree is deleted.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff")

    for entry in os.listdir(path):
        folder_path = os.path.join(path, entry)

        # Loose files sitting directly in `path` are left alone.
        if not os.path.isdir(folder_path):
            continue

        # Tally regular files with an image suffix.
        image_count = sum(
            1
            for child in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, child))
            and child.lower().endswith(image_suffixes)
        )

        # Folders at or above the threshold are kept.
        if image_count >= min_images:
            continue

        print(f"Deleting folder: {folder_path} (contains {image_count} images)")
        shutil.rmtree(folder_path)

if __name__ == "__main__":
    # Root directory holding one subfolder per class.
    # NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
    target_path = r"C:\Users\yozev\OneDrive\Desktop\art"

    # Destructive: subfolders with fewer than the default 2000 images are
    # removed from disk with shutil.rmtree.
    delete_folders_with_few_images(target_path)

KeyboardInterrupt: 

#### This part of the code aims to make every folder contain 5000 images
The code will augment images in folders with fewer than 5000 images and will undersample folders with more than that

In [4]:
import os
import random
import shutil
from PIL import Image, ImageEnhance

def augment_image(image_path, output_path):
    """Write six augmented 256x256 JPEG variants of one image into `output_path`.

    The variants are a horizontal flip, rotations by 90, 180 and 270 degrees,
    a 1.5x brightness boost and a 1.5x contrast boost, all derived from the
    source image resized to 256x256.

    Args:
        image_path: Path of the source image.
        output_path: Existing directory that receives the generated files.

    Returns:
        List of the six file paths that were written, in creation order.

    Raises:
        OSError: If the source cannot be opened/decoded or a file cannot be
            written.
    """

    def _unique_target(suffix):
        # A 4-digit random prefix (9000 values) collides quickly when thousands
        # of files are generated, silently overwriting earlier augmentations.
        # Use 64 random bits and still verify that the name is free.
        while True:
            token = f"{random.getrandbits(64):016x}"
            candidate = os.path.join(output_path, f"{token}_{suffix}.jpg")
            if not os.path.exists(candidate):
                return candidate

    saved_paths = []

    with Image.open(image_path) as img:
        # JPEG cannot store alpha or palette modes (RGBA, LA, P, ...); saving
        # them raises OSError. Convert only those, leaving L/RGB/CMYK sources
        # in their original mode.
        base = img if img.mode in ("L", "RGB", "CMYK") else img.convert("RGB")

        # Resize image to a square
        img_resized = base.resize((256, 256))

        # Build every variant first, then save them in one place.
        variants = [("flipped", img_resized.transpose(Image.FLIP_LEFT_RIGHT))]
        for angle in (90, 180, 270):
            variants.append((f"rotated_{angle}", img_resized.rotate(angle, expand=True)))
        # Increase brightness by 1.5x
        variants.append(("bright", ImageEnhance.Brightness(img_resized).enhance(1.5)))
        # Increase contrast by 1.5x
        variants.append(("contrast", ImageEnhance.Contrast(img_resized).enhance(1.5)))

        for suffix, variant in variants:
            target = _unique_target(suffix)
            variant.save(target)
            saved_paths.append(target)

    return saved_paths

def _list_image_files(folder):
    """Return the sorted names of regular files in `folder` with an image extension."""
    extensions = (".jpg", ".jpeg", ".png", ".bmp", ".tiff")
    return sorted(
        name
        for name in os.listdir(folder)
        if name.lower().endswith(extensions)
        and os.path.isfile(os.path.join(folder, name))
    )


def process_folders(input_path, output_path, min_files=5000):
    """Copy every class folder to `output_path`, balanced to `min_files` images.

    For each immediate subfolder of `input_path`:

    * more than `min_files` images: a random sample of exactly `min_files`
      images is copied;
    * otherwise: all images are copied and `augment_image` is called on
      randomly chosen source images until `min_files` is reached. Surplus
      augmented files from the last round are removed so the total is exact.

    Args:
        input_path: Directory containing one subfolder per class.
        output_path: Destination root; created if missing.
        min_files: Target number of images per class folder.

    Notes:
        Counts are based on files created by this call. Files already present
        in the destination from an earlier run are not counted or removed.
    """
    # Give up on a folder after this many consecutive augmentation rounds
    # that produced no new file (e.g. every source image is unreadable).
    max_stalled_rounds = 25

    os.makedirs(output_path, exist_ok=True)

    for folder_name in os.listdir(input_path):
        folder_path = os.path.join(input_path, folder_name)

        # Process only folders
        if not os.path.isdir(folder_path):
            continue

        new_folder_path = os.path.join(output_path, folder_name)
        os.makedirs(new_folder_path, exist_ok=True)

        image_files = _list_image_files(folder_path)
        num_images = len(image_files)

        print(f"Processing folder: {folder_name}, Images: {num_images}")

        if num_images > min_files:
            # Downsample folder
            for file in random.sample(image_files, min_files):
                shutil.copy(os.path.join(folder_path, file), new_folder_path)
            print(f"Downsampled to {min_files} images.")
            continue

        # Copy all existing images
        for file in image_files:
            shutil.copy(os.path.join(folder_path, file), new_folder_path)

        if not image_files:
            # random.choice([]) would raise IndexError; there is nothing to augment from.
            print(f"No images found in {folder_name}; nothing to augment.")
            continue

        # Track files that really appear on disk instead of assuming a fixed
        # number per call, so overwritten or failed outputs are not counted.
        generated = []
        stalled_rounds = 0
        known = set(os.listdir(new_folder_path))

        while num_images + len(generated) < min_files:
            image_to_augment = random.choice(image_files)
            try:
                augment_image(os.path.join(folder_path, image_to_augment), new_folder_path)
            except Exception as e:
                # One unreadable image must not abort the whole run.
                print(f"Error augmenting {image_to_augment}: {e}")

            current = set(os.listdir(new_folder_path))
            created = sorted(current - known)
            known = current

            if created:
                generated.extend(created)
                stalled_rounds = 0
            else:
                stalled_rounds += 1
                if stalled_rounds >= max_stalled_rounds:
                    print(f"Augmentation stalled for {folder_name}; stopping early.")
                    break

        # The last round may overshoot the target; drop random augmented extras.
        surplus = num_images + len(generated) - min_files
        if surplus > 0:
            for name in random.sample(generated, surplus):
                os.remove(os.path.join(new_folder_path, name))

        final_count = min(num_images + len(generated), min_files)
        print(f"Augmented folder to {final_count} images.")

if __name__ == "__main__":
    # Source class folders and the destination root for the balanced copy.
    # NOTE(review): absolute, machine-specific paths — adjust before running elsewhere.
    input_path = r"C:\Users\yozev\OneDrive\Desktop\art"
    output_path = r"C:\Users\yozev\OneDrive\Desktop\artFiltered"

    # Balance every class folder using the default target of 5000 images.
    process_folders(input_path, output_path)


Processing folder: Abstract_Expressionism, Images: 2782
Augmented folder to 5000 images.
Processing folder: Art_Nouveau_Modern, Images: 4334
Augmented folder to 5000 images.
Processing folder: Baroque, Images: 4240
Augmented folder to 5000 images.
Processing folder: Cubism, Images: 2235
Augmented folder to 5000 images.
Processing folder: Expressionism, Images: 6736
Downsampled to 5000 images.
Processing folder: Impressionism, Images: 13060
Downsampled to 5000 images.
Processing folder: Naive_Art_Primitivism, Images: 2405
Augmented folder to 5000 images.
Processing folder: Northern_Renaissance, Images: 2552
Augmented folder to 5000 images.
Processing folder: Post_Impressionism, Images: 6450
Downsampled to 5000 images.
Processing folder: Realism, Images: 10733
Downsampled to 5000 images.
Processing folder: Rococo, Images: 2089
Augmented folder to 5000 images.
Processing folder: Romanticism, Images: 7019
Downsampled to 5000 images.
Processing folder: Symbolism, Images: 4528
Augmented fold

In [6]:
import os
from PIL import Image

def resize_images_in_folder(input_path, output_size=(256, 256)):
    """Resize, in place, the images found one level below `input_path`.

    Every regular file in each immediate subfolder whose name ends
    (case-insensitively) in .jpg, .jpeg, .png, .bmp or .tiff is opened,
    resized to `output_size` and saved back over the original file. A failure
    on a single file is reported with a printed message and does not stop the
    run.

    Args:
        input_path: Directory containing the subfolders to process.
        output_size: (width, height) passed to Image.resize.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".bmp", ".tiff")

    for folder_name in os.listdir(input_path):
        class_dir = os.path.join(input_path, folder_name)

        # Anything that is not a directory at the top level is ignored.
        if not os.path.isdir(class_dir):
            continue

        print(f"Resizing images in folder: {folder_name}")

        for file_name in os.listdir(class_dir):
            target = os.path.join(class_dir, file_name)

            is_image_file = os.path.isfile(target) and file_name.lower().endswith(image_suffixes)
            if not is_image_file:
                continue

            try:
                with Image.open(target) as img:
                    # Overwrite the source file with its resized version.
                    img.resize(output_size).save(target)
            except Exception as e:
                print(f"Error resizing image {file_name}: {e}")

if __name__ == "__main__":
    # Root of the balanced dataset, one subfolder per class.
    # NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
    input_path = r"C:\Users\yozev\OneDrive\Desktop\artFiltered"

    # Resize every image to the default 256x256, overwriting the files in place.
    resize_images_in_folder(input_path)


Resizing images in folder: Abstract_Expressionism
Resizing images in folder: Art_Nouveau_Modern
Resizing images in folder: Baroque
Resizing images in folder: Cubism
Resizing images in folder: Expressionism
Resizing images in folder: Impressionism
Resizing images in folder: Naive_Art_Primitivism
Resizing images in folder: Northern_Renaissance
Resizing images in folder: Post_Impressionism
Resizing images in folder: Realism
Resizing images in folder: Rococo
Resizing images in folder: Romanticism
Resizing images in folder: Symbolism


Feature extraction using the EfficientNet-B0 network

In [1]:
import os
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms

def extract_features_efficientnet_b0(
    input_path,
    output_file="efficientnet_b0_features_pt.npz",
    image_size=(224, 224)
):
    """Extract EfficientNet-B0 feature vectors for every image under `input_path`.

    Each immediate subfolder of `input_path` is treated as one class. Images
    are resized to `image_size`, normalized with the ImageNet mean/std, and
    passed through an ImageNet-pretrained EfficientNet-B0 whose final
    classification layer is replaced by an identity, so the model returns the
    vector that would have fed that layer.

    Args:
        input_path: Directory containing one subfolder per class.
        output_file: Path of the `.npz` archive to write.
        image_size: (height, width) given to `transforms.Resize`.

    Saves:
        An `.npz` archive with
        * `features`: one row per successfully processed image,
        * `labels`: integer class index per row,
        * `class_names`: folder names, where `class_names[i]` is the class
          with label `i`.

    Notes:
        Class folders and files are visited in sorted order so label indices
        and row order are reproducible across runs and platforms. Images that
        cannot be processed are reported with a printed message and skipped.
    """
    # Use GPU if available, else CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load a pre-trained EfficientNet-B0 model
    model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)

    # Replace the final classification layer with Identity to get raw feature vectors
    if isinstance(model.classifier, nn.Sequential) and len(model.classifier) > 1:
        model.classifier[1] = nn.Identity()
    else:
        # Fallback in case PyTorch changes architecture
        model.classifier = nn.Identity()

    # Move to the target device and switch to evaluation mode
    model = model.to(device)
    model.eval()

    # Transformation pipeline: resize, tensor conversion, ImageNet normalization
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        )
    ])

    image_suffixes = (".jpg", ".jpeg", ".png", ".bmp", ".tiff")

    # Prepare containers for all features and labels
    all_features = []
    all_labels = []

    # Sorted so that a class always maps to the same label index;
    # os.listdir order is filesystem-dependent.
    class_folders = sorted(
        d for d in os.listdir(input_path)
        if os.path.isdir(os.path.join(input_path, d))
    )

    # Traverse each subfolder (class)
    for label_idx, folder_name in enumerate(class_folders):
        folder_path = os.path.join(input_path, folder_name)
        print(f"Extracting features from folder: {folder_name}")

        # Gather image files in a deterministic order
        image_files = sorted(
            f for f in os.listdir(folder_path)
            if f.lower().endswith(image_suffixes)
        )

        for file_name in image_files:
            file_path = os.path.join(folder_path, file_name)
            try:
                with Image.open(file_path) as img:
                    # The 3-channel Normalize fails on grayscale/RGBA/CMYK
                    # inputs, which would otherwise be dropped by the except
                    # below; force three channels first.
                    img_t = transform(img.convert("RGB"))

                img_t = img_t.unsqueeze(0).to(device)

                with torch.no_grad():
                    # Extract features
                    features = model(img_t)

                features_np = features.squeeze(0).cpu().numpy()

                all_features.append(features_np)
                all_labels.append(label_idx)

            except Exception as e:
                print(f"Error processing {file_name} in {folder_name}: {e}")

    all_features = np.array(all_features)
    all_labels = np.array(all_labels)

    # Store the class names too, so integer labels can be mapped back to
    # folder names without re-listing the dataset directory.
    np.savez(
        output_file,
        features=all_features,
        labels=all_labels,
        class_names=np.array(class_folders, dtype=str),
    )
    print(f"Features saved to {output_file}")


if __name__ == "__main__":
    # Root of the balanced, resized dataset (one subfolder per class).
    # NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
    input_path = r"C:\Users\yozev\OneDrive\Desktop\artFiltered"
    # Relative path, so the archive is written to the current working directory.
    output_file = "efficientnet_b0_features_pt.npz"

    # Runs with the default image_size of (224, 224).
    extract_features_efficientnet_b0(input_path, output_file)

Extracting features from folder: Abstract_Expressionism
Extracting features from folder: Art_Nouveau_Modern
Extracting features from folder: Baroque
Extracting features from folder: Cubism
Extracting features from folder: Expressionism
Extracting features from folder: Impressionism
Extracting features from folder: Naive_Art_Primitivism
Extracting features from folder: Northern_Renaissance
Extracting features from folder: Post_Impressionism
Extracting features from folder: Realism
Extracting features from folder: Rococo
Extracting features from folder: Romanticism
Extracting features from folder: Symbolism
Features saved to efficientnet_b0_features_pt.npz
