In [1]:
#ensure all images are in the genre folder and not in the artist folder

import os
import shutil
import tkinter as tk
from tkinter import filedialog

# Initialize tkinter root
root = tk.Tk()
root.withdraw()  # Hide the main tkinter window

# Open folder selection dialog
dataset_folder = filedialog.askdirectory(title="Select the main dataset folder")

# The dialog has returned; release the hidden Tk window instead of leaking it.
root.destroy()


def _unique_destination(folder, file_name):
    """Return a path inside ``folder`` for ``file_name`` that does not exist yet.

    ``folder/file_name`` is returned unchanged when it is free; otherwise a
    counter suffix (``_1``, ``_2``, ...) is inserted before the extension
    until an unused path is found.
    """
    candidate = os.path.join(folder, file_name)
    if not os.path.exists(candidate):
        return candidate

    base_name, ext = os.path.splitext(file_name)
    counter = 1
    while True:
        candidate = os.path.join(folder, f"{base_name}_{counter}{ext}")
        if not os.path.exists(candidate):
            return candidate
        counter += 1


# Check if a folder was selected
if dataset_folder:
    # Traverse through each genre folder in the selected directory
    for genre_folder in os.listdir(dataset_folder):
        genre_path = os.path.join(dataset_folder, genre_folder)

        # Only directories are genre folders; skip stray files
        if not os.path.isdir(genre_path):
            continue

        # Recursively find all files below the genre folder.
        # NOTE: the loop variable is deliberately not called `root`, so it
        # does not shadow the Tk object created above.
        for current_dir, _, files in os.walk(genre_path):
            # Files that already sit directly in the genre folder are where
            # they belong. Without this guard each of them "collides" with
            # itself and is renamed with a `_1` suffix on every run.
            if current_dir == genre_path:
                continue

            for file_name in files:
                file_path = os.path.join(current_dir, file_name)
                # Move the file up, picking a free name on a real collision
                shutil.move(file_path, _unique_destination(genre_path, file_name))

        # Clean up empty directories (bottom-up so nested empties go first)
        for current_dir, dirs, _ in os.walk(genre_path, topdown=False):
            for dir_name in dirs:
                dir_path = os.path.join(current_dir, dir_name)
                if not os.listdir(dir_path):  # Check if directory is empty
                    os.rmdir(dir_path)

    print("Reorganization complete.")
else:
    print("No folder selected.")


Reorganization complete.


Splitting dataset

In [None]:
import torch
print(torch.cuda.is_available())  # Prints whether PyTorch reports CUDA (GPU acceleration) as available

True


In [None]:
#run this to rename images before splitting the dataset
# Traverse through all files in the dataset folder and subfolders.
# Identify image files based on certain extensions.
# Rename the images by replacing non-alphanumeric characters in the filenames with underscores.
# Convert the image files to PNG format and save them in the same directory.
# Optionally, delete the original image files after conversion.
# Print logs indicating success or failure for each image file processed.

import os
import re
from PIL import Image

# Define the main dataset folder path.
# Raw string: the Windows backslashes must stay literal instead of being
# parsed as (invalid) escape sequences.
dataset_folder = r"D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original"

# File extensions treated as images (add others here if needed)
image_extensions = ('.jpg', '.jpeg', '.bmp', '.gif', '.tiff', '.png')

# Loop through each folder in the dataset directory
for root, dirs, files in os.walk(dataset_folder):
    for file_name in files:
        # Get the current file's full path
        file_path = os.path.join(root, file_name)

        # Only image files are processed
        if not file_name.lower().endswith(image_extensions):
            continue

        # Create a new filename by replacing non-alphanumeric characters with '_'
        base_name = os.path.splitext(file_name)[0]
        new_base_name = re.sub(r'[^a-zA-Z0-9]', '_', base_name)
        new_file_name = f"{new_base_name}.png"
        new_file_path = os.path.join(root, new_file_name)

        if os.path.exists(new_file_path):
            # The target IS the source (already a sanitized PNG, possibly
            # differing only by case on a case-insensitive filesystem).
            # Converting would save the file onto itself and the os.remove
            # below would then delete the only copy, so leave it alone.
            if os.path.samefile(file_path, new_file_path):
                print(f"Skipped {file_name}: already a sanitized PNG")
                continue

            # A different file already owns the sanitized name (e.g. "a-b.jpg"
            # and "a b.jpg" both map to "a_b.png"): pick a free name rather
            # than silently overwriting it.
            counter = 1
            while os.path.exists(new_file_path):
                new_file_name = f"{new_base_name}_{counter}.png"
                new_file_path = os.path.join(root, new_file_name)
                counter += 1

        saved = False
        try:
            # Open the image and convert it to PNG format
            with Image.open(file_path) as img:
                # Convert RGBA / palette images to RGB (for consistency)
                if img.mode in ("RGBA", "P"):
                    img = img.convert("RGB")
                # Save the image as PNG
                img.save(new_file_path, 'PNG')
            saved = True

            # Delete the original only after the PNG was written successfully
            os.remove(file_path)

            print(f"Converted and renamed {file_name} to {new_file_name}")
        except Exception as e:
            # new_file_path did not exist before this iteration, so anything
            # found there after a failed save is a partial file we created.
            if not saved and os.path.exists(new_file_path):
                os.remove(new_file_path)
            print(f"Failed to process {file_name}: {e}")


Converted and renamed 08d1dc50eee1b53179cf65936a064931_1.jpg to 08d1dc50eee1b53179cf65936a064931_1.png
Converted and renamed 1021092-Hans_1_1.jpg to 1021092_Hans_1_1.png
Converted and renamed 20100127_arshile_gorky_scent_of_apricots_in_fields_1.jpg to 20100127_arshile_gorky_scent_of_apricots_in_fields_1.png
Converted and renamed 201305_hofmann_05_1.jpg to 201305_hofmann_05_1.png
Converted and renamed 290 GRAHAM_1.jpg to 290_GRAHAM_1.png
Converted and renamed 291_1.jpg to 291_1.png
Converted and renamed 50.3_gorky_imageprimacy_587_1.jpg to 50_3_gorky_imageprimacy_587_1.png
Converted and renamed 537526bb07529f2a11c22555f852db63_1.jpg to 537526bb07529f2a11c22555f852db63_1.png
Converted and renamed 76.2277_ph_web_1.jpg to 76_2277_ph_web_1.png
Converted and renamed 76.2553.152_ph_web_1.jpg to 76_2553_152_ph_web_1.png
Converted and renamed Arabs-I-Cemetry-1909-Oil-on-card_1.JPG to Arabs_I_Cemetry_1909_Oil_on_card_1.png
Converted and renamed Arshile-Gorky-Impatience-1945-6-802-500_1.jpg to Ar

In [None]:
#this code checks for corrupted images and moves them to a new folder
# Traverse through all files in the dataset folder and subfolders. If a file is corrupted, move it to the corrupted folder.
import os
import shutil
from PIL import Image

# Folder that is scanned for unreadable images
dataset_folder = r"D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original"
# Folder that corrupted images are moved into (a sibling of the dataset folder)
corrupted_folder = r"D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora_7k_corrupted"

# exist_ok=True makes this safe to re-run and avoids the check-then-create race
os.makedirs(corrupted_folder, exist_ok=True)

def is_image_corrupted(file_path):
    """Tell whether the image at ``file_path`` fails to open or fully decode.

    The file is opened with ``Image.open`` and its pixel data is forced to
    load. An ``IOError`` or ``SyntaxError`` raised along the way is printed
    and reported as corruption; any other exception propagates to the caller.

    Returns:
        bool: ``True`` if the image could not be read, ``False`` otherwise.
    """
    corrupted = False
    try:
        with Image.open(file_path) as candidate:
            # Image.open is lazy; load() forces the full decode
            candidate.load()
    except (IOError, SyntaxError) as exc:
        print(f"Error for {file_path}: {exc}")
        corrupted = True
    return corrupted

# Names (inside corrupted_folder) of every image that was moved out
corrupted_images = []

for root, dirs, files in os.walk(dataset_folder):
    for file_name in files:
        # Anything without an image extension is ignored
        if not file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif')):
            continue

        file_path = os.path.join(root, file_name)
        print(f"Checking: {file_path}")
        if not is_image_corrupted(file_path):
            continue

        # Prefix with the containing folder's name so the origin stays visible
        new_file_name = f"{os.path.basename(root)}_{file_name}"
        new_file_path = os.path.join(corrupted_folder, new_file_name)
        try:
            shutil.move(file_path, new_file_path)
            print(f"Moved corrupted image: {file_name} -> {new_file_name}")
            corrupted_images.append(new_file_name)
        except Exception as e:
            print(f"Failed to move {file_name}: {e}")

if not corrupted_images:
    print("No corrupted images found.")
else:
    print("\nSummary of corrupted images:")
    for img in corrupted_images:
        print(img)


Checking: D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original\Abstract_Expressionism\08d1dc50eee1b53179cf65936a064931_1.png
Checking: D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original\Abstract_Expressionism\1021092_Hans_1_1.png
Checking: D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original\Abstract_Expressionism\20100127_arshile_gorky_scent_of_apricots_in_fields_1.png
Checking: D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original\Abstract_Expressionism\201305_hofmann_05_1.png
Checking: D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original\Abstract_Expressionism\290_GRAHAM_1.png
Checking: D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original\Abstract_Expressionism\291_1.png
Checking: D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original\Abstract_Expressionism\50_3_gorky_imageprimacy_587_1.png
Checking: D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original\Abstract_Expressionism\537526bb07529f2a11c22555f852db63_1.png

In [None]:
#this code splits the dataset into training, validation and testing sets
import os
import shutil
import random
import pandas as pd
from tqdm import tqdm  # For progress bar

def create_dir_structure(base_dir, categories):
    """Create ``base_dir/<split>/<category>`` for the train, val and test splits.

    Directories that already exist are left as they are.
    """
    for category in categories:
        for split_name in ('train', 'val', 'test'):
            target = os.path.join(base_dir, split_name, category)
            os.makedirs(target, exist_ok=True)

def split_data(source_dir, dest_dir, categories, split_ratio=0.8, seed=None):
    """Copy each category's images into train / val / test sub-folders.

    For every category, the regular files in ``source_dir/<category>`` are
    shuffled and the first ``int(n * split_ratio)`` go to ``train``. The
    remainder is halved between ``val`` and ``test``, with ``test`` receiving
    the extra file when the remainder is odd. Files are copied, not moved, to
    ``dest_dir/<split>/<category>/``.

    Args:
        source_dir: Folder containing one sub-folder per category.
        dest_dir: Root of the split dataset; split/category folders are
            created on demand.
        categories: Category (sub-folder) names to process. Names that are
            not directories under ``source_dir`` are skipped.
        split_ratio: Fraction of each category assigned to ``train``
            (between 0 and 1).
        seed: Optional seed for a private random generator so the split is
            reproducible. With ``None`` the global ``random`` state is used.

    Returns:
        dict: ``{category: {"train": n, "val": n, "test": n}}`` with the
        number of files copied per split.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]``.

    Note:
        Existing files in ``dest_dir`` are not removed. Re-running with a
        different shuffle into the same ``dest_dir`` can leave the same image
        in more than one split.
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio}")

    # A private generator keeps a seeded run independent of global state
    rng = random if seed is None else random.Random(seed)
    class_counts = {category: {"train": 0, "val": 0, "test": 0} for category in categories}

    for category in categories:
        category_path = os.path.join(source_dir, category)
        if not os.path.isdir(category_path):
            continue

        # Regular files only (a sub-directory would make shutil.copy fail).
        # Sorted first, so that a given seed always produces the same split
        # regardless of the order os.listdir happens to return.
        images = sorted(
            entry for entry in os.listdir(category_path)
            if os.path.isfile(os.path.join(category_path, entry))
        )
        rng.shuffle(images)  # Shuffle to randomize the selection

        num_images = len(images)
        print(f"Processing category '{category}' ({num_images} images)...")

        # Split sizes: `split_ratio` for train, the rest halved into val/test
        num_train = int(num_images * split_ratio)
        num_val = (num_images - num_train) // 2

        splits = {
            "train": images[:num_train],
            "val": images[num_train:num_train + num_val],
            "test": images[num_train + num_val:],
        }

        # Copy images to respective directories with progress bar
        print(f"Splitting images for '{category}'...")
        for split_name, image_list in splits.items():
            split_dir = os.path.join(dest_dir, split_name, category)
            os.makedirs(split_dir, exist_ok=True)
            for image in tqdm(image_list, desc=f"{split_name.capitalize()} Split"):
                shutil.copy(os.path.join(category_path, image), os.path.join(split_dir, image))
                class_counts[category][split_name] += 1

    return class_counts

def display_summary(class_counts):
    """Print and return a per-category table of train / validation / test counts.

    ``class_counts`` maps each category to a dict with ``"train"``, ``"val"``
    and ``"test"`` counts. The resulting DataFrame has one row per category
    followed by a final ``"Total"`` row holding the column sums.
    """
    rows = [
        {
            "Category": category,
            "Train": counts["train"],
            "Validation": counts["val"],
            "Test": counts["test"],
        }
        for category, counts in class_counts.items()
    ]

    # The totals are computed before the row is appended, so they only cover
    # the per-category rows.
    totals_row = {"Category": "Total"}
    for column in ("Train", "Validation", "Test"):
        totals_row[column] = sum(row[column] for row in rows)
    rows.append(totals_row)

    # Create a DataFrame for better visualization
    df = pd.DataFrame(rows)
    print("\nFinal Summary:")
    print(df)
    return df

# Main script
# Raw strings keep the Windows backslashes literal; in a normal string
# sequences such as "\g" or "\P" are invalid escapes.
source_dir = r"D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original"
dest_dir = r"D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_split_80"
categories = [d for d in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, d))]

# Seed the global generator used by the shuffle in split_data so that a
# re-run can reproduce the split (assuming the directory listing order is
# unchanged between runs).
random.seed(42)

# Create directory structure and split data.
# NOTE: files already present in dest_dir are not cleared first; delete the
# folder before re-splitting to avoid mixing two different splits.
create_dir_structure(dest_dir, categories)
class_counts = split_data(source_dir, dest_dir, categories, split_ratio=0.8) #split ratio can be changed here

# Display summary
summary_df = display_summary(class_counts)


Processing category 'Abstract_Expressionism' (340 images)...
Splitting images for 'Abstract_Expressionism'...


Train Split: 100%|██████████| 272/272 [25:52<00:00,  5.71s/it]  
Val Split: 100%|██████████| 34/34 [06:21<00:00, 11.22s/it]
Test Split: 100%|██████████| 34/34 [04:11<00:00,  7.41s/it]


Processing category 'Baroque' (960 images)...
Splitting images for 'Baroque'...


Train Split: 100%|██████████| 768/768 [12:24<00:00,  1.03it/s]  
Val Split: 100%|██████████| 96/96 [01:02<00:00,  1.53it/s]
Test Split: 100%|██████████| 96/96 [00:38<00:00,  2.51it/s]


Processing category 'Cubism' (920 images)...
Splitting images for 'Cubism'...


Train Split: 100%|██████████| 736/736 [04:33<00:00,  2.69it/s] 
Val Split: 100%|██████████| 92/92 [00:48<00:00,  1.89it/s]
Test Split: 100%|██████████| 92/92 [00:25<00:00,  3.64it/s]


Processing category 'Fauvism' (394 images)...
Splitting images for 'Fauvism'...


Train Split: 100%|██████████| 315/315 [02:56<00:00,  1.78it/s]
Val Split: 100%|██████████| 39/39 [00:10<00:00,  3.83it/s]
Test Split: 100%|██████████| 40/40 [00:03<00:00, 12.34it/s]


Processing category 'HighRenaissance' (812 images)...
Splitting images for 'HighRenaissance'...


Train Split: 100%|██████████| 649/649 [02:21<00:00,  4.60it/s]
Val Split: 100%|██████████| 81/81 [00:22<00:00,  3.64it/s]
Test Split: 100%|██████████| 82/82 [00:22<00:00,  3.64it/s]


Processing category 'Iconoclasm' (665 images)...
Splitting images for 'Iconoclasm'...


Train Split: 100%|██████████| 532/532 [14:06<00:00,  1.59s/it]  
Val Split: 100%|██████████| 66/66 [00:36<00:00,  1.79it/s]
Test Split: 100%|██████████| 67/67 [01:27<00:00,  1.30s/it]


Processing category 'Impressionism' (984 images)...
Splitting images for 'Impressionism'...


Train Split: 100%|██████████| 787/787 [01:51<00:00,  7.09it/s]
Val Split: 100%|██████████| 98/98 [00:05<00:00, 17.60it/s]
Test Split: 100%|██████████| 99/99 [00:08<00:00, 11.17it/s]


Processing category 'OldGreekPottery' (350 images)...
Splitting images for 'OldGreekPottery'...


Train Split: 100%|██████████| 280/280 [00:15<00:00, 18.55it/s]
Val Split: 100%|██████████| 35/35 [00:01<00:00, 18.31it/s]
Test Split: 100%|██████████| 35/35 [00:02<00:00, 17.42it/s]


Processing category 'realism' (307 images)...
Splitting images for 'realism'...


Train Split: 100%|██████████| 245/245 [00:28<00:00,  8.65it/s]
Val Split: 100%|██████████| 31/31 [00:02<00:00, 14.48it/s]
Test Split: 100%|██████████| 31/31 [00:02<00:00, 13.19it/s]


Processing category 'Rococo' (844 images)...
Splitting images for 'Rococo'...


Train Split: 100%|██████████| 675/675 [00:46<00:00, 14.41it/s]
Val Split: 100%|██████████| 84/84 [00:04<00:00, 18.76it/s]
Test Split: 100%|██████████| 85/85 [00:07<00:00, 11.78it/s]


Processing category 'Romanticism' (874 images)...
Splitting images for 'Romanticism'...


Train Split: 100%|██████████| 699/699 [00:44<00:00, 15.74it/s]
Val Split: 100%|██████████| 87/87 [00:05<00:00, 14.68it/s]
Test Split: 100%|██████████| 88/88 [00:14<00:00,  6.07it/s]


Processing category 'Surrealism' (242 images)...
Splitting images for 'Surrealism'...


Train Split: 100%|██████████| 193/193 [00:21<00:00,  9.00it/s]
Val Split: 100%|██████████| 24/24 [00:01<00:00, 13.45it/s]
Test Split: 100%|██████████| 25/25 [00:01<00:00, 15.43it/s]



Final Summary:
                  Category  Train  Validation  Test
0   Abstract_Expressionism    272          34    34
1                  Baroque    768          96    96
2                   Cubism    736          92    92
3                  Fauvism    315          39    40
4          HighRenaissance    649          81    82
5               Iconoclasm    532          66    67
6            Impressionism    787          98    99
7          OldGreekPottery    280          35    35
8                  realism    245          31    31
9                   Rococo    675          84    85
10             Romanticism    699          87    88
11              Surrealism    193          24    25
12                   Total   6151         767   774


In [None]:
# 70:15:15 split for training, validation, and testing. Creates the split directories and copies the images into them.
import os
import shutil
import random
import pandas as pd
from tqdm import tqdm  # For progress bar

def create_dir_structure(base_dir, categories):
    """Make sure every ``base_dir/<split>/<category>`` folder exists.

    The splits are train, val and test; folders that are already present are
    kept untouched.
    """
    split_names = ('train', 'val', 'test')
    for category in categories:
        for split_name in split_names:
            os.makedirs(os.path.join(base_dir, split_name, category), exist_ok=True)

def split_data(source_dir, dest_dir, categories, split_ratio=0.7, seed=None):
    """Copy each category's images into train / val / test sub-folders.

    For every category, the regular files in ``source_dir/<category>`` are
    shuffled and the first ``int(n * split_ratio)`` go to ``train``. The
    remainder is halved between ``val`` and ``test``, with ``test`` receiving
    the extra file when the remainder is odd. Files are copied, not moved, to
    ``dest_dir/<split>/<category>/``.

    Args:
        source_dir: Folder containing one sub-folder per category.
        dest_dir: Root of the split dataset; split/category folders are
            created on demand.
        categories: Category (sub-folder) names to process. Names that are
            not directories under ``source_dir`` are skipped.
        split_ratio: Fraction of each category assigned to ``train``
            (between 0 and 1).
        seed: Optional seed for a private random generator so the split is
            reproducible. With ``None`` the global ``random`` state is used.

    Returns:
        dict: ``{category: {"train": n, "val": n, "test": n}}`` with the
        number of files copied per split.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]``.

    Note:
        Existing files in ``dest_dir`` are not removed. Re-running with a
        different shuffle into the same ``dest_dir`` can leave the same image
        in more than one split.
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio}")

    # A private generator keeps a seeded run independent of global state
    rng = random if seed is None else random.Random(seed)
    class_counts = {category: {"train": 0, "val": 0, "test": 0} for category in categories}

    for category in categories:
        category_path = os.path.join(source_dir, category)
        if not os.path.isdir(category_path):
            continue

        # Regular files only (a sub-directory would make shutil.copy fail).
        # Sorted first, so that a given seed always produces the same split
        # regardless of the order os.listdir happens to return.
        images = sorted(
            entry for entry in os.listdir(category_path)
            if os.path.isfile(os.path.join(category_path, entry))
        )
        rng.shuffle(images)  # Shuffle to randomize the selection

        num_images = len(images)
        print(f"Processing category '{category}' ({num_images} images)...")

        # Split sizes: `split_ratio` for train, the rest halved into val/test
        num_train = int(num_images * split_ratio)
        num_val = (num_images - num_train) // 2

        splits = {
            "train": images[:num_train],
            "val": images[num_train:num_train + num_val],
            "test": images[num_train + num_val:],
        }

        # Copy images to respective directories with progress bar
        print(f"Splitting images for '{category}'...")
        for split_name, image_list in splits.items():
            split_dir = os.path.join(dest_dir, split_name, category)
            os.makedirs(split_dir, exist_ok=True)
            for image in tqdm(image_list, desc=f"{split_name.capitalize()} Split"):
                shutil.copy(os.path.join(category_path, image), os.path.join(split_dir, image))
                class_counts[category][split_name] += 1

    return class_counts

def display_summary(class_counts):
    """Build, print and return the split-size table.

    Each key of ``class_counts`` becomes one row with its ``"train"``,
    ``"val"`` and ``"test"`` counts; a closing ``"Total"`` row carries the sum
    of every column.
    """
    rows = []
    for category, counts in class_counts.items():
        rows.append({
            "Category": category,
            "Train": counts["train"],
            "Validation": counts["val"],
            "Test": counts["test"],
        })

    # Column sums over the category rows only (the Total row is added after)
    rows.append({
        "Category": "Total",
        "Train": sum(row["Train"] for row in rows),
        "Validation": sum(row["Validation"] for row in rows),
        "Test": sum(row["Test"] for row in rows),
    })

    # Create a DataFrame for better visualization
    df = pd.DataFrame(rows)
    print("\nFinal Summary:")
    print(df)
    return df

# Main script
# Raw strings keep the Windows backslashes literal; in a normal string
# sequences such as "\g" or "\P" are invalid escapes.
source_dir = r"D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_original"
dest_dir = r"D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_split_70"
categories = [d for d in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, d))]

# Seed the global generator used by the shuffle in split_data so that a
# re-run can reproduce the split (assuming the directory listing order is
# unchanged between runs).
random.seed(42)

# Create directory structure and split data.
# NOTE: files already present in dest_dir are not cleared first; delete the
# folder before re-splitting to avoid mixing two different splits.
create_dir_structure(dest_dir, categories)
class_counts = split_data(source_dir, dest_dir, categories, split_ratio=0.7) #split ratio can be changed here

# Display summary
summary_df = display_summary(class_counts)


Processing category 'Abstract_Expressionism' (340 images)...
Splitting images for 'Abstract_Expressionism'...


Train Split: 100%|██████████| 237/237 [00:00<00:00, 1412.79it/s]
Val Split: 100%|██████████| 51/51 [00:00<00:00, 1784.48it/s]
Test Split: 100%|██████████| 52/52 [00:00<00:00, 1733.32it/s]


Processing category 'Baroque' (960 images)...
Splitting images for 'Baroque'...


Train Split: 100%|██████████| 672/672 [00:01<00:00, 496.84it/s] 
Val Split: 100%|██████████| 144/144 [00:00<00:00, 331.26it/s]
Test Split: 100%|██████████| 144/144 [00:00<00:00, 1014.46it/s]


Processing category 'Cubism' (920 images)...
Splitting images for 'Cubism'...


Train Split: 100%|██████████| 644/644 [00:00<00:00, 827.58it/s]
Val Split: 100%|██████████| 138/138 [00:00<00:00, 511.72it/s]
Test Split: 100%|██████████| 138/138 [00:00<00:00, 264.46it/s]


Processing category 'Fauvism' (394 images)...
Splitting images for 'Fauvism'...


Train Split: 100%|██████████| 275/275 [00:02<00:00, 98.10it/s] 
Val Split: 100%|██████████| 59/59 [00:00<00:00, 299.08it/s]
Test Split: 100%|██████████| 60/60 [00:02<00:00, 25.18it/s]


Processing category 'HighRenaissance' (812 images)...
Splitting images for 'HighRenaissance'...


Train Split: 100%|██████████| 568/568 [00:01<00:00, 558.34it/s]
Val Split: 100%|██████████| 122/122 [00:00<00:00, 679.10it/s]
Test Split: 100%|██████████| 122/122 [00:00<00:00, 372.50it/s]


Processing category 'Iconoclasm' (665 images)...
Splitting images for 'Iconoclasm'...


Train Split: 100%|██████████| 465/465 [00:14<00:00, 31.91it/s] 
Val Split: 100%|██████████| 100/100 [00:00<00:00, 380.78it/s]
Test Split: 100%|██████████| 100/100 [00:09<00:00, 10.82it/s]


Processing category 'Impressionism' (984 images)...
Splitting images for 'Impressionism'...


Train Split: 100%|██████████| 688/688 [00:00<00:00, 861.81it/s]
Val Split: 100%|██████████| 148/148 [00:00<00:00, 458.02it/s]
Test Split: 100%|██████████| 148/148 [00:01<00:00, 146.29it/s]


Processing category 'OldGreekPottery' (350 images)...
Splitting images for 'OldGreekPottery'...


Train Split: 100%|██████████| 244/244 [00:02<00:00, 106.76it/s]
Val Split: 100%|██████████| 53/53 [00:00<00:00, 322.92it/s]
Test Split: 100%|██████████| 53/53 [00:00<00:00, 217.60it/s]


Processing category 'realism' (307 images)...
Splitting images for 'realism'...


Train Split: 100%|██████████| 214/214 [00:01<00:00, 171.88it/s]
Val Split: 100%|██████████| 46/46 [00:02<00:00, 16.54it/s]
Test Split: 100%|██████████| 47/47 [00:00<00:00, 420.84it/s]


Processing category 'Rococo' (844 images)...
Splitting images for 'Rococo'...


Train Split: 100%|██████████| 590/590 [00:01<00:00, 511.21it/s]
Val Split: 100%|██████████| 127/127 [00:00<00:00, 590.10it/s]
Test Split: 100%|██████████| 127/127 [00:00<00:00, 416.06it/s]


Processing category 'Romanticism' (874 images)...
Splitting images for 'Romanticism'...


Train Split: 100%|██████████| 611/611 [00:01<00:00, 585.55it/s] 
Val Split: 100%|██████████| 131/131 [00:04<00:00, 30.96it/s]
Test Split: 100%|██████████| 132/132 [00:01<00:00, 68.09it/s]


Processing category 'Surrealism' (242 images)...
Splitting images for 'Surrealism'...


Train Split: 100%|██████████| 169/169 [00:03<00:00, 55.02it/s] 
Val Split: 100%|██████████| 36/36 [00:00<00:00, 556.06it/s]
Test Split: 100%|██████████| 37/37 [00:00<00:00, 198.60it/s]


Final Summary:
                  Category  Train  Validation  Test
0   Abstract_Expressionism    237          51    52
1                  Baroque    672         144   144
2                   Cubism    644         138   138
3                  Fauvism    275          59    60
4          HighRenaissance    568         122   122
5               Iconoclasm    465         100   100
6            Impressionism    688         148   148
7          OldGreekPottery    244          53    53
8                  realism    214          46    47
9                   Rococo    590         127   127
10             Romanticism    611         131   132
11              Surrealism    169          36    37
12                   Total   5377        1155  1160





Segmenting each image into 5 patches (four quadrants plus a central crop). This method is based on Imran et al. (2023).

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import matplotlib.pyplot as plt
from PIL import Image
import cv2

In [None]:
# For testing whether the samples are being segmented correctly
import os
import cv2
from tqdm import tqdm

def _extract_five_patches(image):
    """Return the four quadrants and the central crop of ``image``, in that order.

    The quadrants are cut at the integer midpoints of width and height; the
    central crop spans 25%-75% of each dimension.
    """
    height, width = image.shape[:2]
    mid_x, mid_y = width // 2, height // 2
    start_x, end_x = int(0.25 * width), int(0.75 * width)
    start_y, end_y = int(0.25 * height), int(0.75 * height)

    return [
        image[0:mid_y, 0:mid_x],  # Top-left
        image[0:mid_y, mid_x:width],  # Top-right
        image[mid_y:height, 0:mid_x],  # Bottom-left
        image[mid_y:height, mid_x:width],  # Bottom-right
        image[start_y:end_y, start_x:end_x],  # Center
    ]


def preprocess_and_save_patches(input_dir, output_dir, patch_size=(224, 224)):
    """
    Preprocesses images into patches and saves them in a structured folder format.

    Every readable image under ``input_dir/<split>/<class>/`` is cut into five
    patches (four quadrants plus a central crop). Each patch is resized to
    ``patch_size`` (passed to ``cv2.resize`` as the target size) and written
    as a JPEG. The resized original image is NOT saved.

    Files that OpenCV cannot read, and images with a side shorter than 2
    pixels (which would produce an empty quadrant), are skipped with a
    message instead of aborting the run.

    Folder structure:
    output_dir/
        train/
            class_1/
                image_1/  # Folder containing all patches of image_1
                    image_1_patch_0.jpg
                    image_1_patch_1.jpg
                    ...
                    image_1_patch_4.jpg
                image_2/
                    ...
            class_2/
                ...
        val/
            ...
        test/
            ...
    """
    os.makedirs(output_dir, exist_ok=True)

    for split in ["train", "val", "test"]:  # Loop through dataset splits
        split_input_dir = os.path.join(input_dir, split)
        split_output_dir = os.path.join(output_dir, split)
        os.makedirs(split_output_dir, exist_ok=True)

        for label_dir in os.listdir(split_input_dir):
            label_path = os.path.join(split_input_dir, label_dir)
            if not os.path.isdir(label_path):
                continue

            # Create directory for label in output
            output_label_path = os.path.join(split_output_dir, label_dir)
            os.makedirs(output_label_path, exist_ok=True)

            for img_file in tqdm(os.listdir(label_path), desc=f"Processing {split}/{label_dir}"):
                img_path = os.path.join(label_path, img_file)
                image = cv2.imread(img_path)

                if image is None:
                    print(f"Skipping {img_path}: Unable to read")
                    continue

                # A 1-pixel side makes width // 2 or height // 2 equal 0, so
                # some quadrants would be empty and cv2.resize would raise.
                height, width = image.shape[:2]
                if height < 2 or width < 2:
                    print(f"Skipping {img_path}: too small to split into patches ({width}x{height})")
                    continue

                # Create subfolder for each image's patches
                image_name = os.path.splitext(img_file)[0]
                image_output_path = os.path.join(output_label_path, image_name)
                os.makedirs(image_output_path, exist_ok=True)

                # Resize and save each patch
                for i, patch in enumerate(_extract_five_patches(image)):
                    resized_patch = cv2.resize(patch, patch_size)
                    patch_filename = f"{image_name}_patch_{i}.jpg"
                    patch_path = os.path.join(image_output_path, patch_filename)
                    # imwrite reports failure through its return value rather
                    # than an exception, so check it explicitly.
                    if not cv2.imwrite(patch_path, resized_patch):
                        print(f"Failed to write {patch_path}")

                # The resized original image is intentionally not saved
                # (that step was disabled in the original implementation).
                
#these are different input and output directories
# # Define input and output directories
# input_directory = "D:/git/FYP/Python/TwoStage/data/Pandora_7k/sample"
# output_directory = "D:\git\FYP\Python\TwoStage\data\Pandora_7k\sample_test_forscript"
# # Define input and output directories
# input_directory = "D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_split_80"
# output_directory = "D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_split_80_patches"

# Define input and output directories.
# Raw strings keep the Windows backslashes literal; in a normal string
# sequences such as "\g" or "\P" are invalid escapes.
input_directory = r"D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_split_70"
output_directory = r"D:\git\FYP\Python\TwoStage\data\Pandora_7k\Pandora7k_split_70_patches"

# Run the preprocessing function
preprocess_and_save_patches(input_directory, output_directory)


Processing train/Abstract_Expressionism: 100%|██████████| 4/4 [00:00<00:00, 19.61it/s]
Processing train/Baroque: 100%|██████████| 4/4 [00:00<00:00, 29.23it/s]
Processing train/Cubism: 100%|██████████| 4/4 [00:00<00:00, 46.65it/s]
Processing val/Abstract_Expressionism: 100%|██████████| 2/2 [00:00<00:00, 33.47it/s]
Processing val/Baroque: 100%|██████████| 2/2 [00:00<00:00, 30.08it/s]
Processing val/Cubism: 100%|██████████| 2/2 [00:00<00:00, 32.59it/s]
Processing test/Abstract_Expressionism: 100%|██████████| 2/2 [00:00<00:00, 27.49it/s]
Processing test/Baroque: 100%|██████████| 2/2 [00:00<00:00, 33.48it/s]
Processing test/Cubism: 100%|██████████| 2/2 [00:00<00:00, 35.01it/s]
