In [17]:
from PIL import Image
import os
import glob
import shutil
from sklearn.model_selection import train_test_split

In [18]:
def resize_images(input_folder, output_folder, img_size=(500, 500)):
    """Save an RGB, resized copy of every ``*.jpg`` in ``input_folder``.

    Each copy is written to ``output_folder`` under the source file's base
    name. ``output_folder`` is created when it does not exist yet.

    Args:
        input_folder: Directory searched (non-recursively) for ``*.jpg`` files.
        output_folder: Directory that receives the processed copies.
        img_size: Size tuple handed unchanged to ``Image.resize``.

    Any exception raised while handling one file is reported with ``print``
    and that file is skipped; the remaining files are still processed.
    """
    os.makedirs(output_folder, exist_ok=True)
    jpg_pattern = os.path.join(input_folder, '*.jpg')
    for source_path in glob.glob(jpg_pattern):
        try:
            target_path = os.path.join(output_folder, os.path.basename(source_path))
            with Image.open(source_path) as picture:
                # Convert to RGB first so every saved file has the same mode.
                resized = picture.convert('RGB').resize(img_size)
                resized.save(target_path)
        except Exception as err:
            print(f"Error processing {source_path}: {err}")

def split_dataset(input_folder, output_folder, train_ratio=0.7, val_ratio=0.2):
    """Copy the ``*.jpg`` files of ``input_folder`` into train/val/test folders.

    The images are partitioned with two chained ``train_test_split`` calls
    (both seeded with ``random_state=42``) and copied into
    ``output_folder/train``, ``output_folder/val`` and ``output_folder/test``.
    The test split receives what is left after the train and validation
    shares have been taken.

    Args:
        input_folder: Directory searched (non-recursively) for ``*.jpg`` files.
        output_folder: Directory under which the three split folders are
            created if they do not exist yet.
        train_ratio: Fraction of all images assigned to the training split.
        val_ratio: Fraction of all images assigned to the validation split.

    Raises:
        ValueError: If ``train_ratio`` is not strictly between 0 and 1, if
            ``val_ratio`` is not strictly between 0 and ``1 - train_ratio``,
            or if no ``*.jpg`` file is found in ``input_folder``.
    """
    if not 0 < train_ratio < 1:
        raise ValueError(
            f"train_ratio must be strictly between 0 and 1, got {train_ratio}"
        )
    holdout_ratio = 1 - train_ratio
    if not 0 < val_ratio < holdout_ratio:
        raise ValueError(
            f"val_ratio must be positive and smaller than 1 - train_ratio, got {val_ratio}"
        )

    # glob returns paths in arbitrary order; sorting makes the seeded split
    # reproducible across runs and machines.
    images = sorted(glob.glob(os.path.join(input_folder, '*.jpg')))
    print(f"Found {len(images)} images in {input_folder}")
    if not images:
        raise ValueError(f"No images found in {input_folder}")

    train_images, holdout_images = train_test_split(
        images, test_size=holdout_ratio, random_state=42
    )
    # train_test_split returns (remainder, test_size portion). The test_size
    # portion requested here is the validation share of the holdout, so it is
    # the SECOND element that belongs to 'val'; the remainder is the test set.
    test_images, val_images = train_test_split(
        holdout_images, test_size=val_ratio / holdout_ratio, random_state=42
    )

    for category, img_list in zip(['train', 'val', 'test'], [train_images, val_images, test_images]):
        category_folder = os.path.join(output_folder, category)
        os.makedirs(category_folder, exist_ok=True)
        for img_path in img_list:
            shutil.copy(img_path, category_folder)


In [19]:
# Root folder holding one sub-folder of raw images per subspecies.
base_folder = r'C:\Users\User\Downloads\Documents\ISB46703 PRINCIPLES OF ARTIFICIAL INTELLIGENCE\project\data'
subspecies_folders = [
    'Monarch',
    'Red Admiral',
    'Small White',
    'Painted Lady',
    'Gulf Fritillary',
]

# Write a resized RGB copy of each subspecies folder to <base_folder>/standardized/<subspecies>.
standardized_root = os.path.join(base_folder, 'standardized')
for subspecies in subspecies_folders:
    input_folder = os.path.join(base_folder, subspecies)
    output_folder = os.path.join(standardized_root, subspecies)
    resize_images(input_folder, output_folder)


In [20]:
# Root folder holding one sub-folder of standardized images per subspecies.
base_folder = r'C:\Users\User\Downloads\Documents\ISB46703 PRINCIPLES OF ARTIFICIAL INTELLIGENCE\project\data\standardized'
subspecies_folders = [
    'Monarch',
    'Red Admiral',
    'Small White',
    'Painted Lady',
    'Gulf Fritillary',
]

# Split each subspecies into <base_folder>/split/<subspecies>/{train,val,test}.
split_root = os.path.join(base_folder, 'split')
for subspecies in subspecies_folders:
    input_folder = os.path.join(base_folder, subspecies)
    output_folder = os.path.join(split_root, subspecies)
    try:
        split_dataset(input_folder, output_folder)
    except ValueError as err:
        # A subspecies that cannot be split is reported and skipped
        # so the remaining folders are still processed.
        print(err)


Found 2037 images in C:\Users\User\Downloads\Documents\ISB46703 PRINCIPLES OF ARTIFICIAL INTELLIGENCE\project\data\standardized\Monarch
Found 2005 images in C:\Users\User\Downloads\Documents\ISB46703 PRINCIPLES OF ARTIFICIAL INTELLIGENCE\project\data\standardized\Red Admiral
Found 2025 images in C:\Users\User\Downloads\Documents\ISB46703 PRINCIPLES OF ARTIFICIAL INTELLIGENCE\project\data\standardized\Small White
Found 1939 images in C:\Users\User\Downloads\Documents\ISB46703 PRINCIPLES OF ARTIFICIAL INTELLIGENCE\project\data\standardized\Painted Lady
Found 2003 images in C:\Users\User\Downloads\Documents\ISB46703 PRINCIPLES OF ARTIFICIAL INTELLIGENCE\project\data\standardized\Gulf Fritillary
