In [3]:
import os
import shutil
import numpy as np
import logging
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def create_directories(base_dir, categories):
    """
    Build the train/test/validation directory tree, one sub-directory per category.

    Parameters:
    - base_dir (str): Root directory under which 'train', 'test', and 'validation' are created.
    - categories (list): Category names; each becomes a sub-directory of every subset directory.

    Directories that already exist are left untouched (exist_ok=True), and every
    processed path is reported through an INFO log record.
    """
    subsets = ('train', 'test', 'validation')
    # Flatten the subset x category grid; subsets vary slowest so the
    # creation (and logging) order is train/*, then test/*, then validation/*.
    targets = (
        os.path.join(base_dir, subset, category)
        for subset in subsets
        for category in categories
    )
    for dir_path in targets:
        os.makedirs(dir_path, exist_ok=True)
        logging.info(f'Created directory: {dir_path}')

def copy_images(source_dir, images, dest_dir):
    """
    Copy the named image files from one directory into another.

    Parameters:
    - source_dir (str): Directory that currently holds the images.
    - images (list): Filenames (relative to source_dir) to copy.
    - dest_dir (str): Directory that receives the copies; each file keeps its name.

    One INFO log record is emitted per copied file.
    """
    for filename in images:
        origin = os.path.join(source_dir, filename)
        target = os.path.join(dest_dir, filename)
        shutil.copy(origin, target)
        logging.info(f'Copied {filename} to {dest_dir}')

def split_dataset(source_dir, base_dir, split_ratios=(0.7, 0.15, 0.15)):
    """
    Split the dataset into training, testing, and validation sets.

    Every sub-directory of source_dir is treated as a category. The files in
    each category are shuffled and copied into base_dir/train/<category>,
    base_dir/test/<category>, and base_dir/validation/<category>.

    Parameters:
    - source_dir (str): The source directory containing subdirectories of images for each category.
    - base_dir (str): The base directory where the split datasets will be stored.
    - split_ratios (tuple): Ratios for the train, test, and validation sets, in that order.
      The train and test counts are truncated to whole files; the validation set
      receives whatever remains.

    Raises:
    - ValueError: If the split ratios do not sum to 1 (within a small floating-point tolerance).
    """
    # Compare with a tolerance rather than `!= 1`: binary floating point cannot
    # represent most decimal fractions exactly, so a valid tuple such as
    # (0.7, 0.2, 0.1) may sum to 0.9999999999999999 and be rejected by an exact
    # comparison. `not (... <= tol)` also rejects NaN, as the exact test did.
    tolerance = 1e-9
    ratio_total = sum(split_ratios)
    if not abs(ratio_total - 1) <= tolerance:
        raise ValueError("Split ratios must sum to 1. Provided ratios sum to {:.2f}".format(ratio_total))

    categories = [d for d in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, d))]
    create_directories(base_dir, categories)

    for category in categories:
        category_path = os.path.join(source_dir, category)
        # Keep regular files only: a nested directory would make shutil.copy fail.
        # Sorting first removes the dependence on os.listdir's arbitrary order,
        # so a seeded NumPy RNG yields the same split on every machine.
        images = sorted(
            name for name in os.listdir(category_path)
            if os.path.isfile(os.path.join(category_path, name))
        )
        np.random.shuffle(images)

        train_split = int(len(images) * split_ratios[0])
        test_split = int(len(images) * split_ratios[1])
        test_end = train_split + test_split

        train_images = images[:train_split]
        test_images = images[train_split:test_end]
        # Validation takes the remainder so no file is dropped by truncation.
        validation_images = images[test_end:]

        copy_images(category_path, train_images, os.path.join(base_dir, 'train', category))
        copy_images(category_path, test_images, os.path.join(base_dir, 'test', category))
        copy_images(category_path, validation_images, os.path.join(base_dir, 'validation', category))
        
def preprocess_image(image_path):
    """
    Open an image and remove its alpha channel when it has one.

    Parameters:
    - image_path (str): Path of the image file to open.

    Returns:
    - The opened PIL image. Palette ('P') images that carry a 'transparency'
      entry in their info are converted to RGBA and then to RGB; RGBA images
      are converted to RGB; images in any other mode are returned unchanged.
    """
    picture = Image.open(image_path)
    has_palette_alpha = picture.mode == 'P' and 'transparency' in picture.info
    if has_palette_alpha:
        # Go through RGBA first so the palette transparency is applied
        # before the alpha channel is discarded below.
        picture = picture.convert('RGBA')
    if picture.mode == 'RGBA':
        picture = picture.convert('RGB')
    return picture

def setup_and_split_dataset(source_dir='Data', base_dir='Dataset_Split', split_ratios=(0.7, 0.15, 0.15)):
    """
    Configure logging and run the dataset split.

    Parameters:
    - source_dir (str): Directory whose sub-directories hold the images of each category.
    - base_dir (str): Directory under which the split datasets are written.
    - split_ratios (tuple): Ratios for the train, test, and validation sets.

    A ValueError raised by split_dataset is logged at ERROR level instead of
    being propagated to the caller.
    """
    log_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format)

    try:
        split_dataset(source_dir, base_dir, split_ratios)
    except ValueError as ratio_error:
        logging.error(ratio_error)
        


# Entry point: run the split with the default source/destination directories
# and ratios when this module is executed directly (not when it is imported).
if __name__ == "__main__":
    setup_and_split_dataset()

2024-05-19 23:49:13,631 - INFO - Created directory: Dataset_Split\train\Ace
2024-05-19 23:49:13,632 - INFO - Created directory: Dataset_Split\train\Akainu
2024-05-19 23:49:13,633 - INFO - Created directory: Dataset_Split\train\Brook
2024-05-19 23:49:13,633 - INFO - Created directory: Dataset_Split\train\Chopper
2024-05-19 23:49:13,634 - INFO - Created directory: Dataset_Split\train\Crocodile
2024-05-19 23:49:13,635 - INFO - Created directory: Dataset_Split\train\Franky
2024-05-19 23:49:13,635 - INFO - Created directory: Dataset_Split\train\Jinbei
2024-05-19 23:49:13,636 - INFO - Created directory: Dataset_Split\train\Kurohige
2024-05-19 23:49:13,637 - INFO - Created directory: Dataset_Split\train\Law
2024-05-19 23:49:13,637 - INFO - Created directory: Dataset_Split\train\Luffy
2024-05-19 23:49:13,638 - INFO - Created directory: Dataset_Split\train\Mihawk
2024-05-19 23:49:13,638 - INFO - Created directory: Dataset_Split\train\Nami
2024-05-19 23:49:13,639 - INFO - Created directory: Data

Found 8204 images belonging to 18 classes.




Found 8204 images belonging to 18 classes.
Found 1751 images belonging to 18 classes.
Found 1782 images belonging to 18 classes.
