In [19]:
import os
import shutil
import random

def _ensure_dir(path):
    """Create ``path`` (including parents) if it is missing and report it."""
    if not os.path.exists(path):
        # exist_ok guards against the directory appearing between check and create.
        os.makedirs(path, exist_ok=True)
        print(f"Created directory: {path}")


def _copy_images(names, src_dir, dst_dir):
    """Copy every file in ``names`` from ``src_dir`` to ``dst_dir``, logging each copy."""
    for name in names:
        src = os.path.join(src_dir, name)
        dst = os.path.join(dst_dir, name)
        shutil.copyfile(src, dst)
        print(f"Copied {src} to {dst}")


def split_data(source_dir, train_dir, val_dir, split_ratio=0.8,
               classes=('men', 'women'), seed=None):
    """Copy per-class files from ``source_dir`` into train and validation folders.

    For every class name, the files found directly inside
    ``source_dir/<class>`` are shuffled, and the first
    ``int(len(files) * split_ratio)`` of them are copied to
    ``train_dir/<class>`` while the rest are copied to ``val_dir/<class>``.
    Source files are copied, never moved or deleted.

    Args:
        source_dir: Directory containing one sub-directory per class.
        train_dir: Destination root for the training split (created if missing).
        val_dir: Destination root for the validation split (created if missing).
        split_ratio: Fraction of each class that goes to the training split;
            must lie in ``[0, 1]``.
        classes: Names of the class sub-directories to process. Classes whose
            directory does not exist under ``source_dir`` are skipped.
        seed: Optional seed. When given, the shuffle uses a private generator,
            so the same seed and the same files always yield the same split.
            When ``None``, the global ``random`` generator is used.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio}")

    # A private generator keeps a seeded split independent of (and harmless
    # to) the global random state; without a seed, keep using the global one.
    shuffle = random.Random(seed).shuffle if seed is not None else random.shuffle

    _ensure_dir(train_dir)
    _ensure_dir(val_dir)

    # Only the listed class directories are processed (by default 'men' and 'women').
    for class_name in classes:
        class_dir = os.path.join(source_dir, class_name)
        if not os.path.isdir(class_dir):
            print(f"Skipping non-directory: {class_dir}")
            continue

        train_class_dir = os.path.join(train_dir, class_name)
        val_class_dir = os.path.join(val_dir, class_name)
        _ensure_dir(train_class_dir)
        _ensure_dir(val_class_dir)

        # Keep regular files only (copyfile cannot copy directories), and sort
        # first because os.listdir returns entries in arbitrary order, which
        # would otherwise make a seeded shuffle platform-dependent.
        images = sorted(
            name for name in os.listdir(class_dir)
            if os.path.isfile(os.path.join(class_dir, name))
        )
        shuffle(images)

        split_point = int(len(images) * split_ratio)
        _copy_images(images[:split_point], class_dir, train_class_dir)
        _copy_images(images[split_point:], class_dir, val_class_dir)

# split_data shuffles file names with the `random` module, so seed it before
# splitting: re-running this cell on an unchanged directory listing then
# reproduces the same train/val partition instead of copying a different
# random subset on top of the files already copied.
SEED = 42
random.seed(SEED)

# The train/val folders live inside the dataset folder, next to the class folders.
source_dir = 'CNNself/dataset'
train_dir = 'CNNself/dataset/train'
val_dir = 'CNNself/dataset/val'

# 80% of each class goes to train, the remainder to val.
split_data(source_dir, train_dir, val_dir, split_ratio=0.8)


Created directory: CNNself/dataset/train
Created directory: CNNself/dataset/validation
Created directory: CNNself/dataset/train\men
Created directory: CNNself/dataset/validation\men
Copied CNNself/dataset\men\00002052.jpg to CNNself/dataset/train\men\00002052.jpg
Copied CNNself/dataset\men\00000562.jpg to CNNself/dataset/train\men\00000562.jpg
Copied CNNself/dataset\men\00000506.jpg to CNNself/dataset/train\men\00000506.jpg
Copied CNNself/dataset\men\00000487.jpg to CNNself/dataset/train\men\00000487.jpg
Copied CNNself/dataset\men\00000954.jpg to CNNself/dataset/train\men\00000954.jpg
Copied CNNself/dataset\men\00000347.jpg to CNNself/dataset/train\men\00000347.jpg
Copied CNNself/dataset\men\00000240.jpg to CNNself/dataset/train\men\00000240.jpg
Copied CNNself/dataset\men\00000318.jpg to CNNself/dataset/train\men\00000318.jpg
Copied CNNself/dataset\men\00000061.jpg to CNNself/dataset/train\men\00000061.jpg
Copied CNNself/dataset\men\00000032.jpg to CNNself/dataset/train\men\00000032.jp