In [1]:
import os
import shutil
from sklearn.model_selection import train_test_split
from torchvision import datasets, transforms

# Dataset root folder and the three split folders located directly under it
dataset_root = '/home/jovyan/Data Mining and Machine Learning/Dataset_New/'
train_dir, val_dir, test_dir = (
    os.path.join(dataset_root, split_name)
    for split_name in ('train', 'val', 'test')
)

# Remove any existing train/val/test folders so the split starts from scratch.
# NOTE(review): these folders live directly under dataset_root, which is later
# scanned by ImageFolder -- presumably they must be gone so they are not picked
# up as classes; confirm against the torchvision documentation.
for split_dir in (train_dir, val_dir, test_dir):
    if os.path.exists(split_dir):
        shutil.rmtree(split_dir)

# Index the dataset with ImageFolder, then create one sub-folder per class
# inside each of the train, validation, and test folders
all_images = datasets.ImageFolder(root=dataset_root)
for class_name in all_images.classes:
    for split_root in (train_dir, val_dir, test_dir):
        os.makedirs(os.path.join(split_root, class_name), exist_ok=True)

# Split fractions, both expressed relative to the whole dataset
val_size = 0.2   # 20% for validation
test_size = 0.1  # 10% for testing

# Step 1: hold out the test set, stratified on the class labels
train_val_images, test_images = train_test_split(
    all_images.samples,
    test_size=test_size,
    stratify=all_images.targets,
    random_state=42,
)

# Step 2: split what is left into train and validation. val_size is divided by
# (1 - test_size) because this second split only sees the samples that remain
# after the test set was removed.
train_val_labels = [label for _, label in train_val_images]
train_images, val_images = train_test_split(
    train_val_images,
    test_size=val_size / (1 - test_size),
    stratify=train_val_labels,
    random_state=42,
)

# Function to copy images into per-class sub-folders of the target folder.
# NOTE: despite its name it copies (shutil.copy2); the source files stay in place.
def move_images(images, target_dir, class_names=None):
    """Copy each image into ``target_dir/<class name>/``.

    Args:
        images: Iterable of ``(image_path, label)`` pairs, where ``label`` is
            an index into ``class_names``.
        target_dir: Folder that holds one sub-folder per class.
        class_names: Sequence mapping a label index to its class folder name.
            Defaults to the module-level ``all_images.classes``.

    The class sub-folder is created if it is missing; otherwise
    ``shutil.copy2`` would treat the path as a file name and every image of
    the class would overwrite that single file.

    When two sources in the same call map to the same destination file name
    (same basename, same class), the later one gets a ``_1``, ``_2``, ...
    suffix before the extension instead of silently overwriting the earlier
    copy. Files already present before the call are overwritten as before.
    """
    if class_names is None:
        class_names = all_images.classes

    written = set()  # destination paths produced by this call
    for image_path, label in images:
        dest_dir = os.path.join(target_dir, class_names[label])
        os.makedirs(dest_dir, exist_ok=True)

        file_name = os.path.basename(image_path)
        dest_path = os.path.join(dest_dir, file_name)
        if dest_path in written:
            # Same basename already copied for this class: pick a free name.
            stem, ext = os.path.splitext(file_name)
            counter = 1
            while dest_path in written:
                dest_path = os.path.join(
                    dest_dir, "{}_{}{}".format(stem, counter, ext)
                )
                counter += 1
        written.add(dest_path)

        shutil.copy2(image_path, dest_path)

# Fill each split folder with its share of the images
for split_images, split_dir in (
    (train_images, train_dir),
    (val_images, val_dir),
    (test_images, test_dir),
):
    move_images(split_images, split_dir)

print("Dataset split and saved into train, val, and test folders.")

Dataset split and saved into train, val, and test folders.
