In [1]:
import os
import shutil
import random

def split_dataset(input_folder, output_folder, train_percent=0.85, test_percent=0.15, seed=None):
    """Copy a folder-per-class image dataset into train and test splits.

    Every immediate sub-directory of ``input_folder`` is treated as one class.
    Its image files are shuffled and copied (the originals are left in place)
    into ``output_folder/train/<class>`` and ``output_folder/test/<class>``.

    Args:
        input_folder: Directory whose immediate sub-directories are the classes.
        output_folder: Destination root; created if it does not exist.
        train_percent: Fraction in [0, 1] of each class assigned to the train
            split. The per-class count is truncated towards zero, so very
            small classes may end up with no train images.
        test_percent: Expected fraction of the test split. The test split
            always receives every image not assigned to train, so this value
            is only checked for consistency: a warning is emitted when it
            differs from ``1 - train_percent``.
        seed: Optional seed. When given, the same input always yields the same
            split. When ``None`` the global ``random`` state is used and each
            run produces a different split.

    Raises:
        ValueError: If ``train_percent`` is outside [0, 1].

    Note:
        Files already present under ``output_folder`` are not removed.
        Re-running with a different shuffle into a non-empty output folder can
        leave the same image in both train and test; clear the output folder
        (or pass a fixed ``seed``) before re-running.
    """
    import warnings  # local import: only needed here, keeps the file's import cell untouched

    # Validate before touching the filesystem so a bad call leaves no empty folders behind.
    if not 0.0 <= train_percent <= 1.0:
        raise ValueError(
            "train_percent must be between 0 and 1, got {!r}".format(train_percent)
        )
    if abs(train_percent + test_percent - 1.0) > 1e-9:
        warnings.warn(
            "test_percent={!r} does not equal 1 - train_percent ({!r}); the test "
            "split receives all images not assigned to train.".format(
                test_percent, train_percent
            ),
            stacklevel=2,
        )

    # Matched case-insensitively so files such as 'IMG_001.JPG' are not silently skipped.
    image_extensions = ('.jpg', '.jpeg', '.png')

    # A private generator keeps a seeded split independent of other users of `random`.
    rng = random.Random(seed) if seed is not None else random

    # Create output directories for train and test sets
    train_dir = os.path.join(output_folder, 'train')
    test_dir = os.path.join(output_folder, 'test')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # os.listdir order is arbitrary; sorting makes a seeded run reproducible.
    class_folders = sorted(
        f for f in os.listdir(input_folder)
        if os.path.isdir(os.path.join(input_folder, f))
    )

    for class_folder in class_folders:
        class_input_folder = os.path.join(input_folder, class_folder)
        class_train_dir = os.path.join(train_dir, class_folder)
        class_test_dir = os.path.join(test_dir, class_folder)

        os.makedirs(class_train_dir, exist_ok=True)
        os.makedirs(class_test_dir, exist_ok=True)

        # Regular files only: a directory named like an image would make the copy fail.
        image_files = sorted(
            f for f in os.listdir(class_input_folder)
            if f.lower().endswith(image_extensions)
            and os.path.isfile(os.path.join(class_input_folder, f))
        )
        rng.shuffle(image_files)

        # Train gets the truncated share; test gets everything that is left.
        num_train = int(train_percent * len(image_files))

        # Copy images to respective output directories
        copy_images(image_files[:num_train], class_input_folder, class_train_dir)
        copy_images(image_files[num_train:], class_input_folder, class_test_dir)

def copy_images(image_list, input_folder, output_folder):
    """Copy the named files from one directory to another, keeping their names.

    Args:
        image_list: File names (not paths) that exist inside ``input_folder``.
        input_folder: Directory the files are read from.
        output_folder: Existing directory the files are written to; a file
            with the same name there is overwritten.
    """
    for filename in image_list:
        shutil.copyfile(
            os.path.join(input_folder, filename),
            os.path.join(output_folder, filename),
        )

# Define input and output folders.
# These are hard-coded absolute Windows paths; edit them to match the local
# machine before running this cell.
input_folder = r'E:\Large Mushroom Dataset\mushroom_dataset\Classes\LETS USE ONLY THIS'
output_folder = r'D:\New folder'

# Split dataset into train and test sets while preserving folder structure.
# Uses split_dataset's default 85% / 15% fractions. The shuffle differs on
# every run and files already in output_folder are not removed, so re-running
# this cell against a non-empty output folder can leave the same image in both
# train and test -- clear the output folder first.
split_dataset(input_folder, output_folder)


In [3]:
# Define input and output folders for a second pass in which the immediate
# sub-folders of 'poisonous' are the classes being split.
# NOTE(review): this rebinds the input_folder / output_folder globals set by
# the earlier cell, so their values depend on which cell ran last.
input_folder = r'E:\Large Mushroom Dataset\mushroom_dataset\Classes\LETS USE ONLY THIS\poisonous'
output_folder = r'D:\New folder\poisonous'
# Split dataset into train and test sets (split_dataset creates no validation
# split); results are written under output_folder\train and output_folder\test.
split_dataset(input_folder, output_folder)
