In [6]:
import os
import shutil
import random


In [2]:
def split_dataset(input_folder, output_folder, train_ratio, val_ratio, test_ratio, seed=None):
    """Copy the files of every sub-folder of ``input_folder`` into train/val/test splits.

    The directory layout below ``input_folder`` is mirrored under
    ``output_folder/train``, ``output_folder/val`` and ``output_folder/test``.
    The files of each sub-folder are shuffled and split independently, so every
    sub-folder is divided with the same ratios. Files that sit directly in
    ``input_folder`` (not in a sub-folder) are ignored.

    Split sizes are truncated to whole files: ``train`` and ``val`` receive
    ``int(ratio * n)`` files each and ``test`` receives all remaining files, so
    no file is ever dropped and rounding leftovers end up in ``test``.

    Files already present in ``output_folder`` are not removed. Re-running with
    a different shuffle can therefore leave the same file in more than one
    split; pass ``seed`` (or start from an empty output folder) to avoid that.

    Args:
        input_folder: Root folder whose sub-folders contain the files to split.
        output_folder: Root folder that receives the ``train``/``val``/``test``
            trees. It may live inside ``input_folder``; it is then excluded
            from the traversal.
        train_ratio: Fraction of each sub-folder's files copied to ``train``.
        val_ratio: Fraction of each sub-folder's files copied to ``val``.
        test_ratio: Fraction of each sub-folder's files copied to ``test``.
        seed: Optional seed for a private random generator. With a seed the
            split is reproducible; with ``None`` (the default) the global
            ``random`` module state is used.

    Raises:
        ValueError: If a ratio is negative, the ratios do not sum to 1, or
            ``output_folder`` is the same directory as ``input_folder``.
    """
    ratios = (train_ratio, val_ratio, test_ratio)
    if any(ratio < 0 for ratio in ratios):
        raise ValueError("train_ratio, val_ratio and test_ratio must be non-negative")
    # Small tolerance so that e.g. 0.7 + 0.2 + 0.1 is accepted despite float rounding.
    if abs(sum(ratios) - 1.0) > 1e-6:
        raise ValueError("train_ratio, val_ratio and test_ratio must sum to 1")

    output_path = os.path.realpath(output_folder)
    if os.path.realpath(input_folder) == output_path:
        raise ValueError("output_folder must be different from input_folder")

    # A private generator keeps a seeded split independent of the global RNG state.
    rng = random if seed is None else random.Random(seed)

    split_names = ('train', 'val', 'test')
    os.makedirs(output_folder, exist_ok=True)
    for split in split_names:
        os.makedirs(os.path.join(output_folder, split), exist_ok=True)

    for root, dirs, files in os.walk(input_folder):
        # Editing ``dirs`` in place steers os.walk: drop the output tree so the
        # copies are never treated as input, and sort for a stable visit order.
        dirs[:] = sorted(
            name for name in dirs
            if os.path.realpath(os.path.join(root, name)) != output_path
        )

        # Files directly in the input root do not belong to any sub-folder.
        if root == input_folder:
            continue

        relative_path = os.path.relpath(root, input_folder)

        # Sort first so the shuffle result depends only on the RNG state,
        # not on the order in which the file system lists the entries.
        files.sort()
        rng.shuffle(files)

        num_files = len(files)
        num_train = int(train_ratio * num_files)
        num_val = int(val_ratio * num_files)

        files_by_split = {
            'train': files[:num_train],
            'val': files[num_train:num_train + num_val],
            # The remainder goes to test so that no file is lost to truncation.
            'test': files[num_train + num_val:],
        }

        for split in split_names:
            split_folder = os.path.join(output_folder, split, relative_path)
            os.makedirs(split_folder, exist_ok=True)
            for filename in files_by_split[split]:
                shutil.copyfile(
                    os.path.join(root, filename),
                    os.path.join(split_folder, filename),
                )

In [3]:
# Dataset locations (relative paths resolve against the current working directory):
# the first is the source tree to split, the second receives the split copies.
input_folder, output_folder = 'Furniture_Data', 'Splited_Data'

In [5]:
# Define split ratios (fractions of each sub-folder's files)
train_ratio = 0.6
val_ratio = 0.2
test_ratio = 0.2

# Seed the global RNG so the shuffle inside split_dataset is repeatable for the
# same directory listing; without it every run assigns files differently and
# earlier copies left in the output folder can end up in several splits.
SEED = 42
random.seed(SEED)

# Split the dataset
split_dataset(input_folder, output_folder, train_ratio, val_ratio, test_ratio)