In [4]:
import os
import shutil
import numpy as np

def split_dataset(source_dir, dest_dir, train_size=0.6, val_size=0.2, test_size=0.2, random_state=42,
                  classes=('Stable', 'Unstable')):
    """Copy a class-labelled image folder into train/val/test sub-folders.

    For every class name in ``classes`` the images found in
    ``source_dir/<class>`` are shuffled and copied (with ``shutil.copy2``)
    into ``dest_dir/train/<class>``, ``dest_dir/val/<class>`` and
    ``dest_dir/test/<class>``. The source files are left untouched.

    Args:
        source_dir: Directory containing one sub-directory per class.
        dest_dir: Directory that receives the ``train``/``val``/``test`` tree;
            missing directories are created.
        train_size: Fraction of each class copied to ``train``.
        val_size: Fraction of each class copied to ``val``.
        test_size: Expected fraction for ``test``. The test split always
            receives whatever remains after train and val, so this value is
            only checked for consistency; a mismatch triggers a warning.
        random_state: Seed for the shuffle. ``None`` gives a non-deterministic
            shuffle.
        classes: Names of the class sub-directories to process.

    Raises:
        ValueError: If ``train_size`` or ``val_size`` is negative, or if
            together they exceed 1.
        FileNotFoundError: If ``source_dir/<class>`` does not exist
            (raised by ``os.listdir``).

    Note:
        Existing files in ``dest_dir`` are not removed. Re-running with a
        different seed or different fractions into the same destination can
        leave the same image in more than one split.
    """
    import warnings  # local import so the file's import block needs no change

    image_exts = ('.jpg', '.jpeg', '.png')
    splits = ('train', 'val', 'test')
    tol = 1e-9  # tolerance for float round-off such as 1 - 0.6 - 0.2

    if train_size < 0 or val_size < 0 or train_size + val_size > 1 + tol:
        raise ValueError(
            "train_size and val_size must be non-negative and sum to at most 1 "
            f"(got train_size={train_size}, val_size={val_size})"
        )
    remainder = 1 - train_size - val_size
    if abs(remainder - test_size) > tol:
        warnings.warn(
            f"test_size={test_size} is ignored: the test split receives the "
            f"remaining {remainder:.3f} of each class",
            stacklevel=2,
        )

    # A private generator produces the same stream as np.random.seed(...)
    # followed by np.random.shuffle(...), without reseeding the global RNG.
    rng = np.random.RandomState(random_state)

    for split in splits:
        for cls in classes:
            os.makedirs(os.path.join(dest_dir, split, cls), exist_ok=True)

    for cls in classes:
        cls_path = os.path.join(source_dir, cls)

        # Sort first: os.listdir order is arbitrary, so without sorting the
        # same seed could give different splits on different machines.
        # The extension match is case-insensitive and directories are skipped.
        images = sorted(
            name for name in os.listdir(cls_path)
            if name.lower().endswith(image_exts)
            and os.path.isfile(os.path.join(cls_path, name))
        )
        rng.shuffle(images)

        n_images = len(images)
        if n_images == 0:
            # Avoid dividing by zero in the percentage report below.
            print(f"\n{cls} class split statistics:")
            print("No images found; nothing copied.")
            continue

        n_train = int(n_images * train_size)
        n_val = int(n_images * val_size)

        partitions = {
            'train': images[:n_train],
            'val': images[n_train:n_train + n_val],
            'test': images[n_train + n_val:],  # remainder absorbs rounding
        }

        for split, files in partitions.items():
            for file in files:
                shutil.copy2(
                    os.path.join(cls_path, file),
                    os.path.join(dest_dir, split, cls, file)
                )

        train_files = partitions['train']
        val_files = partitions['val']
        test_files = partitions['test']

        print(f"\n{cls} class split statistics:")
        print(f"Training: {len(train_files)} images ({len(train_files)/n_images:.1%})")
        print(f"Validation: {len(val_files)} images ({len(val_files)/n_images:.1%})")
        print(f"Testing: {len(test_files)} images ({len(test_files)/n_images:.1%})")

if __name__ == "__main__":
    # Script entry point: split the friction-test image set using the
    # default 60/20/20 fractions and the default seed of split_dataset.
    source_directory = "OG_DS/testing_friction/6B-3D-Non-Rot"
    destination_directory = "OG_DS/testing_friction/6B-3D-Non-Rot_SPLIT"

    # Keyword arguments make it explicit which path is read and which is written.
    split_dataset(
        source_dir=source_directory,
        dest_dir=destination_directory,
    )


Stable class split statistics:
Training: 600 images (60.0%)
Validation: 200 images (20.0%)
Testing: 200 images (20.0%)

Unstable class split statistics:
Training: 600 images (60.0%)
Validation: 200 images (20.0%)
Testing: 200 images (20.0%)
