# In [1]:
import os
import shutil
import random

def split_dataset(source_dir, dest_dir, train_ratio=0.75, val_ratio=0.10, test_ratio=0.15, seed=42):
    """Copy the category subfolders of ``source_dir`` into train/val/test splits.

    Each immediate subdirectory of ``source_dir/neighbourhood`` and
    ``source_dir/park`` is treated as one indivisible unit. The units are
    shuffled with a seeded RNG, partitioned, and copied to
    ``dest_dir/<split>/<category>/<subfolder>``. The source is left untouched.

    Args:
        source_dir: Directory holding the ``neighbourhood`` and/or ``park``
            category folders. A category that is missing is skipped.
        dest_dir: Directory under which ``train``, ``val`` and ``test`` are
            created. A split that receives no folders is not created.
        train_ratio: Fraction of folders (rounded down) assigned to train.
        val_ratio: Fraction of folders (rounded down) assigned to val.
        test_ratio: Kept for backward compatibility; it does not affect the
            result. The test split always receives every folder that was not
            assigned to train or val.
        seed: Seed for the shuffle. The same seed and the same set of folder
            names always produce the same split.

    Raises:
        ValueError: If ``train_ratio`` or ``val_ratio`` is outside ``[0, 1]``
            or their sum exceeds 1.
        FileExistsError: Raised by ``shutil.copytree`` when a destination
            folder already exists (e.g. the split was already run).
    """
    for ratio_name, ratio in (("train_ratio", train_ratio), ("val_ratio", val_ratio)):
        if not 0 <= ratio <= 1:
            raise ValueError(f"{ratio_name} must be between 0 and 1, got {ratio!r}")
    # Small tolerance so float sums such as 0.7 + 0.3 are not rejected.
    if train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            f"train_ratio + val_ratio must not exceed 1, got {train_ratio + val_ratio!r}"
        )

    # Private RNG: same shuffle as random.seed(seed) + random.shuffle, but the
    # caller's global random state is left alone.
    rng = random.Random(seed)

    # Get all subfolders from neighbourhood and park
    all_folders = []
    for category in ("neighbourhood", "park"):
        category_path = os.path.join(source_dir, category)
        if not os.path.isdir(category_path):
            continue
        # os.listdir returns entries in arbitrary order; sorting first makes
        # the seeded shuffle reproducible across machines and filesystems.
        all_folders.extend(
            os.path.join(category_path, entry)
            for entry in sorted(os.listdir(category_path))
            if os.path.isdir(os.path.join(category_path, entry))
        )

    rng.shuffle(all_folders)

    total = len(all_folders)
    train_end = int(total * train_ratio)
    val_end = train_end + int(total * val_ratio)

    train_folders = all_folders[:train_end]
    val_folders = all_folders[train_end:val_end]
    # Test takes the remainder so no folder is dropped by rounding.
    test_folders = all_folders[val_end:]

    splits = (("train", train_folders), ("val", val_folders), ("test", test_folders))
    for split_name, folders in splits:
        for folder in folders:
            # Preserve the "neighbourhood/0" structure below each split.
            rel_path = os.path.relpath(folder, source_dir)
            shutil.copytree(folder, os.path.join(dest_dir, split_name, rel_path))

    print(f"✅ Split complete: {len(train_folders)} train, {len(val_folders)} val, {len(test_folders)} test folders.")

# Example usage: split the downloaded dataset into train/val/test folders
# using the function's default ratios and seed.
source = "/Users/sadik2/DDOS/data/test"   # your downloaded test directory
dest = "/Users/sadik2/main_project" # where you want train/val/test
split_dataset(source, dest)


# Output: ✅ Split complete: 15 train, 2 val, 3 test folders.
