In [1]:
import numpy as np
import random
import os
import shutil

In [3]:
def split_data(input_dir, output_train_dir, output_val_dir, output_test_dir, train_ratio=0.7, val_ratio=0.1, test_ratio=0.2, num_shuffles=4, seed=42):
    """Move the ``.npy`` files of ``input_dir`` into train/val/test directories.

    The regular files in ``input_dir`` whose names end in ``.npy`` are listed,
    sorted, shuffled with a private seeded generator and then *moved* (not
    copied) into the three output directories.

    Args:
        input_dir: Directory that is scanned (non-recursively) for ``.npy`` files.
        output_train_dir: Destination of the training files; created if missing.
        output_val_dir: Destination of the validation files; created if missing.
        output_test_dir: Destination of the test files; created if missing.
        train_ratio: Fraction of the files for training. The count is
            ``int(total * train_ratio)``, i.e. rounded down.
        val_ratio: Fraction of the files for validation, rounded down likewise.
        test_ratio: Expected fraction for testing. The test set always receives
            every file left over after the train and validation slices, so this
            value is only used to check the ratios for consistency.
        num_shuffles: Number of consecutive in-place shuffles applied to the
            file list (0 keeps the sorted order).
        seed: Seed of the private random generator used for shuffling.

    Raises:
        ValueError: If a ratio is negative, if ``train_ratio + val_ratio``
            exceeds 1, or if ``input_dir`` holds no ``.npy`` files.
    """
    import warnings  # local import: only needed for the ratio sanity check

    if train_ratio < 0 or val_ratio < 0 or test_ratio < 0:
        raise ValueError("Split ratios must be non-negative.")
    if train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError("train_ratio + val_ratio must not exceed 1.")
    if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-6:
        warnings.warn(
            "train_ratio + val_ratio + test_ratio does not sum to 1; "
            "the test set receives all files left after the train/val slices.",
            stacklevel=2,
        )

    os.makedirs(output_train_dir, exist_ok=True)
    os.makedirs(output_val_dir, exist_ok=True)
    os.makedirs(output_test_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order, so sort them: otherwise
    # the same seed can yield different splits on different runs or machines.
    # Only regular files are kept so a sub-directory that happens to end in
    # ".npy" is never moved as if it were data.
    numpy_files = sorted(
        f for f in os.listdir(input_dir)
        if f.endswith('.npy') and os.path.isfile(os.path.join(input_dir, f))
    )
    total_files = len(numpy_files)

    if total_files == 0:
        raise ValueError("No .npy files found in the input directory.")

    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by np.random.shuffle, but leaves NumPy's global generator alone.
    rng = np.random.RandomState(seed)
    files_array = np.array(numpy_files)
    for _ in range(num_shuffles):
        rng.shuffle(files_array)

    train_end = int(total_files * train_ratio)
    val_end = train_end + int(total_files * val_ratio)

    train_files = files_array[:train_end]
    val_files = files_array[train_end:val_end]
    test_files = files_array[val_end:]  # remainder, absorbs rounding leftovers

    def move_and_remove(source_dir, dest_dir, files):
        """Move each named file from source_dir to dest_dir; return the failure count."""
        failures = 0
        for file in files:
            source_path = os.path.join(source_dir, file)
            dest_path = os.path.join(dest_dir, file)
            try:
                shutil.move(source_path, dest_path)
            except (FileNotFoundError, shutil.Error) as e:
                # Best effort: report the problem and carry on with the rest.
                failures += 1
                print(f"Error moving file {file}: {e}")
        return failures

    failed = move_and_remove(input_dir, output_train_dir, train_files)
    failed += move_and_remove(input_dir, output_val_dir, val_files)
    failed += move_and_remove(input_dir, output_test_dir, test_files)

    if failed:
        # Do not claim success when some files could not be moved.
        print(f"Data split finished with errors: {failed} of {total_files} file(s) could not be moved.")
    else:
        print("Data split complete. Original files removed from source directory.")


# Example usage: split the arrays stored in "numpy_arrays" into train/val/test
# sub-folders of that same directory. The files are moved, so running this a
# second time finds no .npy files left in the input directory.
input_directory = "numpy_arrays"
train_directory = "numpy_arrays/train"
val_directory = "numpy_arrays/val"
test_directory = "numpy_arrays/test"

split_data(
    input_dir=input_directory,
    output_train_dir=train_directory,
    output_val_dir=val_directory,
    output_test_dir=test_directory,
    num_shuffles=4,
)

Data split complete. Original files removed from source directory.
