In [None]:
import os
import random
import shutil

# Input folder for each category label, built from one shared prefix, plus
# the output roots that receive the train and test subsets.
_classified_root = 'data/classified_cells/'
category_paths = {label: _classified_root + label for label in ('A', 'R')}
train_path, test_path = 'data/train', 'data/test'

In [None]:
# Fraction of each category's files that goes to the train set; the rest goes
# to the test set (e.g. 0.8 for 80% train and 20% test). The split index is
# computed with int(), so the train count is rounded down.
split_ratio = 0.8

In [None]:
def clean_dir(category_path):
    """Delete the directory tree at *category_path*, if there is one.

    A path that does not exist is treated as already clean; any other error
    raised by ``shutil.rmtree`` propagates to the caller.

    Args:
        category_path: Path of the directory to remove, contents included.

    Returns:
        None.
    """
    try:
        shutil.rmtree(category_path)
    except FileNotFoundError:
        # Nothing on disk to remove, so there is nothing left to do.
        return None
    return None

In [None]:
# Remove the whole train output tree so files from an earlier run cannot
# linger alongside the new split.
clean_dir(train_path)

In [None]:
# Remove the whole test output tree so files from an earlier run cannot
# linger alongside the new split.
clean_dir(test_path)

In [None]:
# For every category, randomly split its files into a train and a test subset
# and copy them into <train_path>/<category> and <test_path>/<category>.
for category, path in category_paths.items():

    # Keep regular files only: os.listdir() also returns sub-directories
    # (e.g. Jupyter's .ipynb_checkpoints), and shutil.copy() raises on those.
    # Sorting gives the shuffle a stable starting order, so the split is
    # reproducible whenever the random module has been seeded.
    files = sorted(
        name for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    )

    # Shuffle in place so the split below is a random sample.
    random.shuffle(files)

    # The first split_ratio share (rounded down) is train; the rest is test.
    split_index = int(len(files) * split_ratio)
    train_files = files[:split_index]
    test_files = files[split_index:]

    # Per-category output directories.
    train_category_path = os.path.join(train_path, category)
    test_category_path = os.path.join(test_path, category)

    for dest_dir, names in (
        (train_category_path, train_files),
        (test_category_path, test_files),
    ):
        # Start from an empty directory so this cell can be re-run on its own
        # without mixing in files from a previous split.
        clean_dir(dest_dir)
        os.makedirs(dest_dir, exist_ok=True)

        for name in names:
            shutil.copy(os.path.join(path, name), os.path.join(dest_dir, name))