In [1]:
import os
import shutil
import random

In [None]:
def create_federated_data(source_folder, num_clients=5, output_folder="federated data"):
    if not os.path.exists(source_folder):
        print(f"Source folder '{source_folder}' does not exist.")
        return
    
    # Create output folder
    os.makedirs(output_folder, exist_ok=True)
    
    # Get class subfolders
    class_folders = [d for d in os.listdir(source_folder) if os.path.isdir(os.path.join(source_folder, d))]
    
    # Create client directories
    for client_id in range(1, num_clients + 1):
        client_path = os.path.join(output_folder, f"client_{client_id}")
        os.makedirs(client_path, exist_ok=True)
        for class_name in class_folders:
            os.makedirs(os.path.join(client_path, class_name), exist_ok=True)
    
    # Distribute images
    for class_name in class_folders:
        class_path = os.path.join(source_folder, class_name)
        images = os.listdir(class_path)
        random.shuffle(images)  # Shuffle images for randomness
        
        # Split images across clients
        for idx, image in enumerate(images):
            client_id = (idx % num_clients) + 1  # Distribute evenly
            src = os.path.join(class_path, image)
            dst = os.path.join(output_folder, f"client_{client_id}", class_name, image)
            shutil.copy2(src, dst)
    
    print(f"Data successfully distributed among {num_clients} clients in '{output_folder}' folder.")

create_federated_data("processed data", num_clients=5, output_folder="federated data")

Data successfully distributed among 5 clients in 'federated data' folder.


In [3]:
def split_client_data(client_folder, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    if not os.path.exists(client_folder):
        print(f"Client folder '{client_folder}' does not exist.")
        return
    
    # Paths for train, val, and test sets
    for split in ["train", "val", "test"]:
        os.makedirs(os.path.join(client_folder, split), exist_ok=True)
    
    # Get class subfolders
    class_folders = [d for d in os.listdir(client_folder) if os.path.isdir(os.path.join(client_folder, d))]
    
    for class_name in class_folders:
        class_path = os.path.join(client_folder, class_name)
        images = os.listdir(class_path)
        random.shuffle(images)
        
        num_images = len(images)
        train_end = int(num_images * train_ratio)
        val_end = train_end + int(num_images * val_ratio)
        
        splits = {
            "train": images[:train_end],
            "val": images[train_end:val_end],
            "test": images[val_end:]
        }
        
        for split, split_images in splits.items():
            split_path = os.path.join(client_folder, split, class_name)
            os.makedirs(split_path, exist_ok=True)
            for image in split_images:
                shutil.move(os.path.join(class_path, image), os.path.join(split_path, image))
        
        # Remove empty class folder
        os.rmdir(class_path)

# Apply split to all clients
clients_folder = "federated data"
for client in os.listdir(clients_folder):
    client_path = os.path.join(clients_folder, client)
    if os.path.isdir(client_path):
        split_client_data(client_path)

print("Data successfully split into train, val, and test sets for each client.")

OSError: [WinError 145] The directory is not empty: 'federated data\\client_1\\test'