In [2]:
import os
import shutil
import random
from tqdm import tqdm

# Split the per-class image folders of the source dataset into train/val/test
# folders under the destination directory, copying (not moving) every file.
#
# Resulting layout: <destination_directory>/<split>/<class>/<filename>

source_directory = "COVID-19_Radiography_Dataset"
classes = ["COVID", "Normal"]

# Destination
destination_directory = "DataSplit"

splits = ["train", "val", "test"]
# Fractions for train and val; test receives whatever is left over, so the
# integer truncation below never drops a file.
split_ratios = [0.70, 0.15, 0.15]

# Fixed seed so that re-running this cell reproduces the exact same partition.
# Without it, a second run would copy a different partition into the same
# destination tree and the same image could end up in both train and test.
random_seed = 42
rng = random.Random(random_seed)  # private RNG: leaves the global random state untouched

for cls in classes:
    img_directory = os.path.join(source_directory, cls, "images")  # e.g. COVID-19_Radiography_Dataset/COVID/images

    # os.listdir returns entries in arbitrary order, so sort first to make the
    # seeded shuffle deterministic. Keep regular files only: a sub-directory
    # (e.g. a notebook checkpoint folder) would make shutil.copy fail.
    img_files = sorted(
        name
        for name in os.listdir(img_directory)
        if os.path.isfile(os.path.join(img_directory, name))
    )
    rng.shuffle(img_files)

    train_split = int(split_ratios[0] * len(img_files))
    val_split = int(split_ratios[1] * len(img_files))

    # Consecutive, non-overlapping slices of the shuffled list.
    split_data = {
        "train": img_files[:train_split],
        "val": img_files[train_split:train_split + val_split],
        "test": img_files[train_split + val_split:],
    }

    for split in splits:
        target_directory = os.path.join(destination_directory, split, cls)
        os.makedirs(target_directory, exist_ok=True)

        for filename in tqdm(split_data[split], desc=f"Copying {cls}/{split}"):
            source_path = os.path.join(img_directory, filename)
            destination_path = os.path.join(target_directory, filename)
            shutil.copy(source_path, destination_path)


Copying COVID/train: 100%|██████████| 2531/2531 [00:01<00:00, 2171.28it/s]
Copying COVID/val: 100%|██████████| 542/542 [00:00<00:00, 2417.45it/s]
Copying COVID/test: 100%|██████████| 543/543 [00:00<00:00, 2458.23it/s]
Copying Normal/train: 100%|██████████| 7134/7134 [00:03<00:00, 2270.07it/s]
Copying Normal/val: 100%|██████████| 1528/1528 [00:00<00:00, 2233.41it/s]
Copying Normal/test: 100%|██████████| 1530/1530 [00:00<00:00, 1990.76it/s]
