Use this script to generate K-fold cross-validation splits of the `roboflow_merged` dataset.

In [None]:
# Take `../../datasets/roboflow_merged` (with `images` and `labels` in YOLO11 format) and split it into k folds for cross-validation; k is configurable below.

# Import necessary libraries
import os
import shutil
from sklearn.model_selection import KFold
import glob
import random
 
# Define paths
dataset_path = "../../datasets/roboflow_merged"
images_path = os.path.join(dataset_path, "images")
labels_path = os.path.join(dataset_path, "labels")
output_path = "../../datasets/cross_validation_splits"

# Parameters
k = 4  # Number of folds
random_seed = 21
image_patterns = ("*.jpg",)  # glob patterns selecting the images to split


def list_images(images_dir, patterns=("*.jpg",)):
    """Return the image paths in ``images_dir`` matching ``patterns``, sorted.

    ``glob.glob`` yields files in an arbitrary, filesystem-dependent order, so
    the result is sorted to make any later seeded shuffle reproducible across
    machines. Paths matched by more than one pattern are returned once.
    """
    found = set()
    for pattern in patterns:
        found.update(glob.glob(os.path.join(images_dir, pattern)))
    return sorted(found)


def label_path_for(image_file, labels_dir):
    """Return the path of the ``.txt`` label file paired with ``image_file``.

    Only the final extension is swapped, so names that contain ``.jpg`` in the
    middle (e.g. ``a.jpg.rf.123.jpg``) still map to the right label file.
    """
    stem, _ext = os.path.splitext(os.path.basename(image_file))
    return os.path.join(labels_dir, stem + ".txt")


def copy_split(files, labels_dir, split_dir):
    """Copy ``files`` and their labels into ``split_dir/images`` and ``split_dir/labels``.

    Both target directories are created if needed. An image without a label
    file is still copied (its label is simply skipped) instead of aborting the
    whole run half-way through.

    Returns:
        The list of image paths for which no label file was found.
    """
    images_out = os.path.join(split_dir, "images")
    labels_out = os.path.join(split_dir, "labels")
    os.makedirs(images_out, exist_ok=True)
    os.makedirs(labels_out, exist_ok=True)

    missing_labels = []
    for image_file in files:
        shutil.copy(image_file, images_out)
        label_file = label_path_for(image_file, labels_dir)
        if os.path.isfile(label_file):
            shutil.copy(label_file, labels_out)
        else:
            missing_labels.append(image_file)
    return missing_labels


def create_folds(images_dir, labels_dir, out_dir, n_splits, seed, patterns=("*.jpg",)):
    """Write ``n_splits`` train/val splits to ``out_dir/fold_<i>/{train,val}``.

    Raises:
        FileNotFoundError: if no image in ``images_dir`` matches ``patterns``.
    """
    image_files = list_images(images_dir, patterns)
    if not image_files:
        raise FileNotFoundError(
            f"No images matching {patterns} found in {images_dir}"
        )

    # Seed the global RNG as before so the shuffle is repeatable.
    random.seed(seed)
    random.shuffle(image_files)

    # NOTE(review): existing files in out_dir are not removed. Re-running with
    # a different k or seed into the same directory can leave stale images that
    # leak between train and val -- use a fresh output directory in that case.
    os.makedirs(out_dir, exist_ok=True)

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    for fold, (train_idx, val_idx) in enumerate(kf.split(image_files)):
        fold_path = os.path.join(out_dir, f"fold_{fold}")
        missing_labels = []
        for split_name, indices in (("train", train_idx), ("val", val_idx)):
            split_files = [image_files[idx] for idx in indices]
            missing_labels += copy_split(
                split_files, labels_dir, os.path.join(fold_path, split_name)
            )
        if missing_labels:
            print(
                f"Warning: fold_{fold}: {len(missing_labels)} image(s) had no "
                "label file and were copied without labels"
            )


# `__name__` is "__main__" both when run as a script and inside a notebook
# kernel, so the split is still generated when this cell is executed.
if __name__ == "__main__":
    create_folds(images_path, labels_path, output_path, k, random_seed, image_patterns)
    print(f"K-Fold cross-validation splits created at {output_path}")


K-Fold cross-validation splits created at ../../datasets/k_fold_splits
