In [1]:
import os
import shutil

import pandas as pd
from PIL import Image
from sklearn.model_selection import KFold

# Build a 10-fold cross-validation dataset layout for YOLO training:
#   <output_dir>/fold_<k>/images/{train,validation}/<image files>
#   <output_dir>/fold_<k>/labels/{train,validation}/<image stem>.txt
#
# NOTE(review): the paths below are absolute and machine-specific; adjust them
# when running this notebook on another machine.
image_dir = r"C:\Users\BMEI CMU\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\YOLO\train_images"

# os.listdir() returns entries in arbitrary order, so sort the listing: the
# seeded KFold split below is only reproducible when the input order is stable.
# Sub-directories are skipped because they cannot be copied as images.
train_image = sorted(
    name
    for name in os.listdir(image_dir)
    if os.path.isfile(os.path.join(image_dir, name))
)
train_label = pd.read_csv(r"C:\Users\BMEI CMU\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\YOLO\train.csv")
output_dir = r"C:\Users\BMEI CMU\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\YOLO\TRAIN_YOLO"

# Mapping from class name (as it appears in the CSV "label" column) to class ID.
class_mapping = {
    "red blood cell": 0,
    "trophozoite": 1,
    "schizont": 2,
    "difficult": 3,
    "ring": 4,
    "leukocyte": 5,
    "gametocyte": 6,
}

# Configure K-Fold cross-validation (10 folds).
kf = KFold(n_splits=10, shuffle=True, random_state=42)
train_image_df = pd.DataFrame({"filename": train_image})

_BOX_COLUMNS = ["xmin", "ymin", "xmax", "ymax"]

# Group the annotation rows by image once, instead of re-filtering the whole
# label frame for every image in every fold. Rows with a missing label or
# coordinate cannot produce a valid box and are left out.
_annotated = train_label.dropna(subset=["label", *_BOX_COLUMNS])
_unknown_labels = sorted({str(label) for label in _annotated["label"]} - set(class_mapping))
if _unknown_labels:
    # The original code skipped these silently; keep skipping, but say so.
    print(f"Warning: skipping annotations with unmapped labels: {_unknown_labels}")
_labels_by_image = {name: rows for name, rows in _annotated.groupby("image_name")}

# Cache of (width, height) per image: every image appears in all 10 folds.
_image_sizes = {}


def _image_size(image_name):
    """Return (width, height) in pixels for an image in ``image_dir``."""
    if image_name not in _image_sizes:
        # Context manager closes the file handle; Image.open reads lazily.
        with Image.open(os.path.join(image_dir, image_name)) as img:
            _image_sizes[image_name] = img.size
    return _image_sizes[image_name]


def _yolo_lines(image_name):
    """Return the YOLO label lines for one image (empty list if it has no boxes).

    YOLO expects ``class x_center y_center width height`` with all four box
    values normalised to [0, 1] by the image size.

    NOTE(review): this assumes the CSV xmin/ymin/xmax/ymax columns are corner
    coordinates in pixels -- confirm against the dataset description.
    """
    rows = _labels_by_image.get(image_name)
    if rows is None:
        return []

    img_w, img_h = _image_size(image_name)
    lines = []
    for label, xmin, ymin, xmax, ymax in zip(rows["label"], *(rows[col] for col in _BOX_COLUMNS)):
        class_id = class_mapping.get(label)
        if class_id is None:
            continue  # unmapped class name

        # Clip to the image so normalised values never leave [0, 1], and
        # order the corners in case min/max are swapped in the CSV.
        x0, x1 = sorted(min(max(float(v), 0.0), float(img_w)) for v in (xmin, xmax))
        y0, y1 = sorted(min(max(float(v), 0.0), float(img_h)) for v in (ymin, ymax))
        box_w, box_h = x1 - x0, y1 - y0
        if box_w <= 0 or box_h <= 0:
            continue  # degenerate box after clipping

        lines.append(
            f"{class_id} "
            f"{(x0 + x1) / 2 / img_w:.6f} {(y0 + y1) / 2 / img_h:.6f} "
            f"{box_w / img_w:.6f} {box_h / img_h:.6f}"
        )
    return lines


def _export_split(filenames, image_dst_dir, label_dst_dir):
    """Copy the images of one split and write their YOLO label files."""
    for image_name in filenames:
        # Byte-for-byte copy: avoids re-encoding (lossy for JPEG) and open handles.
        shutil.copy2(os.path.join(image_dir, image_name), os.path.join(image_dst_dir, image_name))

        # An empty label file is still written for images without boxes.
        label_file_path = os.path.join(label_dst_dir, os.path.splitext(image_name)[0] + ".txt")
        with open(label_file_path, "w", encoding="utf-8") as f:
            f.writelines(line + "\n" for line in _yolo_lines(image_name))


# Create the train/validation split of every fold and write images + labels.
for fold, (train_idx, test_idx) in enumerate(kf.split(train_image_df)):
    fold_dir = os.path.join(output_dir, f"fold_{fold+1}")
    image_train_dir = os.path.join(fold_dir, "images", "train")
    image_val_dir = os.path.join(fold_dir, "images", "validation")
    label_train_dir = os.path.join(fold_dir, "labels", "train")
    label_val_dir = os.path.join(fold_dir, "labels", "validation")
    for directory in (image_train_dir, image_val_dir, label_train_dir, label_val_dir):
        os.makedirs(directory, exist_ok=True)

    # Split into train and validation sets.
    train_fold_df = train_image_df.iloc[train_idx].reset_index(drop=True)
    test_fold_df = train_image_df.iloc[test_idx].reset_index(drop=True)

    _export_split(train_fold_df["filename"], image_train_dir, label_train_dir)
    _export_split(test_fold_df["filename"], image_val_dir, label_val_dir)


In [None]:
import os

def create_yaml_files(dataset_path, num_folds, class_names):
    """
    Write one YOLO dataset ``.yaml`` config file per fold.

    For each fold ``k`` in ``1..num_folds`` the file
    ``<dataset_path>/fold_<k>/fold_<k>.yaml`` is written. It points at the
    fold's ``images/train`` and ``images/validation`` directories and lists
    the class names. Paths are written with forward slashes.

    Parameters:
        dataset_path (str): root directory that contains the ``fold_<k>`` folders
        num_folds (int): number of folds to generate config files for
        class_names (dict): class ID -> class name, e.g. {0: 'malaria', 1: 'healthy'}

    Returns:
        None. Files are written to disk and one status line is printed per fold.

    Raises:
        OSError: if a fold directory cannot be created or a file cannot be written.
    """
    for fold in range(1, num_folds + 1):
        fold_dir = os.path.join(dataset_path, f"fold_{fold}")
        yaml_path = os.path.join(fold_dir, f"fold_{fold}.yaml")

        # Make sure the target directory exists so open() below cannot fail
        # with FileNotFoundError when a fold folder has not been created yet.
        os.makedirs(fold_dir, exist_ok=True)

        # Normalise Windows separators outside the f-string: a backslash inside
        # an f-string replacement field is a SyntaxError before Python 3.12.
        fold_root = fold_dir.replace("\\", "/")

        yaml_content = f"""\
# YOLO dataset config file for Fold {fold}
path: {fold_root}  # โฟลเดอร์หลักของ fold_{fold}
train: {fold_root}/images/train  # พาธไปยัง train set
val: {fold_root}/images/validation  # พาธไปยัง validation set

names:
"""
        for class_id, class_name in class_names.items():
            # Escape characters that are special inside a YAML double-quoted
            # scalar so unusual class names cannot produce an invalid file.
            safe_name = str(class_name).replace("\\", "\\\\").replace('"', '\\"')
            yaml_content += f"  {class_id}: \"{safe_name}\"\n"

        # Write the yaml file
        with open(yaml_path, "w", encoding="utf-8") as yaml_file:
            yaml_file.write(yaml_content)

        print(f"✅ สร้างไฟล์ YAML สำหรับ Fold {fold}: {yaml_path}")

# ตัวอย่างการใช้งาน
# Example usage: generate the per-fold YAML configs for the dataset built above.

# Root directory of the dataset (contains the fold_<k> folders)
dataset_path = r"C:\Users\BMEI CMU\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\YOLO\TRAIN_YOLO"

# Number of folds
num_folds = 10

# Class ID -> class name, with IDs assigned by position in this list
class_names = dict(enumerate([
    "red blood cell",
    "trophozoite",
    "schizont",
    "difficult",
    "ring",
    "leukocyte",
    "gametocyte",
]))

create_yaml_files(
    dataset_path=dataset_path,
    num_folds=num_folds,
    class_names=class_names,
)