In [None]:
import os
import shutil

import pandas as pd
from PIL import Image
from sklearn.model_selection import KFold

# Source image folders, named once so every step below reads from the same place.
TRAIN_IMAGE_DIR = r"C:\Users\BMEi\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\DATA_SET\train_images"
TEST_IMAGE_DIR = r"C:\Users\BMEi\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\DATA_SET\test_images"

# os.listdir returns entries in arbitrary order; sorting makes the seeded
# K-Fold split below reproducible from run to run.
train_image = sorted(os.listdir(TRAIN_IMAGE_DIR))
train_label = pd.read_csv(r"C:\Users\BMEi\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\DATA_SET\train.csv")
test_image = sorted(os.listdir(TEST_IMAGE_DIR))
test_label = pd.read_csv(r"C:\Users\BMEi\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\DATA_SET\test.csv")
output_dir = r"C:\Users\BMEi\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\DATA_SET\YOLOV8"

# Maps class names found in the CSV "label" column to numeric class ids.
class_mapping = {"red blood cell": 0,"trophozoite": 1,"schizont": 2,"difficult": 3,"ring": 4,"leukocyte": 5,"gametocyte": 6}


def convert_bbox(xmin, ymin, xmax, ymax, img_width, img_height):
    """Convert corner coordinates to (center_x, center_y, width, height) divided by the image size."""
    center_x = (xmin + xmax) / (2 * img_width)
    center_y = (ymin + ymax) / (2 * img_height)
    width = (xmax - xmin) / img_width
    height = (ymax - ymin) / img_height
    return center_x, center_y, width, height


def _copy_images(image_names, src_dir, dest_dir):
    """Copy each named file from src_dir to dest_dir without re-encoding it."""
    for image_name in image_names:
        src_path = os.path.join(src_dir, image_name)
        if os.path.exists(src_path):
            # A byte-for-byte copy keeps the pixels untouched; opening and
            # re-saving through PIL would recompress JPEG files.
            shutil.copyfile(src_path, os.path.join(dest_dir, image_name))


def _write_yolo_labels(image_names, image_dir, label_df, label_dir):
    """Write one label TXT per image, one "class cx cy w h" line per known-class row.

    A file is written for every image, even when no rows match (it is then empty).
    Rows whose label is not a key of class_mapping are skipped.
    """
    # Group once instead of scanning the whole table for every image.
    rows_by_image = {name: rows for name, rows in label_df.groupby("image_name")}

    for image_name in image_names:
        # Only the size is needed; the context manager closes the file handle.
        with Image.open(os.path.join(image_dir, image_name)) as img:
            img_width, img_height = img.size

        lines = []
        rows = rows_by_image.get(image_name)
        if rows is not None:
            for row in rows.itertuples(index=False):
                class_id = class_mapping.get(row.label, -1)
                if class_id == -1:
                    continue
                cx, cy, w, h = convert_bbox(row.xmin, row.ymin, row.xmax, row.ymax, img_width, img_height)
                lines.append(f"{class_id} {cx:.6f} {cy:.6f} {w:.6f} {h:.6f}\n")

        label_file_path = os.path.join(label_dir, os.path.splitext(image_name)[0] + ".txt")
        with open(label_file_path, 'w') as f:
            f.writelines(lines)


# Configure K-Fold cross validation (4 folds)
kf = KFold(n_splits=4, shuffle=True, random_state=42)
train_image_df = pd.DataFrame({"filename": train_image})

# Build the train/validation/test folders for every fold and write the TXT labels
for fold, (train_idx, test_idx) in enumerate(kf.split(train_image_df)):
    fold_dir = os.path.join(output_dir, f"fold_{fold+1}")

    image_train_dir = os.path.join(fold_dir, "train", "images")
    label_train_dir = os.path.join(fold_dir, "train", "labels")

    image_val_dir = os.path.join(fold_dir, "validation", "images")
    label_val_dir = os.path.join(fold_dir, "validation", "labels")

    image_test_dir = os.path.join(fold_dir, "test", "images")
    label_test_dir = os.path.join(fold_dir, "test", "labels")

    for directory in (
        image_train_dir, label_train_dir,
        image_val_dir, label_val_dir,
        image_test_dir, label_test_dir,
    ):
        os.makedirs(directory, exist_ok=True)

    # Split the training images into this fold's train and validation parts
    # (test_fold_df is the held-out part of the K-Fold split, used as validation).
    train_fold_df = train_image_df.iloc[train_idx].reset_index(drop=True)
    test_fold_df = train_image_df.iloc[test_idx].reset_index(drop=True)

    # Images: train / validation come from train_images, test from test_images
    _copy_images(train_fold_df["filename"], TRAIN_IMAGE_DIR, image_train_dir)
    _copy_images(test_fold_df["filename"], TRAIN_IMAGE_DIR, image_val_dir)
    _copy_images(test_image, TEST_IMAGE_DIR, image_test_dir)

    # Labels: same three groups, written as TXT files
    _write_yolo_labels(train_fold_df["filename"], TRAIN_IMAGE_DIR, train_label, label_train_dir)
    _write_yolo_labels(test_fold_df["filename"], TRAIN_IMAGE_DIR, train_label, label_val_dir)
    _write_yolo_labels(test_image, TEST_IMAGE_DIR, test_label, label_test_dir)

In [None]:
import os

def create_yaml_files(dataset_path, num_folds, class_names):
    """
    Write one YOLO dataset config file (fold_<k>.yaml) into each fold directory.

    Parameters:
        dataset_path (str): root path of the dataset; must already contain the
            fold_<k> directories.
        num_folds (int): number of folds; files are written for folds 1..num_folds.
        class_names (dict): dictionary of class names, e.g. {0: 'malaria', 1: 'healthy'}.
            Names are emitted in the dictionary's iteration order.

    Raises:
        OSError: if a fold directory is missing or the file cannot be written.
    """
    # The class list is identical for every fold, so build it once.
    names_list = ", ".join(f'"{name}"' for name in class_names.values())

    for fold in range(1, num_folds + 1):
        # Forward slashes keep the paths written into the YAML free of backslashes.
        fold_dir = os.path.join(dataset_path, f"fold_{fold}").replace('\\', '/')
        yaml_path = os.path.join(fold_dir, f"fold_{fold}.yaml").replace('\\', '/')

        # The key must be exactly "nc"; a stray combining character in front of
        # it turns it into a different, unrecognised key.
        yaml_content = (
            f"# YOLO dataset config file for Fold {fold}\n"
            f"train: {fold_dir}/train/images  # พาธไปยัง train set\n"
            f"val: {fold_dir}/validation/images # พาธไปยัง validation set\n"
            f"test: {fold_dir}/test/images  # พาธไปยัง test set\n"
            f"nc: {len(class_names)}\n"
            f"names: [{names_list}]\n"
        )

        # Write the yaml file
        with open(yaml_path, "w", encoding="utf-8") as yaml_file:
            yaml_file.write(yaml_content)

        print(f"✅ สร้างไฟล์ YAML สำหรับ Fold {fold}: {yaml_path}")

# Example usage
dataset_path = "C:/Users/BMEi/Documents/GitHub/WORK/Windows/CODE_BME/PROJECT_MALARIA/DATA_SET/YOLOV8"  # root path of the dataset
num_folds = 4  # number of folds

# Class ids are the positions in this list (0, 1, 2, ...).
class_names = dict(enumerate([
    "red blood cell",
    "trophozoite",
    "schizont",
    "difficult",
    "ring",
    "leukocyte",
    "gametocyte",
]))

create_yaml_files(dataset_path, num_folds, class_names)

In [None]:
from ultralytics import YOLO
import shutil
import os

# Dataset config files, one per fold, listed in fold order.
fold_paths = [
    r'C:\Users\BMEi\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\DATA_SET\YOLOV8\fold_1\fold_1.yaml',
    r'C:\Users\BMEi\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\DATA_SET\YOLOV8\fold_2\fold_2.yaml',
    r'C:\Users\BMEi\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\DATA_SET\YOLOV8\fold_3\fold_3.yaml',
    r'C:\Users\BMEi\Documents\GitHub\WORK\Windows\CODE_BME\PROJECT_MALARIA\DATA_SET\YOLOV8\fold_4\fold_4.yaml',
]

for i, path in enumerate(fold_paths, start=1):
    print(f"===== Fold {i} =====")

    # Every fold starts again from the same 'yolov8n.pt' weights.
    model = YOLO('yolov8n.pt')
    model.train(data=path, epochs=10, device='0', project='runs_fold', name=f'fold_{i}')
    model.val(device='0')

    # Export to ONNX; the return value may be a single path or a list/tuple of them.
    export_result = model.export(format='onnx')
    if isinstance(export_result, (list, tuple)):
        exported_path = export_result[0]
    else:
        exported_path = export_result

    # Give the exported file a fold-specific name in the current working directory.
    target_path = f'yolov8n_fold_{i}.onnx'
    if not os.path.exists(exported_path):
        print(f"❌ Export failed for fold {i}")
        continue
    shutil.move(exported_path, target_path)
    print(f"✅ Exported and saved to {target_path}")

print("✅ All folds completed.")


In [1]:
import os
import cv2
import logging
import pandas as pd
import matplotlib.pyplot as plt
from ultralytics import YOLO
from tabulate import tabulate

# Set the "ultralytics" logger threshold to ERROR so lower-severity records are dropped.
logging.getLogger("ultralytics").setLevel(logging.ERROR)

def calculate_iou(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Each box is an indexable of corner coordinates read as
    [x1, y1, x2, y2]. A small constant is added to the denominator so that
    two zero-area boxes give 0 instead of a division error.
    """
    # Corners of the overlap rectangle.
    left = max(boxA[0], boxB[0])
    top = max(boxA[1], boxB[1])
    right = min(boxA[2], boxB[2])
    bottom = min(boxA[3], boxB[3])

    # Negative extents mean the boxes do not overlap on that axis.
    overlap_w = max(0, right - left)
    overlap_h = max(0, bottom - top)
    intersection = overlap_w * overlap_h

    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    union = area_a + area_b - intersection

    return intersection / float(union + 1e-6)

# === PATH ===
# Folder holding the exported per-fold models, and the root of the fold datasets.
model_base_path = r"C:/Users/BMEi/Documents/GitHub/WORK/Windows/CODE_BME/PROJECT_MALARIA"
base_path = r"C:/Users/BMEi/Documents/GitHub/WORK/Windows/CODE_BME/PROJECT_MALARIA/DATA_SET/YOLOV8"

# Minimum IoU for a prediction to be accepted as a match for a ground-truth box.
iou_threshold = 0.5

# Output folder for the comparison figures; created up front.
save_image_folder = "results_images"
os.makedirs(save_image_folder, exist_ok=True)

# === RESULT STORAGE ===
results_table, detailed_results, false_positive_images = [], [], []
fold_stats = dict()

# === LOOP OVER FOLDS ===
for i in range(1, 5):
    # Load this fold's exported model and point at this fold's test split.
    model_path = os.path.join(model_base_path, f"yolov8n_fold_{i}.onnx")
    model = YOLO(model_path)

    images_folder = os.path.join(base_path, f"fold_{i}", "test", "images")
    labels_folder = os.path.join(base_path, f"fold_{i}", "test", "labels")

    total_gt = 0
    correct_detect = 0
    correct_class = 0

    # Images = number of images that have at least one FP or FN
    fold_stats.setdefault(i, {"TP": 0, "FP": 0, "FN": 0, "Images": 0})

    # Sorted so the order of rows in the detailed results is deterministic.
    for filename in sorted(os.listdir(images_folder)):
        if not filename.endswith(".jpg"):
            continue

        image_path = os.path.join(images_folder, filename)
        # Strip only the extension; str.replace would also rewrite a ".jpg"
        # that appears in the middle of the file name.
        label_path = os.path.join(labels_folder, os.path.splitext(filename)[0] + ".txt")
        if not os.path.exists(label_path):
            continue

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"⚠️ Could not read image, skipping: {image_path}")
            continue
        height, width = image.shape[:2]
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Ground truth: "class cx cy w h" with values relative to the image
        # size, converted here to pixel corner coordinates.
        gt_boxes = []
        with open(label_path, "r") as file:
            for line in file:
                parts = line.strip().split()
                if len(parts) != 5:
                    continue
                class_id, x, y, w, h = map(float, parts)
                x *= width
                y *= height
                w *= width
                h *= height
                gt_boxes.append([int(class_id), x - w/2, y - h/2, x + w/2, y + h/2])

        results = model(image_path)
        pred_boxes = [
            [int(box.cls[0].item()), *box.xyxy[0].cpu().numpy().tolist()]
            for box in results[0].boxes
        ]

        matched = set()
        pred_matched_flags = [False] * len(pred_boxes)

        # One-to-one matching: each GT box, in file order, takes the unused
        # prediction with the highest IoU that reaches the threshold.
        for gt_class, gt_x1, gt_y1, gt_x2, gt_y2 in gt_boxes:
            total_gt += 1
            matched_pred = None
            best_iou = 0
            matched_pred_class = None

            for j, (pred_class, pred_x1, pred_y1, pred_x2, pred_y2) in enumerate(pred_boxes):
                if pred_matched_flags[j]:
                    continue
                iou = calculate_iou([gt_x1, gt_y1, gt_x2, gt_y2], [pred_x1, pred_y1, pred_x2, pred_y2])
                if iou >= iou_threshold and iou > best_iou:
                    best_iou = iou
                    matched_pred = j
                    matched_pred_class = pred_class

            is_match = matched_pred is not None
            class_ok = is_match and matched_pred_class == gt_class
            if is_match:
                correct_detect += 1
                if class_ok:
                    correct_class += 1
                pred_matched_flags[matched_pred] = True
                matched.add(matched_pred)

            # One record per GT box; Pred_Class and IOU stay None when unmatched.
            detailed_results.append({
                "Fold": i, "Image": filename,
                "GT_Class": gt_class,
                "GT_Box": f"{gt_x1:.1f},{gt_y1:.1f},{gt_x2:.1f},{gt_y2:.1f}",
                "Pred_Class": matched_pred_class,
                "IOU": f"{best_iou:.3f}" if is_match else None,
                "Match": "Yes" if is_match else "No",
                "Class_Correct": "Yes" if class_ok else "No"
            })

        # image_rgb is a fresh array that is not modified afterwards, so it is
        # stored directly instead of holding a second full copy per image.
        false_positive_images.append((i, filename, image_rgb, gt_boxes, pred_boxes, matched, pred_matched_flags))

    if total_gt == 0:
        results_table.append([f"Fold {i}", "ไม่มี GT", "-", "-"])
    else:
        detect_acc = correct_detect / total_gt * 100
        class_acc = correct_class / total_gt * 100
        results_table.append([f"Fold {i}", total_gt, f"{detect_acc:.2f}%", f"{class_acc:.2f}%"])

# === DISPLAY SUMMARY TABLE ===
# One row per fold, as collected in results_table.
summary_headers = ["Fold", "Total GT", "Detection Accuracy", "Classification Accuracy"]
summary_text = tabulate(results_table, headers=summary_headers, tablefmt="grid")
print("\n📊 Detection & Classification Summary:\n")
print(summary_text)

# === EXPORT DETAILED CSV ===
# One row per ground-truth box, without the DataFrame index column.
detailed_df = pd.DataFrame(detailed_results)
detailed_df.to_csv("detection_detailed_results.csv", index=False)
print("\n📄 Exported detailed results to 'detection_detailed_results.csv'")

# === DRAW IMAGES ===
def _replay_gt_matching(gt_boxes, pred_boxes, threshold):
    """Return, for each GT box, the index of the prediction matched to it, or None.

    Matching is one-to-one: GT boxes are visited in order and each takes the
    still-unused prediction with the highest IoU that is at least `threshold`.
    A GT box whose only overlapping prediction was already taken by an earlier
    GT box therefore stays unmatched.
    """
    taken = [False] * len(pred_boxes)
    assignments = []
    for _gt_class, gx1, gy1, gx2, gy2 in gt_boxes:
        best_j = None
        best_iou = 0
        for j, (_pred_class, px1, py1, px2, py2) in enumerate(pred_boxes):
            if taken[j]:
                continue
            iou = calculate_iou([gx1, gy1, gx2, gy2], [px1, py1, px2, py2])
            if iou >= threshold and iou > best_iou:
                best_iou = iou
                best_j = j
        if best_j is not None:
            taken[best_j] = True
        assignments.append(best_j)
    return assignments


print("\n🖼️ Saving GT vs Prediction comparison images by fold into 'results_images/'...\n")

for fold, filename, image_rgb, gt_boxes, pred_boxes, matched, pred_matched_flags in false_positive_images:
    fold_dir = os.path.join(save_image_folder, f"fold{fold}")
    os.makedirs(fold_dir, exist_ok=True)

    # A GT box is a false negative exactly when the one-to-one matching left
    # it without a prediction. Testing "overlaps any matched prediction"
    # instead would hide GT boxes that lost their prediction to another GT box,
    # so TP + FN would no longer add up to the number of GT boxes.
    gt_assignments = _replay_gt_matching(gt_boxes, pred_boxes, iou_threshold)

    # Left panel: ground-truth boxes, missed ones labelled "FN".
    gt_img = image_rgb.copy()
    fn_count = 0
    for (gt_class, x1, y1, x2, y2), assigned in zip(gt_boxes, gt_assignments):
        is_fn = assigned is None
        if is_fn:
            fn_count += 1
        color = (0, 255, 255) if not is_fn else (0, 0, 255)
        label = f"FN: {gt_class}" if is_fn else f"GT: {gt_class}"
        cv2.rectangle(gt_img, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
        cv2.putText(gt_img, label, (int(x1), int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    # Right panel: predictions, matched ones labelled "TP" and the rest "FP".
    pred_img = image_rgb.copy()
    tp_count, fp_count = 0, 0
    for j, (pred_class, x1, y1, x2, y2) in enumerate(pred_boxes):
        if pred_matched_flags[j]:
            color = (0, 255, 255)
            label = f"TP: {pred_class}"
            tp_count += 1
        else:
            color = (255, 0, 0)
            label = f"FP: {pred_class}"
            fp_count += 1
        cv2.rectangle(pred_img, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
        cv2.putText(pred_img, label, (int(x1), int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    fig, axs = plt.subplots(1, 2, figsize=(16, 6))
    axs[0].imshow(gt_img)
    axs[0].set_title(f"[GT] Fold {fold} - {filename}")
    axs[0].axis('off')

    axs[1].imshow(pred_img)
    axs[1].set_title(f"[Prediction] TP={tp_count}, FP={fp_count}, FN={fn_count}")
    axs[1].axis('off')

    legend_text = "TP: Matched prediction   FP: Predicted without GT   FN: Ground Truth not detected"
    fig.text(0.5, 0.01, legend_text, ha='center', fontsize=11, style='italic')

    plt.tight_layout()
    # Keep only letters, digits, '_' and '-' from the base name for the output file name.
    safe_filename = os.path.splitext(filename)[0]
    safe_filename = "".join(c for c in safe_filename if c.isalnum() or c in ('_', '-'))
    save_path = os.path.join(fold_dir, f"{safe_filename}_TP{tp_count}_FP{fp_count}_FN{fn_count}.jpg")
    plt.savefig(save_path)
    plt.close(fig)

    fold_stats[fold]["TP"] += tp_count
    fold_stats[fold]["FP"] += fp_count
    fold_stats[fold]["FN"] += fn_count

    # Count only the images that have a problem (at least one FP or FN)
    if fp_count > 0 or fn_count > 0:
        fold_stats[fold]["Images"] += 1

# === EXPORT SUMMARY PER FOLD ===
# One row per fold in ascending fold order; "Images" is the number of images
# that had a problem (at least one FP or FN).
summary_rows = [
    dict(
        Fold=f"Fold {fold_no}",
        Images=stats["Images"],
        TP=stats["TP"],
        FP=stats["FP"],
        FN=stats["FN"],
    )
    for fold_no, stats in sorted(fold_stats.items())
]
summary_df = pd.DataFrame(summary_rows)
summary_df.to_csv("summary_per_fold.csv", index=False)


📊 Detection & Classification Summary:

+--------+------------+----------------------+---------------------------+
| Fold   |   Total GT | Detection Accuracy   | Classification Accuracy   |
| Fold 1 |       5922 | 98.21%               | 98.21%                    |
+--------+------------+----------------------+---------------------------+
| Fold 2 |       5922 | 97.45%               | 97.45%                    |
+--------+------------+----------------------+---------------------------+
| Fold 3 |       5922 | 96.64%               | 96.64%                    |
+--------+------------+----------------------+---------------------------+
| Fold 4 |       5922 | 97.01%               | 97.01%                    |
+--------+------------+----------------------+---------------------------+

📄 Exported detailed results to 'detection_detailed_results.csv'

🖼️ Saving GT vs Prediction comparison images by fold into 'results_images/'...

