# Thesis

## Install Library

In [None]:
!pip install roboflow
!pip install ultralytics

## Import Library

In [None]:
import time
import os
import csv
import shutil
import numpy as np
import pandas as pd
import yaml
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from collections import defaultdict
from roboflow import Roboflow
from ultralytics import YOLO
from ultralytics.data.annotator import auto_annotate

## Download Dataset

In [None]:
# Object-detection dataset (Roboflow project version 9, exported in "yolov11" format).
# The API key is read from the ROBOFLOW_API_KEY environment variable so the
# secret is not stored inside the notebook.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("abiya-thesis").project("plant-pathology-2021-object-detection-j1jvh")
version = project.version(9)
dataset = version.download("yolov11", location="dataset/object_detection")

In [None]:
# Lesion-mask dataset (Roboflow project version 4, exported in "yolov11" format).
# The API key is read from the ROBOFLOW_API_KEY environment variable so the
# secret is not stored inside the notebook.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("abiya-thesis").project("plant-pathology-2021-instance-segmentation-h5pim")
version = project.version(4)
dataset = version.download("yolov11", location="dataset/mask_lesi")

In [None]:
# Lesion-and-leaf mask dataset (Roboflow project version 5, exported in "yolov11" format).
# The API key is read from the ROBOFLOW_API_KEY environment variable so the
# secret is not stored inside the notebook.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("abiya-thesis").project("plant-pathology-2021-instance-segmentation-h5pim")
version = project.version(5)
dataset = version.download("yolov11", location="dataset/mask_lesi_and_leaf")

In [None]:
# Leaf-class-only detection dataset (Roboflow project version 11, exported in "yolov11" format).
# The API key is read from the ROBOFLOW_API_KEY environment variable so the
# secret is not stored inside the notebook.
rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
project = rf.workspace("abiya-thesis").project("plant-pathology-2021-object-detection-j1jvh")
version = project.version(11)
dataset = version.download("yolov11", location="dataset/object_detection_leaf_class_only")

## Google Drive

In [None]:
# Set to True (inside Colab) to mount Google Drive and enable the copy helper.
GoogleDrive = False
if GoogleDrive:
    # Imported only when requested: this import is not attempted unless the flag is on.
    from google.colab import drive
    drive.mount('/content/gdrive')

    def copy_to_google_drive(source_folder, destination_folder):
        """Copy ``/content/<source_folder>`` to ``/content/gdrive/MyDrive/<destination_folder>``.

        Uses ``shutil.copytree`` instead of a shell ``cp -r``: missing parent
        folders on the Drive side are created, and an existing destination is
        merged into rather than receiving a nested copy on re-runs.

        Args:
            source_folder: Folder path relative to ``/content``.
            destination_folder: Folder path relative to ``/content/gdrive/MyDrive``.
        """
        shutil.copytree(
            f"/content/{source_folder}",
            f"/content/gdrive/MyDrive/{destination_folder}",
            dirs_exist_ok=True,
        )

## Global Function

In [None]:
def measure_and_save_time(start_time, output_path):
    """Write the wall-clock time elapsed since ``start_time`` to ``output_path``.

    Args:
        start_time: Reference timestamp, as returned by ``time.time()``.
        output_path: Text file to (over)write. Missing parent directories are
            created; a bare filename (no directory part) is also accepted.

    Returns:
        The text that was written, formatted as ``"<h>h <m>m <s>s"``.
    """
    elapsed = time.time() - start_time
    h, rem = divmod(elapsed, 3600)
    m, s = divmod(rem, 60)
    formatted = f"{int(h)}h {int(m)}m {int(s)}s"

    # os.path.dirname() is "" for a bare filename and os.makedirs("") raises,
    # so only create the directory when there is one.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_path, "w") as f:
        f.write(formatted)
    return formatted

## Training Object Detection

In [None]:
# Shared settings for the object-detection experiments
models = ["yolo11n.pt", "yolo11s.pt", "yolo11m.pt"]  # weights handed to YOLO()
sizes = ["nano", "small", "medium"]                  # run label paired with each entry of `models`
epochs, batch = 100, -1  # per the original note, batch=-1 limits GPU usage to 60%
data_path_objectDetection = "dataset/object_detection"
project_base_objectDetection = "results/object_detection"

In [None]:
# Train every checkpoint, then run val() on the same model object against the test split.
dataset_yaml = f"{data_path_objectDetection}/data.yaml"
training_root = f"{project_base_objectDetection}/training"
validation_root = f"{project_base_objectDetection}/validation"

for model_name, size in zip(models, sizes):
    print(f"Training {size} model...")
    model = YOLO(model_name)

    # Only the train() call is timed.
    start_time = time.time()
    training = model.train(
        data=dataset_yaml,
        epochs=epochs,
        imgsz=640,
        batch=batch,
        project=training_root,
        name=size,
        exist_ok=True,
    )
    measure_and_save_time(start_time, f"{training_root}/summary/time/{size}-train-time.txt")

    # Store the metrics object returned by train() as CSV text.
    csv_filename = f"{training_root}/summary/{size}-training-metrics.csv"
    Path(csv_filename).parent.mkdir(parents=True, exist_ok=True)
    Path(csv_filename).write_text(training.to_csv())

    validation = model.val(
        data=dataset_yaml,
        imgsz=640,
        project=validation_root,
        name=size,
        exist_ok=True,
        split="test",
    )

    # Same export for the test-split metrics.
    csv_filename = f"{validation_root}/summary/{size}-validation-metrics.csv"
    Path(csv_filename).parent.mkdir(parents=True, exist_ok=True)
    Path(csv_filename).write_text(validation.to_csv())

    print(f"Finished training and validating {size} model.")

In [None]:
# Run each trained detector over the test images and save the rendered predictions.
for size in sizes:
    print(f"Running prediction for {size} model...")

    # Paths come from project_base_objectDetection so this cell cannot drift
    # away from the folder the training cell wrote to.
    model = YOLO(f"{project_base_objectDetection}/training/{size}/weights/best.pt")

    # One output folder per model size
    output_dir = f"{project_base_objectDetection}/predict/{size}"
    os.makedirs(output_dir, exist_ok=True)

    results = model(
        source=f"{data_path_objectDetection}/test/images",
        exist_ok=True,
        stream=True,
    )

    for result in results:
        # Keep the source image's filename for the saved prediction.
        original_filename = os.path.basename(result.path)
        result.save(filename=os.path.join(output_dir, original_filename))

    print(f"Finished predicting for {size} model.")

In [None]:
%matplotlib inline
for size in sizes:
    df = pd.read_csv(f"{project_base_objectDetection}/training/{size}/results.csv")

    plt.figure(figsize=(10, 6))

    # Plot mAP50 dan mAP50-95
    plt.plot(df.index, df["metrics/mAP50(B)"], label="mAP@0.5")
    plt.plot(df.index, df["metrics/mAP50-95(B)"], label="mAP@0.5:0.95")

    # Tambahkan label dan judul
    plt.xlabel("Epoch")
    plt.ylabel("mAP")
    plt.title(f"Model Performance - Model {size}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    # Simpan plot
    graph_output_dir = f"{project_base_objectDetection}/graph"
    os.makedirs(graph_output_dir, exist_ok=True)
    plt.savefig(os.path.join(graph_output_dir, f"mAP_comparison_{size}.png"))
    plt.show()

In [None]:
# Back up the dataset and the results to Google Drive under the same relative paths
if GoogleDrive:
    for folder in ("dataset/object_detection", "results/object_detection"):
        copy_to_google_drive(folder, folder)

## Semi Auto Annotate Mask Daun

In [None]:
# Settings for the semi-automatic leaf-mask annotation
splits = ["train", "valid", "test"]
sam2_model = "sam2.1_b.pt"
data_path_maskDaun = "dataset/mask_daun"
project_base_maskDaun = "results/mask_daun"

# Detection weights passed to auto_annotate(): best.pt of the chosen object-detection run
best_objectDetection_model_size = "medium"
best_objectDetection_model_path = "/".join(
    [project_base_objectDetection, "training", best_objectDetection_model_size, "weights", "best.pt"]
)

In [None]:
# Copy the original images of every split, plus data.yaml, into the leaf-mask dataset folder
for split in splits:
    images_src = f"{data_path_objectDetection}/{split}/images"
    images_dst = f"{data_path_maskDaun}/{split}/images"
    shutil.copytree(images_src, images_dst, dirs_exist_ok=True)

shutil.copy(f"{data_path_objectDetection}/data.yaml", f"{data_path_maskDaun}/data.yaml")

In [None]:
# Write auto-generated labels next to the images of every split
for split in splits:
    split_dir = f"{data_path_maskDaun}/{split}"
    auto_annotate(
        data=f"{split_dir}/images/",
        det_model=best_objectDetection_model_path,
        sam_model=sam2_model,
        output_dir=f"{split_dir}/labels/",
    )

## Training Mask Daun

In [None]:
# Shared settings for the leaf-mask segmentation experiments
models = ["yolo11n-seg.pt", "yolo11s-seg.pt", "yolo11m-seg.pt"]  # weights handed to YOLO()
sizes = ["nano", "small", "medium"]                              # run label paired with each entry of `models`
epochs, batch = 100, -1  # per the original note, batch=-1 limits GPU usage to 60%
data_path_maskDaun = "dataset/mask_daun"
project_base_maskDaun = "results/mask_daun"

In [None]:
# Train every checkpoint, then run val() on the same model object against the test split.
dataset_yaml = f"{data_path_maskDaun}/data.yaml"
training_root = f"{project_base_maskDaun}/training"
validation_root = f"{project_base_maskDaun}/validation"

for model_name, size in zip(models, sizes):
    print(f"Training {size} model...")
    model = YOLO(model_name)

    # Only the train() call is timed.
    start_time = time.time()
    training = model.train(
        data=dataset_yaml,
        epochs=epochs,
        imgsz=640,
        batch=batch,
        project=training_root,
        name=size,
        exist_ok=True,
    )
    measure_and_save_time(start_time, f"{training_root}/summary/time/{size}-train-time.txt")

    # Store the metrics object returned by train() as CSV text.
    csv_filename = f"{training_root}/summary/{size}-training-metrics.csv"
    Path(csv_filename).parent.mkdir(parents=True, exist_ok=True)
    Path(csv_filename).write_text(training.to_csv())

    validation = model.val(
        data=dataset_yaml,
        imgsz=640,
        project=validation_root,
        name=size,
        exist_ok=True,
        split="test",
    )

    # Same export for the test-split metrics.
    csv_filename = f"{validation_root}/summary/{size}-validation-metrics.csv"
    Path(csv_filename).parent.mkdir(parents=True, exist_ok=True)
    Path(csv_filename).write_text(validation.to_csv())

    print(f"Finished training and validating {size} model.")

In [None]:
# Run each trained leaf-mask model over the test images and save the rendered predictions.
for size in sizes:
    print(f"Running prediction for {size} model...")

    # Paths come from project_base_maskDaun so this cell cannot drift away
    # from the folder the training cell wrote to.
    model = YOLO(f"{project_base_maskDaun}/training/{size}/weights/best.pt")

    # One output folder per model size
    output_dir = f"{project_base_maskDaun}/predict/{size}"
    os.makedirs(output_dir, exist_ok=True)

    results = model(
        source=f"{data_path_maskDaun}/test/images",
        exist_ok=True,
        stream=True,
    )

    for result in results:
        # Keep the source image's filename for the saved prediction.
        original_filename = os.path.basename(result.path)
        result.save(filename=os.path.join(output_dir, original_filename))

    print(f"Finished predicting for {size} model.")

In [None]:
%matplotlib inline
for size in sizes:
    df = pd.read_csv(f"{project_base_maskDaun}/training/{size}/results.csv")

    plt.figure(figsize=(10, 6))

    # Plot mAP50 dan mAP50-95
    plt.plot(df.index, df["metrics/mAP50(B)"], label="mAP@0.5")
    plt.plot(df.index, df["metrics/mAP50-95(B)"], label="mAP@0.5:0.95")

    # Tambahkan label dan judul
    plt.xlabel("Epoch")
    plt.ylabel("mAP")
    plt.title(f"Model Performance - Model {size}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    # Simpan plot
    graph_output_dir = f"{project_base_maskDaun}/graph"
    os.makedirs(graph_output_dir, exist_ok=True)
    plt.savefig(os.path.join(graph_output_dir, f"mAP_comparison_{size}.png"))
    plt.show()

In [None]:
# Back up the dataset and the results to Google Drive under the same relative paths
if GoogleDrive:
    for folder in ("dataset/mask_daun", "results/mask_daun"):
        copy_to_google_drive(folder, folder)

## Training Mask Lesi

In [None]:
# Shared settings for the lesion-mask segmentation experiments
models = ["yolo11n-seg.pt", "yolo11s-seg.pt", "yolo11m-seg.pt"]  # weights handed to YOLO()
sizes = ["nano", "small", "medium"]                              # run label paired with each entry of `models`
epochs, batch = 100, -1  # per the original note, batch=-1 limits GPU usage to 60%
data_path_maskLesi = "dataset/mask_lesi"
project_base_maskLesi = "results/mask_lesi"

In [None]:
# Train every checkpoint, then run val() on the same model object against the test split.
dataset_yaml = f"{data_path_maskLesi}/data.yaml"
training_root = f"{project_base_maskLesi}/training"
validation_root = f"{project_base_maskLesi}/validation"

for model_name, size in zip(models, sizes):
    print(f"Training {size} model...")
    model = YOLO(model_name)

    # Only the train() call is timed.
    start_time = time.time()
    training = model.train(
        data=dataset_yaml,
        epochs=epochs,
        imgsz=640,
        batch=batch,
        project=training_root,
        name=size,
        exist_ok=True,
    )
    measure_and_save_time(start_time, f"{training_root}/summary/time/{size}-train-time.txt")

    # Store the metrics object returned by train() as CSV text.
    csv_filename = f"{training_root}/summary/{size}-training-metrics.csv"
    Path(csv_filename).parent.mkdir(parents=True, exist_ok=True)
    Path(csv_filename).write_text(training.to_csv())

    validation = model.val(
        data=dataset_yaml,
        imgsz=640,
        project=validation_root,
        name=size,
        exist_ok=True,
        split="test",
    )

    # Same export for the test-split metrics.
    csv_filename = f"{validation_root}/summary/{size}-validation-metrics.csv"
    Path(csv_filename).parent.mkdir(parents=True, exist_ok=True)
    Path(csv_filename).write_text(validation.to_csv())

    print(f"Finished training and validating {size} model.")

In [None]:
# Run each trained lesion-mask model over the test images and save the rendered predictions.
for size in sizes:
    print(f"Running prediction for {size} model...")

    # Paths come from project_base_maskLesi so this cell cannot drift away
    # from the folder the training cell wrote to.
    model = YOLO(f"{project_base_maskLesi}/training/{size}/weights/best.pt")

    # One output folder per model size
    output_dir = f"{project_base_maskLesi}/predict/{size}"
    os.makedirs(output_dir, exist_ok=True)

    results = model(
        source=f"{data_path_maskLesi}/test/images",
        exist_ok=True,
        stream=True,
    )

    for result in results:
        # Keep the source image's filename for the saved prediction.
        original_filename = os.path.basename(result.path)
        result.save(filename=os.path.join(output_dir, original_filename))

    print(f"Finished predicting for {size} model.")

In [None]:
%matplotlib inline
for size in sizes:
    df = pd.read_csv(f"{project_base_maskLesi}/training/{size}/results.csv")

    plt.figure(figsize=(10, 6))

    # Plot mAP50 dan mAP50-95
    plt.plot(df.index, df["metrics/mAP50(B)"], label="mAP@0.5")
    plt.plot(df.index, df["metrics/mAP50-95(B)"], label="mAP@0.5:0.95")

    # Tambahkan label dan judul
    plt.xlabel("Epoch")
    plt.ylabel("mAP")
    plt.title(f"Model Performance - Model {size}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    # Simpan plot
    graph_output_dir = f"{project_base_maskLesi}/graph"
    os.makedirs(graph_output_dir, exist_ok=True)
    plt.savefig(os.path.join(graph_output_dir, f"mAP_comparison_{size}.png"))
    plt.show()

In [None]:
# Back up the dataset and the results to Google Drive under the same relative paths
if GoogleDrive:
    for folder in ("dataset/mask_lesi", "results/mask_lesi"):
        copy_to_google_drive(folder, folder)

## Training Mask Lesi dan Daun

In [None]:
# Shared settings for the combined lesion-and-leaf segmentation experiments
models = ["yolo11n-seg.pt", "yolo11s-seg.pt", "yolo11m-seg.pt"]  # weights handed to YOLO()
sizes = ["nano", "small", "medium"]                              # run label paired with each entry of `models`
epochs, batch = 100, -1  # per the original note, batch=-1 limits GPU usage to 60%
data_path_maskLesi_and_maskLeaf = "dataset/mask_lesi_and_leaf"
project_base_maskLesi_and_maskLeaf = "results/mask_lesi_and_leaf"

In [None]:
# Train every lesion-and-leaf checkpoint, then run val() on the same model
# object against the test split.
for model_name, size in zip(models, sizes):
    print(f"Training {size} model...")
    model = YOLO(model_name)

    # Only the train() call is timed.
    start_time = time.time()
    training = model.train(
        data=f"{data_path_maskLesi_and_maskLeaf}/data.yaml",
        epochs=epochs,
        imgsz=640,
        batch=batch,
        project=f"{project_base_maskLesi_and_maskLeaf}/training",
        name=f"{size}",
        exist_ok=True
    )

    measure_and_save_time(start_time, f"{project_base_maskLesi_and_maskLeaf}/training/summary/time/{size}-train-time.txt")
    csv_filename = f"{project_base_maskLesi_and_maskLeaf}/training/summary/{size}-training-metrics.csv"
    os.makedirs(os.path.dirname(csv_filename), exist_ok=True)
    with open(csv_filename, "w") as f:
        f.write(training.to_csv())

    # Validation output belongs under this experiment's own results folder.
    # It previously went to project_base_maskLesi, overwriting the
    # lesion-only validation runs and summaries.
    validation = model.val(
        data=f"{data_path_maskLesi_and_maskLeaf}/data.yaml",
        imgsz=640,
        project=f"{project_base_maskLesi_and_maskLeaf}/validation",
        name=f"{size}",
        exist_ok=True,
        split="test"
    )

    csv_filename = f"{project_base_maskLesi_and_maskLeaf}/validation/summary/{size}-validation-metrics.csv"
    os.makedirs(os.path.dirname(csv_filename), exist_ok=True)
    with open(csv_filename, "w") as f:
        f.write(validation.to_csv())

    print(f"Finished training and validating {size} model.")

In [None]:
# Run each trained lesion-and-leaf model over the test images and save the rendered predictions.
for size in sizes:
    print(f"Running prediction for {size} model...")

    # Load the weights trained in THIS section. The cell previously loaded
    # results/mask_lesi/... and wrote into results/mask_lesi/predict, i.e. it
    # ran the lesion-only model and overwrote that experiment's predictions.
    model = YOLO(f"{project_base_maskLesi_and_maskLeaf}/training/{size}/weights/best.pt")

    # One output folder per model size
    output_dir = f"{project_base_maskLesi_and_maskLeaf}/predict/{size}"
    os.makedirs(output_dir, exist_ok=True)

    results = model(
        source=f"{data_path_maskLesi_and_maskLeaf}/test/images",
        exist_ok=True,
        stream=True,
    )

    for result in results:
        # Keep the source image's filename for the saved prediction.
        original_filename = os.path.basename(result.path)
        result.save(filename=os.path.join(output_dir, original_filename))

    print(f"Finished predicting for {size} model.")

In [None]:
%matplotlib inline
for size in sizes:
    df = pd.read_csv(f"{project_base_maskLesi_and_maskLeaf}/training/{size}/results.csv")

    plt.figure(figsize=(10, 6))

    # Plot mAP50 dan mAP50-95
    plt.plot(df.index, df["metrics/mAP50(B)"], label="mAP@0.5")
    plt.plot(df.index, df["metrics/mAP50-95(B)"], label="mAP@0.5:0.95")

    # Tambahkan label dan judul
    plt.xlabel("Epoch")
    plt.ylabel("mAP")
    plt.title(f"Model Performance - Model {size}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    # Simpan plot
    graph_output_dir = f"{project_base_maskLesi_and_maskLeaf}/graph"
    os.makedirs(graph_output_dir, exist_ok=True)
    plt.savefig(os.path.join(graph_output_dir, f"mAP_comparison_{size}.png"))
    plt.show()

In [None]:
# Back up the dataset and the results to Google Drive under the same relative paths
if GoogleDrive:
    for folder in ("dataset/mask_lesi_and_leaf", "results/mask_lesi_and_leaf"):
        copy_to_google_drive(folder, folder)

## Object Detection Class Leaf Only

In [None]:
# Shared settings for the leaf-class-only detection experiments
models = ["yolo11n.pt", "yolo11s.pt", "yolo11m.pt"]  # weights handed to YOLO()
sizes = ["nano", "small", "medium"]                  # run label paired with each entry of `models`
epochs, batch = 100, -1  # per the original note, batch=-1 limits GPU usage to 60%
data_path_objectDetection_leaf_class_only = "dataset/object_detection_leaf_class_only"
project_base_objectDetection_leaf_class_only = "results/object_detection_leaf_class_only"

In [None]:
# Train every checkpoint, then run val() on the same model object against the test split.
dataset_yaml = f"{data_path_objectDetection_leaf_class_only}/data.yaml"
training_root = f"{project_base_objectDetection_leaf_class_only}/training"
validation_root = f"{project_base_objectDetection_leaf_class_only}/validation"

for model_name, size in zip(models, sizes):
    print(f"Training {size} model...")
    model = YOLO(model_name)

    training = model.train(
        data=dataset_yaml,
        epochs=epochs,
        imgsz=640,
        batch=batch,
        project=training_root,
        name=size,
        exist_ok=True,
    )

    # Store the metrics object returned by train() as CSV text.
    csv_filename = f"{training_root}/summary/{size}-training-metrics.csv"
    Path(csv_filename).parent.mkdir(parents=True, exist_ok=True)
    Path(csv_filename).write_text(training.to_csv())

    validation = model.val(
        data=dataset_yaml,
        imgsz=640,
        project=validation_root,
        name=size,
        exist_ok=True,
        split="test",
    )

    # Same export for the test-split metrics.
    csv_filename = f"{validation_root}/summary/{size}-validation-metrics.csv"
    Path(csv_filename).parent.mkdir(parents=True, exist_ok=True)
    Path(csv_filename).write_text(validation.to_csv())

    print(f"Finished training and validating {size} model.")

In [None]:
# Run each trained leaf-only detector over the test images and save the rendered predictions.
for size in sizes:
    print(f"Running prediction for {size} model...")

    # Paths come from project_base_objectDetection_leaf_class_only so this cell
    # cannot drift away from the folder the training cell wrote to.
    model = YOLO(f"{project_base_objectDetection_leaf_class_only}/training/{size}/weights/best.pt")

    # One output folder per model size
    output_dir = f"{project_base_objectDetection_leaf_class_only}/predict/{size}"
    os.makedirs(output_dir, exist_ok=True)

    results = model(
        source=f"{data_path_objectDetection_leaf_class_only}/test/images",
        exist_ok=True,
        stream=True,
    )

    for result in results:
        # Keep the source image's filename for the saved prediction.
        original_filename = os.path.basename(result.path)
        result.save(filename=os.path.join(output_dir, original_filename))

    print(f"Finished predicting for {size} model.")

In [None]:
%matplotlib inline
for size in sizes:
    df = pd.read_csv(f"{project_base_objectDetection_leaf_class_only}/training/{size}/results.csv")

    plt.figure(figsize=(10, 6))

    # Plot mAP50 dan mAP50-95
    plt.plot(df.index, df["metrics/mAP50(B)"], label="mAP@0.5")
    plt.plot(df.index, df["metrics/mAP50-95(B)"], label="mAP@0.5:0.95")

    # Tambahkan label dan judul
    plt.xlabel("Epoch")
    plt.ylabel("mAP")
    plt.title(f"Model Performance - Model {size}")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    # Simpan plot
    graph_output_dir = f"{project_base_objectDetection_leaf_class_only}/graph"
    os.makedirs(graph_output_dir, exist_ok=True)
    plt.savefig(os.path.join(graph_output_dir, f"mAP_comparison_{size}.png"))
    plt.show()

In [None]:
# Back up the dataset and the results to Google Drive under the same relative paths
if GoogleDrive:
    for folder in ("dataset/object_detection_leaf_class_only", "results/object_detection_leaf_class_only"):
        copy_to_google_drive(folder, folder)

## Fine Tune For Detect Healthy, Rust, and Scab

In [None]:
# Source YOLO dataset (training split)
image_dir = Path("dataset/object_detection/train/images")
label_dir = Path("dataset/object_detection/train/labels")

# Destination root for the few-shot subsets
few_shot_base = Path("dataset/few_shot")

# Class id -> class name; the name doubles as the keyword searched for in filenames
class_map = {0: "frog-eye-leaf-spot", 1: "healthy", 2: "rust"}

# Group (image, label) pairs per class, using the class name embedded in the filename
class_to_files = defaultdict(list)
for img_file in sorted(image_dir.glob("*.jpg")):
    filename_lower = img_file.name.lower()
    # First matching class wins, checked in class-id order; None means unrecognised (skipped)
    cls = next((cid for cid, keyword in class_map.items() if keyword in filename_lower), None)
    label_file = label_dir / f"{img_file.stem}.txt"
    if cls is not None and label_file.exists():
        class_to_files[cls].append((img_file, label_file))

# Build one subset per shot count from the FIRST n pairs of every class
for n in [5, *range(10, 51, 5)]:
    subset_dir = few_shot_base / f"{n}-shot"
    target_image_dir = subset_dir / "images"
    target_label_dir = subset_dir / "labels"
    for folder in (target_image_dir, target_label_dir):
        folder.mkdir(parents=True, exist_ok=True)

    for cls in class_map:
        for img_path, label_path in class_to_files[cls][:n]:
            shutil.copy(img_path, target_image_dir / img_path.name)
            shutil.copy(label_path, target_label_dir / label_path.name)

# Reuse the object-detection data.yaml for the few-shot dataset
shutil.copy("dataset/object_detection/data.yaml", "dataset/few_shot/data.yaml")

In [None]:
# Shared settings for the few-shot fine-tuning experiments
sizes = ["nano", "small", "medium"]
shots = [5, *range(10, 51, 5)]

# Starting weights per size: best.pt of the leaf-class-only detection runs
models = {
    "nano": "results/object_detection_leaf_class_only/training/nano/weights/best.pt",
    "small": "results/object_detection_leaf_class_only/training/small/weights/best.pt",
    "medium": "results/object_detection_leaf_class_only/training/medium/weights/best.pt",
}

# Every size is trained on every shot count (all entries share the same list object)
size_to_shots = dict.fromkeys(["nano", "small", "medium"], shots)

epochs, batch = 50, -1  # per the original note, batch=-1 limits GPU usage to 60%
data_path_few_shot = "dataset/few_shot"
project_base_few_shot = "results/few_shot"

In [None]:
# Fine-tune every model size on every few-shot subset, then run val() on the test split.
for size in sizes:
    for shot in size_to_shots[size]:
        # 1. Load the base dataset description
        data_yaml_path = f"{data_path_few_shot}/data.yaml"
        with open(data_yaml_path, 'r') as f:
            data_yaml = yaml.safe_load(f)

        # 2. Point the train split at this shot's subset
        # NOTE(review): the val/test entries stay as copied from the object-detection
        # data.yaml - confirm they still resolve from dataset/few_shot.
        data_yaml['train'] = f"../{shot}-shot/images"

        # 3. Write the modified YAML to a scratch file (overwritten on every iteration)
        temp_yaml_path = f"{data_path_few_shot}/data-shot.yaml"
        with open(temp_yaml_path, 'w') as f:
            yaml.dump(data_yaml, f, sort_keys=False, default_flow_style=True)

        model = YOLO(models[size])
        start_time = time.time()
        training = model.train(
            data = temp_yaml_path,
            epochs = epochs,
            imgsz=640,
            batch=batch,
            project=f"{project_base_few_shot}/training",
            name=f"{shot}-shot-{size}",
            exist_ok=True,
            # patience=25,
            # freeze=8
        )

        # start_time used to be captured but never consumed; record the training
        # duration the same way the other training loops in this notebook do.
        measure_and_save_time(start_time, f"{project_base_few_shot}/training/summary/time/{shot}-shot-{size}-train-time.txt")

        csv_filename = f"{project_base_few_shot}/training/summary/{shot}-shot-{size}-training-metrics.csv"
        os.makedirs(os.path.dirname(csv_filename), exist_ok=True)
        with open(csv_filename, "w") as f:
            f.write(training.to_csv())

        validation = model.val(
            data = temp_yaml_path,
            imgsz=640,
            project=f"{project_base_few_shot}/validation",
            name=f"{shot}-shot-{size}",
            exist_ok=True,
            split="test"
        )

        csv_filename = f"{project_base_few_shot}/validation/summary/{shot}-shot-{size}-validation-metrics.csv"
        os.makedirs(os.path.dirname(csv_filename), exist_ok=True)
        with open(csv_filename, "w") as f:
            f.write(validation.to_csv())

In [None]:
%matplotlib inline
for shot in shots:
    df = pd.read_csv(f"{project_base_few_shot}/training/{shot}-shot-{size}/results.csv")

    plt.figure(figsize=(10, 6))

    # Plot mAP50 dan mAP50-95
    plt.plot(df.index, df["metrics/mAP50(B)"], label="mAP@0.5")
    plt.plot(df.index, df["metrics/mAP50-95(B)"], label="mAP@0.5:0.95")

    # Tambahkan label dan judul
    plt.xlabel("Epoch")
    plt.ylabel("mAP")
    plt.title(f"Model Performance - {size} | {shot}-shot")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()

    # Simpan plot
    graph_output_dir = f"{project_base_few_shot}/graph"
    os.makedirs(graph_output_dir, exist_ok=True)
    plt.savefig(os.path.join(graph_output_dir, f"mAP_comparison_{shot}-shot-{size}.png"))
    plt.show()

In [None]:
# Back up the dataset and the results to Google Drive under the same relative paths
if GoogleDrive:
    for folder in ("dataset/few_shot", "results/few_shot"):
        copy_to_google_drive(folder, folder)

## Perbandingan Beberapa k-query Set vs n-shot

In [None]:
# Source YOLO dataset (training split)
image_dir = Path("dataset/object_detection/train/images")
label_dir = Path("dataset/object_detection/train/labels")

# Destination root for the support sets
support_set_base = Path("dataset/support_set")

# Class id -> class name; the name doubles as the keyword searched for in filenames
class_map = {0: "frog-eye-leaf-spot", 1: "healthy", 2: "rust"}

# Group (image, label) pairs per class, using the class name embedded in the filename
class_to_files = defaultdict(list)
for img_file in sorted(image_dir.glob("*.jpg")):
    filename_lower = img_file.name.lower()
    # First matching class wins, checked in class-id order; None means unrecognised (skipped)
    cls = next((cid for cid, keyword in class_map.items() if keyword in filename_lower), None)
    label_file = label_dir / f"{img_file.stem}.txt"
    if cls is not None and label_file.exists():
        class_to_files[cls].append((img_file, label_file))

# Build one support set per image count from the LAST n pairs of every class
for n in [5, *range(10, 51, 5)]:
    subset_dir = support_set_base / f"{n}-images"
    target_image_dir = subset_dir / "images"
    target_label_dir = subset_dir / "labels"
    for folder in (target_image_dir, target_label_dir):
        folder.mkdir(parents=True, exist_ok=True)

    for cls in class_map:
        for img_path, label_path in class_to_files[cls][-n:]:
            shutil.copy(img_path, target_image_dir / img_path.name)
            shutil.copy(label_path, target_label_dir / label_path.name)

# Reuse the object-detection data.yaml for the support-set dataset
shutil.copy("dataset/object_detection/data.yaml", "dataset/support_set/data.yaml")

In [None]:
# ======================================================
# Global variables
# Evaluates every few-shot model (size x shot count) on every support set,
# then saves, plots and prints the resulting mAP@0.5 values.
# ======================================================
sizes = ["nano", "small", "medium"]
shots = [5] + list(range(10, 51, 5))
project_base_few_shot = "results/few_shot"
support_set_base = "dataset/support_set"

# Path to the original dataset YAML; the per-evaluation YAML files are written next to it
data_yaml_original_path = f"{support_set_base}/data.yaml"
temp_data_yaml_dir = os.path.dirname(data_yaml_original_path)
os.makedirs(temp_data_yaml_dir, exist_ok=True)

# Summary folder
summary_dir = f"{project_base_few_shot}/validation/summary"
os.makedirs(summary_dir, exist_ok=True)

# ======================================================
# Dictionary of evaluation results
# all_map50_scores[size][model_shot][test_images] = mAP
# ======================================================
all_map50_scores = {size: {model_shot: {} for model_shot in shots} for size in sizes}

# ======================================================
# Evaluation loop
# ======================================================
for size in sizes:
    for model_shot in shots:
        print(f"\n--- Evaluasi model {size} ({model_shot}-shot) ---")
        # Weights produced by the few-shot training run for this size and shot count
        model_path = f"{project_base_few_shot}/training/{model_shot}-shot-{size}/weights/best.pt"
        model = YOLO(model_path)

        for test_images in shots:
            # 1. Load the original YAML
            with open(data_yaml_original_path, 'r') as f:
                data_yaml = yaml.safe_load(f)

            # 2. Point test/val/train at the support set with `test_images` images per class
            data_yaml['test']  = f"../{test_images}-images/images"
            data_yaml['val']   = f"../{test_images}-images/images"
            data_yaml['train'] = f"../{test_images}-images/images"

            # 3. Save a temporary YAML (one file per model / support-set combination)
            temp_yaml_path = os.path.join(
                temp_data_yaml_dir,
                f"eval_{model_shot}-shot-{size}_on_{test_images}-images.yaml"
            )
            with open(temp_yaml_path, 'w') as f:
                yaml.dump(data_yaml, f, sort_keys=False, default_flow_style=False)

            # 4. Validate
            results = model.val(
                data=temp_yaml_path,
                split="test",
                imgsz=640,
                project=f"{project_base_few_shot}/validation",
                name=f"{model_shot}-shot-{size}-on-{test_images}-images",
                exist_ok=True
            )
            # Falls back to 0.0 when the key is missing from results_dict
            map50 = results.results_dict.get("metrics/mAP50(B)", 0.0)
            all_map50_scores[size][model_shot][test_images] = map50

            print(f"Model {model_shot}-shot-{size} diuji di {test_images}-images → mAP50 = {map50:.4f}")

        # Remove the runs folder so outputs do not pile up
        # (deletes the whole ./runs directory if it exists)
        if os.path.exists("runs"):
            shutil.rmtree("runs")

# ======================================================
# Save the results to one summary CSV per size
# ======================================================
for size in sizes:
    df = pd.DataFrame(all_map50_scores[size]).T  # model_shot as the index, test_images as the columns
    csv_path = os.path.join(summary_dir, f"{size}-results.csv")
    df.to_csv(csv_path, index=True)
    print(f"Hasil ringkasan {size} disimpan di {csv_path}")

# ======================================================
# Visualise the results (example: plot all sizes)
# ======================================================
plt.figure(figsize=(15, 8))
plt.title("Perbandingan mAP@0.5 Few-Shot (Semua Model)")
plt.xlabel("Jumlah Gambar per Kelas (Support-Set)")
plt.ylabel("mAP@0.5")
plt.xticks(shots)
plt.grid(True, linestyle='--', alpha=0.7)

# One line per (size, model_shot): x = support-set image count, y = mAP@0.5
for size in sizes:
    for model_shot in shots:
        sorted_scores = sorted(all_map50_scores[size][model_shot].items())
        x = [item[0] for item in sorted_scores]
        y = [item[1] for item in sorted_scores]
        plt.plot(x, y, marker='o', linestyle='-',
                 label=f"{size} {model_shot}-shot")

plt.legend(title="Model (size & shot)", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()

# ======================================================
# Print the results to the console
# ======================================================
print("\n--- Hasil Lengkap mAP@0.5 ---")
for size in sizes:
    print(f"\nModel {size}:")
    for model_shot, scores_by_test in all_map50_scores[size].items():
        for test_img_count, score in scores_by_test.items():
            print(f"  - {model_shot}-shot diuji di {test_img_count}-images: {score:.4f}")

In [None]:
# import yaml
# import matplotlib.pyplot as plt
# from ultralytics import YOLO
# import os
# import shutil # Import shutil untuk operasi penghapusan folder

# # Define the models to be evaluated and the test datasets
# # Models trained with these 'shot' counts
# lists_model_shots = [5] + list(range(10, 51, 5))
# # Test datasets with these 'image' counts per class
# lists_test_dataset_images = [5] + list(range(10, 51, 5))

# # Dictionary to store mAP@0.5 scores for each model-test_dataset combination
# # Format: {model_shot_count: {test_dataset_image_count: mAP50_score}}
# all_map50_scores = {}

# # Path to your original data.yaml file
# data_yaml_original_path = "dataset/support_set/data.yaml"
# # **MODIFIKASI DI SINI:** Temporary directory to save modified data.yaml files
# # Sekarang akan disimpan di direktori yang sama dengan data_yaml_original_path
# temp_data_yaml_dir = os.path.dirname(data_yaml_original_path)
# # Default base directory for Ultralytics YOLO runs
# yolo_runs_dir = "runs" # This is the default folder where YOLO saves validation results

# # Ensure the temporary directory for YAMLs exists (though it should for support_set)
# os.makedirs(temp_data_yaml_dir, exist_ok=True)

# # Function to clear the runs/detect folder
# def clear_yolo_runs_directory(base_dir="."):
#     """
#     Menghapus folder 'runs/detect' dari direktori dasar yang diberikan.
#     """
#     detect_run_path = os.path.join(base_dir, yolo_runs_dir, 'detect')
#     if os.path.exists(detect_run_path):
#         print(f"Menghapus direktori: {detect_run_path}")
#         try:
#             shutil.rmtree(detect_run_path)
#             print(f"Direktori {detect_run_path} berhasil dihapus.")
#         except OSError as e:
#             print(f"Error: Gagal menghapus direktori {detect_run_path}. {e}")
#     else:
#         print(f"Direktori {detect_run_path} tidak ditemukan, tidak perlu dihapus.")

# # --- Start of Evaluation Loop ---
# # Loop through each trained model
# for model_shot_count in lists_model_shots:
#     print(f"\n--- Testing Model: {model_shot_count}-shot ---")

#     # Clear YOLO runs directory before validating each model
#     # This ensures a clean slate for each model's validation across test sets
#     clear_yolo_runs_directory()

#     # Initialize a dictionary to store scores for the current model
#     all_map50_scores[model_shot_count] = {}

#     # # Load the trained model
#     model_path = f"results/few_shot/training/{model_shot_count}-shot/weights/best.pt"
#     # try:
#     #     model = YOLO(model_path)
#     # except Exception as e:
#     #     print(f"Error loading model {model_path}: {e}. Skipping this model.")
#     #     continue # Skip to the next model if loading fails

#     # Loop through each test dataset
#     for test_image_count in lists_test_dataset_images:
#         model = YOLO(model_path)
#         # 1. Load the original data.yaml
#         with open(data_yaml_original_path, 'r') as f:
#             data_yaml = yaml.safe_load(f)

#         # 2. Update the 'test' path to the current test dataset
#         # This path is relative to the data.yaml itself.
#         # Ensure that 'dataset/support_set' is the base for 'X-images'
#         # For example, if data.yaml is in 'dataset/support_set/',
#         # and images are in 'dataset/support_set/5-images/',
#         # then the relative path from data.yaml to 5-images is '../5-images/images'
#         # This relative path needs to be correct based on your actual file structure.
#         data_yaml['test'] = f"../{test_image_count}-images/images"
#         data_yaml['train'] = f"../{test_image_count}-images/images"
#         data_yaml['val'] = f"../{test_image_count}-images/images"


#         # 3. Save the modified YAML to a temporary file in the specified directory
#         # **MODIFIED HERE:** temp_yaml_path now points into the directory of data_yaml_original_path
#         temp_yaml_filename = f"data_support_test_{model_shot_count}model_{test_image_count}.yaml"
#         temp_yaml_path = os.path.join(temp_data_yaml_dir, temp_yaml_filename)
#         with open(temp_yaml_path, 'w') as f:
#             yaml.dump(data_yaml, f, sort_keys=False, default_flow_style=False)

#         # 4. Perform model validation
#         print(f"  Validating {model_shot_count}-shot model on {test_image_count}-images dataset...")
#         try:
#             # Pass the path to the newly created temporary YAML file
#             print("temp_yaml_path",temp_yaml_path)
#             results = model.val(data=temp_yaml_path)
#             # Get mAP@0.5 score
#             map50 = results.results_dict.get("metrics/mAP50(B)", 0.0)
#             all_map50_scores[model_shot_count][test_image_count] = map50
#             print(f"    mAP@0.5: {map50:.4f}")
#         except Exception as e:
#             print(f"Error validating {model_shot_count}-shot model on {test_image_count}-images: {e}")
#             all_map50_scores[model_shot_count][test_image_count] = 0.0 # Set to 0 if an error occurs

#         # Clean up: delete the YOLO "runs/" output directory after each validation (the temporary YAML file itself is NOT removed)
#         # os.rmdir("runs/")
#         shutil.rmtree("runs/")

In [None]:
# %matplotlib inline
# # --- Visualization of Results ---
# plt.figure(figsize=(15, 8))
# plt.title("Perbandingan mAP@0.5 Model Few-Shot pada Berbagai Ukuran Dataset Pengujian")
# plt.xlabel("Ukuran Dataset Pengujian (Jumlah Gambar per Kelas)")
# plt.ylabel("mAP@0.5")
# plt.xticks(lists_test_dataset_images)
# plt.grid(True, linestyle='--', alpha=0.7)

# # Plot a line for each trained model
# for model_shot_count, test_scores_dict in all_map50_scores.items():
#     # Sort the test scores by dataset size for proper plotting
#     sorted_test_scores = sorted(test_scores_dict.items())

#     # Extract test dataset image counts and corresponding mAP50 scores
#     test_dataset_sizes = [item[0] for item in sorted_test_scores]
#     map50_values = [item[1] for item in sorted_test_scores]

#     plt.plot(test_dataset_sizes, map50_values, marker='o', linestyle='-',
#              label=f"{model_shot_count}-shot Model")

# plt.legend(title="Model Dilatih (Shot)", loc='lower right')
# plt.tight_layout()
# plt.show()

# # Optional: Print the full results in a table-like format
# print("\n--- Complete mAP@0.5 Results ---")
# for model_shot_count, scores_by_dataset in all_map50_scores.items():
#     print(f"Model {model_shot_count}-shot:")
#     for test_img_count, score in scores_by_dataset.items():
#         print(f"  - Tested on {test_img_count}-images: {score:.4f}")

In [None]:
# Copy the support-set dataset and its results to Google Drive, using the same
# relative folder name as source and destination. Runs only when the
# GoogleDrive flag is enabled.
if GoogleDrive:
    for drive_folder in ("dataset/support_set", "results/support_set"):
        copy_to_google_drive(drive_folder, drive_folder)

## Severity Estimation

### SINGLE-STAGE (tanpa deteksi) — ambil SATU daun terbesar dari mask segmen

In [None]:
# ==========================
# SINGLE-STAGE · LARGEST LEAF
# ==========================
# Severity estimation using the segmentation model only (no detector).
# For every image the largest predicted leaf mask is selected, and severity is
# reported as (lesion pixels inside that leaf) / (leaf pixels) * 100.
# One CSV row is written per readable image; optionally an overlay image too.
from ultralytics import YOLO
import numpy as np, cv2, os, glob, pandas as pd
from pathlib import Path

# ---------- CFG (FILL IN YOURSELF) ----------
SEG_MODEL_PATH   = "PATH/TO/yolo11?-seg-best.pt"  # segmentation model with the 5 classes listed below
INPUT_DIR        = "PATH/TO/images"
OUTPUT_DIR       = "PATH/TO/output_single_largest"
CSV_PATH         = str(Path(OUTPUT_DIR) / "summary_single_largest.csv")
CONF_THRESH      = 0.25
IOU_THRESH       = 0.5
MASK_THRESHOLD   = 0.5
SAVE_OVERLAY     = True
# ---------------------------------------

# Segmentation classes (the order MUST match training):
# 0: frog-eye-leaf-spot (leaf), 1: frog-eye-leaf-spot_lession,
# 2: healthy (leaf), 3: rust (leaf), 4: rust_lession
SEG_LEAF_IDS   = [0, 2, 3]
PAIR_LESION_ID = {0: 1, 3: 4}  # leaf -> lesion mapping; healthy (2) has no lesion class

os.makedirs(OUTPUT_DIR, exist_ok=True)
model = YOLO(SEG_MODEL_PATH)
image_paths = sorted(glob.glob(str(Path(INPUT_DIR) / "*.*")))
records = []

for img_path in image_paths:
    img_name = Path(img_path).name

    # Decode once and reuse the array for both inference and the overlay.
    # The "*.*" glob also matches non-image files; cv2.imread returns None for
    # those, so they are skipped instead of crashing the whole run.
    img = cv2.imread(img_path)
    if img is None:
        print(f"[Single-stage · largest] skipped unreadable file -> {img_path}")
        continue

    # retina_masks=True makes masks.data match the original image size.
    # Without it the masks are at the letterboxed inference resolution, and
    # indexing the full-size image with them raises a shape-mismatch IndexError.
    # NOTE: full-resolution masks use more memory on very large images.
    pred = model.predict(source=img, conf=CONF_THRESH, iou=IOU_THRESH,
                         retina_masks=True, verbose=False)[0]

    if pred.masks is None or pred.boxes is None or len(pred.masks) == 0:
        records.append({"image": img_name, "leaf_class": None, "leaf_px": 0, "lesion_px": 0, "severity_pct": 0.0})
        continue

    masks = (pred.masks.data.cpu().numpy() > MASK_THRESHOLD)  # (N,H,W) bool
    cls   = pred.boxes.cls.cpu().numpy().astype(int)

    # Collect the indices of every instance predicted as a leaf class
    leaf_idxs = [i for i, c in enumerate(cls) if c in SEG_LEAF_IDS]
    if not leaf_idxs:
        records.append({"image": img_name, "leaf_class": None, "leaf_px": 0, "lesion_px": 0, "severity_pct": 0.0})
        continue

    # Pick the leaf instance with the largest mask area (pixel count)
    areas = masks[leaf_idxs].reshape(len(leaf_idxs), -1).sum(axis=1)
    best_i = leaf_idxs[int(np.argmax(areas))]
    leaf_mask = masks[best_i]
    leaf_class = int(cls[best_i])

    # Union of all lesion instances of the class paired with this leaf (if any)
    lesion_mask = np.zeros_like(leaf_mask, dtype=bool)
    if leaf_class in PAIR_LESION_ID:
        lesion_id = PAIR_LESION_ID[leaf_class]
        lesion_idxs = [i for i, c in enumerate(cls) if c == lesion_id]
        if lesion_idxs:
            lesion_mask = np.any(masks[lesion_idxs], axis=0)

    # Only lesion pixels that fall inside the selected leaf are counted
    lesion_in_leaf = lesion_mask & leaf_mask
    leaf_px   = int(leaf_mask.sum())
    lesion_px = int(lesion_in_leaf.sum())
    sev = (lesion_px / leaf_px * 100.0) if leaf_px > 0 else 0.0

    records.append({
        "image": img_name,
        "leaf_class": leaf_class,
        "leaf_px": leaf_px,
        "lesion_px": lesion_px,
        "severity_pct": sev
    })

    if SAVE_OVERLAY:
        overlay = img.copy()
        # 70/30 blend: leaf tinted green, lesion tinted red (BGR colour order)
        overlay[leaf_mask]        = (0.7*overlay[leaf_mask] + 0.3*np.array([0,255,0])).astype(np.uint8)
        overlay[lesion_in_leaf]   = (0.7*overlay[lesion_in_leaf] + 0.3*np.array([0,0,255])).astype(np.uint8)
        cv2.putText(overlay, f"Severity: {sev:.2f}%", (10, overlay.shape[0]-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,0), 2, cv2.LINE_AA)
        cv2.imwrite(str(Path(OUTPUT_DIR) / f"{Path(img_name).stem}_overlay.jpg"), overlay)

pd.DataFrame(records).to_csv(CSV_PATH, index=False)
print(f"[Single-stage · largest] CSV saved -> {CSV_PATH}")


### SINGLE-STAGE (tanpa deteksi) — semua daun pada gambar

In [None]:
# ==========================
# SINGLE-STAGE · ALL LEAVES
# ==========================
# Severity estimation using the segmentation model only (no detector).
# Every predicted leaf instance gets its own CSV row, with severity computed as
# (lesion pixels inside that leaf) / (leaf pixels) * 100. One combined overlay
# image is written per image when SAVE_OVERLAY is enabled.
from ultralytics import YOLO
import numpy as np, cv2, os, glob, pandas as pd
from pathlib import Path

# ---------- CFG (FILL IN YOURSELF) ----------
SEG_MODEL_PATH   = "PATH/TO/yolo11?-seg-best.pt"
INPUT_DIR        = "PATH/TO/images"
OUTPUT_DIR       = "PATH/TO/output_single_all"
CSV_PATH         = str(Path(OUTPUT_DIR) / "summary_single_all.csv")
CONF_THRESH      = 0.25
IOU_THRESH       = 0.5
MASK_THRESHOLD   = 0.5
SAVE_OVERLAY     = True
# ---------------------------------------

SEG_LEAF_IDS   = [0, 2, 3]         # leaf classes
PAIR_LESION_ID = {0: 1, 3: 4}      # mapping leaf->lesion

os.makedirs(OUTPUT_DIR, exist_ok=True)
model = YOLO(SEG_MODEL_PATH)
image_paths = sorted(glob.glob(str(Path(INPUT_DIR) / "*.*")))
rows, overlay_suffix = [], "_overlay_all.jpg"

for img_path in image_paths:
    img_name = Path(img_path).name

    # Decode once and reuse the array for both inference and the overlay.
    # The "*.*" glob also matches non-image files; cv2.imread returns None for
    # those, so they are skipped instead of crashing the whole run.
    img = cv2.imread(img_path)
    if img is None:
        print(f"[Single-stage · all leaves] skipped unreadable file -> {img_path}")
        continue

    # retina_masks=True makes masks.data match the original image size.
    # Without it the masks are at the letterboxed inference resolution, and
    # indexing the full-size overlay with them raises a shape-mismatch IndexError.
    # NOTE: full-resolution masks use more memory on very large images.
    pred = model.predict(source=img, conf=CONF_THRESH, iou=IOU_THRESH,
                         retina_masks=True, verbose=False)[0]

    if pred.masks is None or pred.boxes is None or len(pred.masks) == 0:
        rows.append({"image": img_name, "leaf_index": None, "leaf_class": None,
                     "leaf_px": 0, "lesion_px": 0, "severity_pct": 0.0})
        continue

    masks = (pred.masks.data.cpu().numpy() > MASK_THRESHOLD)
    cls   = pred.boxes.cls.cpu().numpy().astype(int)

    # Prepare a single overlay for the whole image; every leaf is drawn onto it
    overlay = img.copy() if SAVE_OVERLAY else None

    # Union mask of all lesion instances, cached per lesion class so it is not
    # recomputed for every leaf of the same class.
    lesion_union_by_id = {}

    leaf_idxs = [i for i, c in enumerate(cls) if c in SEG_LEAF_IDS]
    if not leaf_idxs:
        rows.append({"image": img_name, "leaf_index": None, "leaf_class": None,
                     "leaf_px": 0, "lesion_px": 0, "severity_pct": 0.0})
    else:
        for k, li in enumerate(leaf_idxs):
            leaf_mask   = masks[li]
            leaf_class  = int(cls[li])

            lesion_mask = np.zeros_like(leaf_mask, dtype=bool)
            if leaf_class in PAIR_LESION_ID:
                lesion_id = PAIR_LESION_ID[leaf_class]
                if lesion_id not in lesion_union_by_id:
                    lesion_idxs = [i for i, c in enumerate(cls) if c == lesion_id]
                    lesion_union_by_id[lesion_id] = (
                        np.any(masks[lesion_idxs], axis=0) if lesion_idxs else lesion_mask
                    )
                lesion_mask = lesion_union_by_id[lesion_id]

            # Only lesion pixels that fall inside this leaf are attributed to it
            lesion_in_leaf = lesion_mask & leaf_mask
            leaf_px   = int(leaf_mask.sum())
            lesion_px = int(lesion_in_leaf.sum())
            sev = (lesion_px / leaf_px * 100.0) if leaf_px > 0 else 0.0

            rows.append({"image": img_name, "leaf_index": k, "leaf_class": leaf_class,
                         "leaf_px": leaf_px, "lesion_px": lesion_px, "severity_pct": sev})

            if SAVE_OVERLAY:
                # 70/30 blend: leaf tinted green, lesion tinted red (BGR colour order)
                overlay[leaf_mask]        = (0.7*overlay[leaf_mask] + 0.3*np.array([0,255,0])).astype(np.uint8)
                overlay[lesion_in_leaf]   = (0.7*overlay[lesion_in_leaf] + 0.3*np.array([0,0,255])).astype(np.uint8)

    if SAVE_OVERLAY:
        cv2.imwrite(str(Path(OUTPUT_DIR) / f"{Path(img_name).stem}{overlay_suffix}"), overlay)

pd.DataFrame(rows).to_csv(CSV_PATH, index=False)
print(f"[Single-stage · all leaves] CSV saved -> {CSV_PATH}")


### MULTI-STAGE (deteksi ➜ crop ➜ segmen) — SATU daun (bbox terbesar)

In [None]:
# =========================================
# MULTI-STAGE · LARGEST BBOX ➜ SEGMENTATION
# =========================================
# Two-stage severity estimation: the detector finds leaves, the largest
# detected box is cropped (with padding) and the segmentation model is run on
# the crop. Severity = (lesion pixels inside the leaf) / (leaf pixels) * 100.
# One CSV row is written per readable image.
from ultralytics import YOLO
import numpy as np, cv2, os, glob, pandas as pd
from pathlib import Path

# ---------- CFG (FILL IN YOURSELF) ----------
DET_MODEL_PATH  = "PATH/TO/yolo11?-det-best.pt"  # detection: 0=frog-eye-leaf-spot, 1=healthy, 2=rust
SEG_MODEL_PATH  = "PATH/TO/yolo11?-seg-best.pt"  # 5-class segmentation model (classes as in the single-stage cells)
INPUT_DIR       = "PATH/TO/images"
OUTPUT_DIR      = "PATH/TO/output_multi_largest"
CSV_PATH        = str(Path(OUTPUT_DIR) / "summary_multi_largest.csv")
DET_CONF        = 0.25
DET_IOU         = 0.5
SEG_CONF        = 0.25
SEG_IOU         = 0.5
MASK_THRESHOLD  = 0.5
PADDING_RATIO   = 0.08   # 8% of the longest bbox side
SAVE_OVERLAY    = True
# ---------------------------------------

# Mapping detection class -> segmentation leaf & lesion classes
# det: 0=frog-eye; 1=healthy; 2=rust
DET_TO_SEG_LEAF   = {0: 0, 1: 2, 2: 3}
DET_TO_SEG_LESION = {0: 1, 2: 4}  # healthy has no lesion class

os.makedirs(OUTPUT_DIR, exist_ok=True)
model_det = YOLO(DET_MODEL_PATH)
model_seg = YOLO(SEG_MODEL_PATH)

def clamp(v, lo, hi):
    """Return v limited to the closed interval [lo, hi]."""
    return max(lo, min(hi, v))

rows = []
for img_path in sorted(glob.glob(str(Path(INPUT_DIR) / "*.*"))):
    img_name = Path(img_path).name

    # The "*.*" glob also matches non-image files; cv2.imread returns None for
    # those, which previously crashed on img.shape.
    img = cv2.imread(img_path)
    if img is None:
        print(f"[Multi-stage · largest] skipped unreadable file -> {img_path}")
        continue
    H, W = img.shape[:2]

    # Detect on the already-decoded array so the boxes are guaranteed to be in
    # the coordinate frame of the array that is cropped below.
    det = model_det.predict(img, conf=DET_CONF, iou=DET_IOU, verbose=False)[0]
    if det.boxes is None or len(det.boxes) == 0:
        rows.append({"image": img_name, "det_class": None, "leaf_px": 0, "lesion_px": 0, "severity_pct": 0.0})
        continue

    boxes = det.boxes.xyxy.cpu().numpy()
    dcls  = det.boxes.cls.cpu().numpy().astype(int)

    # Keep only the detection with the largest box area
    areas = (boxes[:,2]-boxes[:,0])*(boxes[:,3]-boxes[:,1])
    idx   = int(np.argmax(areas))
    x1,y1,x2,y2 = boxes[idx]
    det_c = int(dcls[idx])

    w, h = x2-x1, y2-y1
    pad  = PADDING_RATIO * max(w, h)
    # Slice ends are exclusive, so the upper bounds clamp to W / H (not W-1 /
    # H-1); otherwise the last row/column of an edge-touching box is dropped.
    x1p, y1p = int(clamp(x1-pad, 0, W-1)), int(clamp(y1-pad, 0, H-1))
    x2p, y2p = int(clamp(x2+pad, 0, W)),   int(clamp(y2+pad, 0, H))
    if x2p <= x1p or y2p <= y1p:
        # Degenerate box -> empty crop; nothing to segment
        rows.append({"image": img_name, "det_class": det_c, "leaf_px": 0, "lesion_px": 0, "severity_pct": 0.0})
        continue
    crop = img[y1p:y2p, x1p:x2p].copy()

    # retina_masks=True makes masks.data match the crop size. Without it the
    # masks are at the letterboxed inference resolution, and indexing the crop
    # with them raises a shape-mismatch IndexError.
    seg = model_seg.predict(crop, conf=SEG_CONF, iou=SEG_IOU,
                            retina_masks=True, verbose=False)[0]
    if seg.masks is None or seg.boxes is None or len(seg.masks) == 0:
        rows.append({"image": img_name, "det_class": det_c, "leaf_px": 0, "lesion_px": 0, "severity_pct": 0.0})
        continue

    masks = (seg.masks.data.cpu().numpy() > MASK_THRESHOLD)
    scls  = seg.boxes.cls.cpu().numpy().astype(int)

    # Select the leaf mask whose class corresponds to the detected class
    seg_leaf_id = DET_TO_SEG_LEAF.get(det_c, None)
    leaf_mask = np.zeros_like(masks[0], dtype=bool)
    if seg_leaf_id is not None:
        leaf_idxs = [i for i, c in enumerate(scls) if c == seg_leaf_id]
        if leaf_idxs:
            # If there are several leaf instances, keep the largest one
            leaf_stack = masks[leaf_idxs]
            areas_leaf = leaf_stack.reshape(len(leaf_idxs), -1).sum(axis=1)
            leaf_mask  = leaf_stack[int(np.argmax(areas_leaf))]

    # Lesion union for the detected class (healthy has none)
    lesion_mask = np.zeros_like(leaf_mask, dtype=bool)
    seg_lesion_id = DET_TO_SEG_LESION.get(det_c, None)
    if seg_lesion_id is not None:
        lesion_idxs = [i for i, c in enumerate(scls) if c == seg_lesion_id]
        if lesion_idxs:
            lesion_mask = np.any(masks[lesion_idxs], axis=0)

    # Only lesion pixels that fall inside the selected leaf are counted
    lesion_in_leaf = lesion_mask & leaf_mask
    leaf_px   = int(leaf_mask.sum())
    lesion_px = int(lesion_in_leaf.sum())
    sev = (lesion_px / leaf_px * 100.0) if leaf_px > 0 else 0.0

    rows.append({"image": img_name, "det_class": det_c, "leaf_px": leaf_px, "lesion_px": lesion_px, "severity_pct": sev})

    if SAVE_OVERLAY:
        overlay = crop.copy()
        # 70/30 blend: leaf tinted green, lesion tinted red (BGR colour order)
        overlay[leaf_mask]        = (0.7*overlay[leaf_mask] + 0.3*np.array([0,255,0])).astype(np.uint8)
        overlay[lesion_in_leaf]   = (0.7*overlay[lesion_in_leaf] + 0.3*np.array([0,0,255])).astype(np.uint8)
        cv2.putText(overlay, f"Severity: {sev:.2f}%", (10, overlay.shape[0]-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,0), 2, cv2.LINE_AA)
        cv2.imwrite(str(Path(OUTPUT_DIR) / f"{Path(img_name).stem}_largest_overlay.jpg"), overlay)

pd.DataFrame(rows).to_csv(CSV_PATH, index=False)
print(f"[Multi-stage · largest] CSV saved -> {CSV_PATH}")


### MULTI-STAGE (deteksi ➜ crop ➜ segmen) — SEMUA daun

In [None]:
# ======================================
# MULTI-STAGE · ALL BBOXES ➜ SEGMENTATION
# ======================================
# Two-stage severity estimation: the detector finds leaves, every detected box
# is cropped (with padding) and the segmentation model is run on each crop.
# Severity = (lesion pixels inside the leaf) / (leaf pixels) * 100.
# One CSV row is written per detected box; overlays are saved per crop.
from ultralytics import YOLO
import numpy as np, cv2, os, glob, pandas as pd
from pathlib import Path

# ---------- CFG (FILL IN YOURSELF) ----------
DET_MODEL_PATH  = "PATH/TO/yolo11?-det-best.pt"  # det: 0=frog-eye-leaf-spot,1=healthy,2=rust
SEG_MODEL_PATH  = "PATH/TO/yolo11?-seg-best.pt"  # 5-class segmentation model (classes as in the single-stage cells)
INPUT_DIR       = "PATH/TO/images"
OUTPUT_DIR      = "PATH/TO/output_multi_all"
CSV_PATH        = str(Path(OUTPUT_DIR) / "summary_multi_all.csv")
DET_CONF        = 0.25
DET_IOU         = 0.5
SEG_CONF        = 0.25
SEG_IOU         = 0.5
MASK_THRESHOLD  = 0.5
PADDING_RATIO   = 0.08   # fraction of the longest bbox side added as padding
SAVE_OVERLAY    = True
# ---------------------------------------

# Mapping detection class -> segmentation leaf & lesion classes
DET_TO_SEG_LEAF   = {0: 0, 1: 2, 2: 3}
DET_TO_SEG_LESION = {0: 1, 2: 4}  # healthy (det class 1) has no lesion class

os.makedirs(OUTPUT_DIR, exist_ok=True)
CROP_DIR = Path(OUTPUT_DIR) / "crops"
os.makedirs(CROP_DIR, exist_ok=True)

model_det = YOLO(DET_MODEL_PATH)
model_seg = YOLO(SEG_MODEL_PATH)

def clamp(v, lo, hi):
    """Return v limited to the closed interval [lo, hi]."""
    return max(lo, min(hi, v))

rows = []
for img_path in sorted(glob.glob(str(Path(INPUT_DIR) / "*.*"))):
    img_name = Path(img_path).name

    # The "*.*" glob also matches non-image files; cv2.imread returns None for
    # those, which previously crashed on img.shape.
    img = cv2.imread(img_path)
    if img is None:
        print(f"[Multi-stage · all leaves] skipped unreadable file -> {img_path}")
        continue
    H, W = img.shape[:2]

    # Detect on the already-decoded array so the boxes are guaranteed to be in
    # the coordinate frame of the array that is cropped below.
    det = model_det.predict(img, conf=DET_CONF, iou=DET_IOU, verbose=False)[0]
    if det.boxes is None or len(det.boxes) == 0:
        rows.append({"image": img_name, "bbox_index": None, "det_class": None,
                     "leaf_px": 0, "lesion_px": 0, "severity_pct": 0.0})
        continue

    boxes = det.boxes.xyxy.cpu().numpy()
    dcls  = det.boxes.cls.cpu().numpy().astype(int)

    for bi, (bb, det_c) in enumerate(zip(boxes, dcls)):
        det_c = int(det_c)  # plain int for dict lookups and CSV output
        x1,y1,x2,y2 = bb
        w, h = x2-x1, y2-y1
        pad  = PADDING_RATIO * max(w, h)
        # Slice ends are exclusive, so the upper bounds clamp to W / H (not
        # W-1 / H-1); otherwise the last row/column of an edge-touching box is
        # dropped.
        x1p, y1p = int(clamp(x1-pad, 0, W-1)), int(clamp(y1-pad, 0, H-1))
        x2p, y2p = int(clamp(x2+pad, 0, W)),   int(clamp(y2+pad, 0, H))
        if x2p <= x1p or y2p <= y1p:
            # Degenerate box -> empty crop; nothing to segment
            rows.append({"image": img_name, "bbox_index": bi, "det_class": det_c,
                         "leaf_px": 0, "lesion_px": 0, "severity_pct": 0.0})
            continue
        crop = img[y1p:y2p, x1p:x2p].copy()

        # retina_masks=True makes masks.data match the crop size. Without it
        # the masks are at the letterboxed inference resolution, and indexing
        # the crop with them raises a shape-mismatch IndexError.
        seg = model_seg.predict(crop, conf=SEG_CONF, iou=SEG_IOU,
                                retina_masks=True, verbose=False)[0]
        if seg.masks is None or seg.boxes is None or len(seg.masks) == 0:
            rows.append({"image": img_name, "bbox_index": bi, "det_class": det_c,
                         "leaf_px": 0, "lesion_px": 0, "severity_pct": 0.0})
            continue

        masks = (seg.masks.data.cpu().numpy() > MASK_THRESHOLD)
        scls  = seg.boxes.cls.cpu().numpy().astype(int)

        # Leaf mask whose class corresponds to the detected class
        leaf_mask = np.zeros_like(masks[0], dtype=bool)
        seg_leaf_id = DET_TO_SEG_LEAF.get(det_c, None)
        if seg_leaf_id is not None:
            leaf_idxs = [i for i, c in enumerate(scls) if c == seg_leaf_id]
            if leaf_idxs:
                # If there are several leaf instances, keep the largest one
                leaf_stack = masks[leaf_idxs]
                areas_leaf = leaf_stack.reshape(len(leaf_idxs), -1).sum(axis=1)
                leaf_mask  = leaf_stack[int(np.argmax(areas_leaf))]

        # Lesion union for the detected class (healthy has none)
        lesion_mask = np.zeros_like(leaf_mask, dtype=bool)
        seg_lesion_id = DET_TO_SEG_LESION.get(det_c, None)
        if seg_lesion_id is not None:
            lesion_idxs = [i for i, c in enumerate(scls) if c == seg_lesion_id]
            if lesion_idxs:
                lesion_mask = np.any(masks[lesion_idxs], axis=0)

        # Only lesion pixels that fall inside the selected leaf are counted
        lesion_in_leaf = lesion_mask & leaf_mask
        leaf_px   = int(leaf_mask.sum())
        lesion_px = int(lesion_in_leaf.sum())
        sev = (lesion_px / leaf_px * 100.0) if leaf_px > 0 else 0.0

        rows.append({"image": img_name, "bbox_index": bi, "det_class": det_c,
                     "leaf_px": leaf_px, "lesion_px": lesion_px, "severity_pct": sev})

        if SAVE_OVERLAY:
            overlay = crop.copy()
            # 70/30 blend: leaf tinted green, lesion tinted red (BGR colour order)
            overlay[leaf_mask]        = (0.7*overlay[leaf_mask] + 0.3*np.array([0,255,0])).astype(np.uint8)
            overlay[lesion_in_leaf]   = (0.7*overlay[lesion_in_leaf] + 0.3*np.array([0,0,255])).astype(np.uint8)
            cv2.putText(overlay, f"Severity: {sev:.2f}%", (10, overlay.shape[0]-10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,0), 2, cv2.LINE_AA)
            cv2.imwrite(str(Path(CROP_DIR) / f"{Path(img_name).stem}_bb{bi}_overlay.jpg"), overlay)

pd.DataFrame(rows).to_csv(CSV_PATH, index=False)
print(f"[Multi-stage · all leaves] CSV saved -> {CSV_PATH}")
