<a href="https://colab.research.google.com/github/EmTampz/Copra_YOLOv11_v12_Colab_Notebooks/blob/main/Copra_1_manual_optimized.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ==========================================================
# üì¶ ENVIRONMENT SETUP for YOLOv11 Thesis (Colab)

# Core deep learning and YOLO library
!pip install -U ultralytics

# Core data handling, visualization, and ML utilities
!pip install pandas matplotlib seaborn pillow scikit-learn

#local dashboard:  monitoring and logging
!pip install tensorboard tqdm

# Roboflow integration
!pip install roboflow

# cloud-connected research tracker
!pip -q install wandb




In [2]:
import torch, ultralytics, pandas as pd, matplotlib, seaborn, PIL, sklearn, tqdm, tensorboard

print(f"PyTorch version : {torch.__version__}")
print(f"CUDA available  : {torch.cuda.is_available()}")
print(f"Ultralytics ver : {ultralytics.__version__}")
ultralytics.checks()

Ultralytics 8.3.224 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
Setup complete ‚úÖ (2 CPUs, 12.7 GB RAM, 40.2/235.7 GB disk)


In [3]:
"""
manual_tuning_yolo11.py
Manual hyperparameter tuning for YOLOv11 (Detection -> Segmentation -> Classification)
Integrated with Roboflow API for automatic dataset download (YOLO format).

Now includes robust interruption safety:
üîí Checkpointing every N epochs (save_period)
üîÅ Auto-resume from latest checkpoint (Ultralytics resume + W&B resume)
üíæ Saves everything to Google Drive for persistence
üìä Weights & Biases (W&B) logging to the cloud (resumable, descriptive run names)

Still includes:
‚úÖ Per-class PR curves (AUC/AP) + combined plot (detection-style)
‚úÖ Per-class Precision, Recall, F1, Accuracy (Macro & Weighted)
‚úÖ Confusion matrix + overall accuracy
‚úÖ CSV + PNG per run, limited inference previews
‚úÖ One-click ZIP of all artifacts for download
‚úÖ TensorBoard logs (optional)

Run first (in another Colab cell):
!pip install ultralytics roboflow pandas matplotlib seaborn pillow scikit-learn wandb
"""
import os, time, shutil, yaml, json, random, glob, zipfile, hashlib
from datetime import datetime
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.metrics import (
    confusion_matrix,
    ConfusionMatrixDisplay,
    precision_recall_fscore_support,
    accuracy_score,
)

# ---------------- W&B Auto Login (No Prompt Needed) ----------------
os.environ["WANDB_API_KEY"] = "02a2986eed98555093c4c2920a04c631a89430cc"
!wandb login $WANDB_API_KEY --relogin

import wandb
print("Current W&B status:", wandb.api.api_key)

# ---------------- Google Drive Persistence ----------------
# üíæ Mount Google Drive so all checkpoints/logs survive Colab disconnections
try:
    from google.colab import drive  # type: ignore
    drive.mount('/content/drive')
    DRIVE_OK = True
except Exception:
    DRIVE_OK = False

# If Drive is mounted, results will be stored persistently in MyDrive
PERSIST_BASE = "/content/drive/MyDrive/yolo_copra" if DRIVE_OK else "/content"
RESULTS_BASE = os.path.join(PERSIST_BASE, "copra_yolo11_manual_results")
os.makedirs(RESULTS_BASE, exist_ok=True)

# ---------------- Roboflow Dataset Integration ----------------
from roboflow import Roboflow
rf = Roboflow(api_key="LS1Ohi2OQZ178OOqQ4zc")              # <-- API_KEY
project = rf.workspace("cv-opfhv").project("mor-v4-8itha")  # <-- project slug
version = project.version(1)
dataset = version.download("yolov11")

ROBOFLOW_DATA_YAML = os.path.join(dataset.location, "data.yaml")
print(f"‚úÖ Dataset downloaded successfully!\nüìÇ YAML: {ROBOFLOW_DATA_YAML}")

# ---------------- YOLOv11 + Logging Configurations ----------------
from ultralytics import YOLO
from IPython.display import Image as IPyImage, display

TENSORBOARD_ENABLE = True      # for local visualization
WANDB_ENABLE = True            # enable W&B logging
WANDB_PROJECT = "Copra-YOLOv11-Tuning"  # Descriptive W&B project name

# ---------------- TensorBoard + W&B Helpers ----------------
def ensure_tensorboard():
    """Enable Ultralytics TensorBoard logging."""
    if TENSORBOARD_ENABLE:
        os.system("yolo settings tensorboard=True")

def ensure_wandb():
    """Enable W&B logging for cloud-based tracking and resumption."""
    if not WANDB_ENABLE:
        os.system("yolo settings wandb=False")
        return None, None
    os.system("yolo settings wandb=True")
    import wandb
    return wandb, True

# ---------------- Task and Hyperparameter Setup ----------------
TASKS = ["detect", "seg", "cls"]
MODEL_MAP = {
    "detect": "yolo11n.pt",
    "seg":    "yolo11n-seg.pt",
}

MANUAL_CONFIG = {
    "epochs": 100,
    "batch": 16,
    "optimizer": "auto",
    "lr0": 0.001,
    "patience": 100,
    "save_period": 5   # Save checkpoint every N epochs
}

IMG_SIZE = 640

TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
SUMMARY_CSV = os.path.join(RESULTS_BASE, f"manual_summary_{TIMESTAMP}.csv")

# ---------------- Utility Functions ----------------
def load_data_yaml(path):
    """Load dataset metadata (class names, paths) from YAML."""
    with open(path, "r") as f:
        return yaml.safe_load(f)

def iou_xyxy(box1, box2):
    """Compute IoU between two [x1,y1,x2,y2] boxes."""
    x1 = max(box1[0], box2[0]); y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2]); y2 = min(box1[3], box2[3])
    inter_w, inter_h = max(0, x2 - x1), max(0, y2 - y1)
    inter_area = inter_w * inter_h
    area1 = (box1[2]-box1[0])*(box1[3]-box1[1])
    area2 = (box2[2]-box2[0])*(box2[3]-box2[1])
    union = area1 + area2 - inter_area
    return inter_area / union if union > 0 else 0.0

def yolo_label_to_xyxy(norm, w, h):
    """Convert YOLO [xc,yc,bw,bh] normalized coords to pixel coords."""
    xc, yc, bw, bh = norm
    xc, yc, bw, bh = xc*w, yc*h, bw*w, bh*h
    return [xc - bw/2, yc - bh/2, xc + bw/2, yc + bh/2]

def parse_label_file(label_path):
    """Read YOLO label .txt file and return (class, bbox) pairs."""
    labels = []
    if os.path.exists(label_path):
        with open(label_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 5:
                    cls = int(parts[0]); norm = list(map(float, parts[1:5]))
                    labels.append((cls, norm))
    return labels

def save_weights_copy(src_path, dest_dir, run_name):
    """Copy trained weights into a central folder for easy access."""
    os.makedirs(dest_dir, exist_ok=True)
    if src_path and os.path.exists(src_path):
        dest = os.path.join(dest_dir, f"{run_name}_{Path(src_path).name}")
        shutil.copy(src_path, dest)
        return dest
    return None

# ---------------- W&B Resume Utilities ----------------
def wandb_run_id_from_name(name):
    """Generate stable hash-based W&B run ID for resuming."""
    return hashlib.sha1(name.encode()).hexdigest()[:12]

def stable_run_name(task, cfg):
    """Create a consistent run name based on hyperparameters."""
    return f"{task}_e{cfg['epochs']}_b{cfg['batch']}_lr{cfg['lr0']}_opt{cfg['optimizer']}_p{cfg['patience']}"

def latest_checkpoint(run_dir):
    """Find the most recent checkpoint in the given run directory."""
    weights = os.path.join(run_dir, "weights")
    last = os.path.join(weights, "last.pt")
    best = os.path.join(weights, "best.pt")
    if os.path.exists(last): return last
    if os.path.exists(best): return best
    ckpts = sorted(glob.glob(os.path.join(weights, "epoch*.pt")))
    return ckpts[-1] if ckpts else None

# ---------------- Main Training + Logging ----------------
def run_one_task(task, base_model, cfg, results_base):
    """Train, evaluate, and log metrics for a single YOLO task."""
    model = YOLO(base_model)
    base_run_name = stable_run_name(task, cfg)
    project_dir = os.path.join(results_base, task, base_run_name)
    os.makedirs(project_dir, exist_ok=True)

    # üîÅ Resume logic
    ckpt = latest_checkpoint(project_dir)
    do_resume = ckpt is not None

    # üìä Initialize W&B with descriptive name
    wandb, _ = ensure_wandb()
    if WANDB_ENABLE and wandb is not None:
        run_id = wandb_run_id_from_name(base_run_name)
        wandb_name = (
            f"{task.upper()} | e={cfg['epochs']} | b={cfg['batch']} | "
            f"lr={cfg['lr0']} | opt={cfg['optimizer']} | p={cfg['patience']} | {datetime.now().strftime('%Y%m%d_%H%M%S')}"
        )
        wandb.init(
            project=WANDB_PROJECT,
            name=wandb_name,
            group=task,
            id=run_id,
            resume="allow"
        )
        wandb.config.update(cfg)
        wandb.config.update({"task": task, "model": base_model, "dataset": ROBOFLOW_DATA_YAML})

    print(f"\n--- Training {task.upper()} | Run: {base_run_name} ---")
    if do_resume:
        print(f"üîÅ Resuming from checkpoint: {ckpt}")
    else:
        print("üÜï Starting fresh training...")

    # Start training with checkpoint saving and resume enabled
    # TRAIIIIIIIIIINNNNNNNNNN
    t0 = time.time()
    model.train(
        data=ROBOFLOW_DATA_YAML,
        epochs=cfg["epochs"],
        batch=cfg["batch"],
        imgsz=IMG_SIZE,
        optimizer=cfg["optimizer"],
        lr0=cfg["lr0"],
        patience=cfg["patience"],
        project=os.path.join(results_base, task),
        name=base_run_name,
        resume=do_resume,
        save_period=cfg["save_period"]
    )
    elapsed = round(time.time() - t0, 2)
    print(f"‚è±Ô∏è Training finished in {elapsed} seconds")

    # ---------------- Validation + Metrics ----------------
    # VAAAAAAAALLLIIIIDDAAAAAAATION
    val_results = model.val()
    metrics = {}
    try:
        metrics["mAP@0.5"] = float(val_results.box.map50)
        metrics["mAP@0.5:0.95"] = float(val_results.box.map)
        metrics["precision"] = float(val_results.box.precision)
        metrics["recall"] = float(val_results.box.recall)
        metrics["f1"] = float(val_results.box.f1)
    except Exception:
        metrics = {k: None for k in ["mAP@0.5","mAP@0.5:0.95","precision","recall","f1"]}

    # üß† Log key metrics to W&B
    if WANDB_ENABLE and wandb is not None:
        wandb.log(metrics)

    # ---------------- Save weights ----------------
    weights_dir = os.path.join(project_dir, "weights")
    best_w = os.path.join(weights_dir, "best.pt")
    last_w = os.path.join(weights_dir, "last.pt")
    chosen = best_w if os.path.exists(best_w) else (last_w if os.path.exists(last_w) else None)
    saved_copy = save_weights_copy(chosen, os.path.join(results_base, "saved_models"), base_run_name)

    # ---------------- Inference + Visualization ----------------
    infer_folder = os.path.join(project_dir, "inference_results")
    os.makedirs(infer_folder, exist_ok=True)
    val_images_path = os.path.join(dataset.location, "valid", "images")
    val_imgs = glob.glob(f"{val_images_path}/*.jpg")
    sample_imgs = random.sample(val_imgs, min(3, len(val_imgs)))

    if sample_imgs:
        print(f"üñºÔ∏è Performing inference on {len(sample_imgs)} sample images...")
        model.predict(source=sample_imgs, save=True, project=infer_folder, name="sample_preds")
        for img in glob.glob(f"{infer_folder}/sample_preds/*.jpg")[:3]:
            display(IPyImage(filename=img, width=400))

    # ---------------- Confusion Matrix + Per-Class Metrics ----------------
    data_meta = load_data_yaml(ROBOFLOW_DATA_YAML)
    class_names = data_meta.get("names", [])
    y_true, y_pred = [], []

    for img_path in val_imgs:
        res = model.predict(img_path, imgsz=IMG_SIZE, conf=0.25, iou=0.45, stream=False, verbose=False)[0]
        preds = [int(b[5]) for b in res.boxes.data.tolist()] if hasattr(res, "boxes") else []
        label_path = Path(img_path).with_suffix(".txt")
        gt = parse_label_file(str(label_path))
        y_true.extend([g[0] for g in gt])
        y_pred.extend(preds[:len(gt)])  # align predictions

    cm = confusion_matrix(y_true, y_pred, labels=list(range(len(class_names))))
    acc = accuracy_score(y_true, y_pred)
    precision, recall, f1, support = precision_recall_fscore_support(y_true, y_pred, zero_division=0)

    fig, ax = plt.subplots(figsize=(6,6))
    ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names).plot(ax=ax)
    plt.title(f"Confusion Matrix ({task}) | Acc={acc:.3f}")
    plt.tight_layout()
    plt.savefig(os.path.join(project_dir, "confusion_matrix.png"))
    plt.close()

    # üßæ Log final results to W&B
    if WANDB_ENABLE and wandb is not None:
        wandb.log({"confusion_matrix": wandb.Image(os.path.join(project_dir, "confusion_matrix.png"))})
        wandb.finish()

    # ---------------- Return Record ----------------
    return {
        "task": task,
        "run_name": base_run_name,
        "elapsed_seconds": elapsed,
        **metrics,
        "accuracy": acc,
        "saved_model": saved_copy
    }

# ---------------- Zipping All Results ----------------
def zip_all_results(results_base, timestamp):
    """Zip all results: weights, plots, metrics, inferences, CSVs."""
    zip_name = os.path.join(results_base, f"all_results_{timestamp}.zip")
    with zipfile.ZipFile(zip_name, "w", zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(results_base):
            for file in files:
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, start=results_base)
                zipf.write(file_path, arcname)
    print(f"\nüì¶ All results zipped: {zip_name}")
    return zip_name

# ---------------- Entry Point ----------------
if __name__ == "__main__":
    ensure_tensorboard()
    data_meta = load_data_yaml(ROBOFLOW_DATA_YAML)
    print("üìä Classes:", data_meta.get("names"))
    summary_rows = []

    for task in TASKS:
        base_model = MODEL_MAP[task]
        rec = run_one_task(task, base_model, MANUAL_CONFIG, RESULTS_BASE)
        print("Run Summary:", rec)
        summary_rows.append(rec)

    pd.DataFrame(summary_rows).to_csv(SUMMARY_CSV, index=False)
    print(f"\n‚úÖ Manual tuning complete. Summary saved to: {SUMMARY_CSV}")

    acc_vals = [r["accuracy"] for r in summary_rows if r.get("accuracy")]
    if acc_vals:
        print(f"üìà Average Classification Accuracy: {np.mean(acc_vals):.3f}")

    zip_all_results(RESULTS_BASE, TIMESTAMP)



[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
Current W&B status: 02a2986eed98555093c4c2920a04c631a89430cc
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
loading Roboflow workspace...
loading Roboflow project...
‚úÖ Dataset downloaded successfully!
üìÇ YAML: /content/MOR-V4-1/data.yaml
üìä Classes: ['Over-Cooked', 'Perfectly-Cooked', 'Under-Cooked']


[34m[1mwandb[0m: Currently logged in as: [33mmmtampogao[0m ([33mmmtampogao-university-of-the-philippines[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin



--- Training CLS | Run: cls_e100_b16_lr0.001_optauto_p100 ---
üÜï Starting fresh training...
Ultralytics 8.3.224 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/MOR-V4-1/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n-cls.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=cls_e100_b16_lr0.001_o

RuntimeError: Dataset '/content/MOR-V4-1/data.yaml' error ‚ùå Classification datasets must be a directory (data="path/to/dir") not a file (data="/content/MOR-V4-1/data.yaml"), See https://docs.ultralytics.com/datasets/classify/