In [None]:
!pip install ultralytics

In [None]:
# train_yolov8_advanced.py
from ultralytics import YOLO
import torch

# Select the training device once so the status line and model.train() agree.
# The device used to be hard-coded to 0, which only works on a CUDA runtime.
if torch.cuda.is_available():
    train_device = 0                                   # first CUDA GPU (e.g. Colab)
    device_label = torch.cuda.get_device_name(0)
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    train_device = "mps"                               # Apple Silicon GPU when running locally on a Mac
    device_label = "MPS"
else:
    train_device = "cpu"
    device_label = "CPU"

# GPU status
print(" Torch version:", torch.__version__)
print("Using device:", device_label)


# Load model (yolov8n/s/m/l/x, or custom .pt checkpoint)
model = YOLO("yolov8n.pt")  # nano checkpoint; swap for yolov8s.pt / yolov8m.pt to try a larger model

# Train the model
model.train(
    data="/content/drive/MyDrive/detection/foodie/data.yaml",        # absolute path to the dataset YAML on Drive
    epochs=150,                     # upper bound; early stopping (patience below) may end sooner
    imgsz=640,                      # Image size
    batch=8,                        # Adjust based on available memory
    device=train_device,            # chosen above: CUDA index, "mps" or "cpu"

    # Advanced options
    optimizer="AdamW",
    lr0=0.001,                      # Initial learning rate
    lrf=0.01,                       # Final learning rate fraction
    #momentum=0.937,
    weight_decay=0.001,            # Regularization
    warmup_epochs=3,               # Warm-up to stabilize early training
    #warmup_bias_lr=0.05,

    # Augmentations
    hsv_h=0.015,                   # Image hue augmentation
    hsv_s=0.7,
    hsv_v=0.4,
    degrees=0.0,
    translate=0.1,
    scale=0.5,
    shear=0.0,
    perspective=0.0,
    flipud=0.0,
    mosaic=1.0,
    mixup=0.2,
    erasing=0.4,
    auto_augment="randaugment",


    # Training controls
    patience=20,                   # Early stopping patience
    val=True,                      # Run validation during training
    save=True,                     # Save checkpoints
    save_period=10,                # Save every 10 epochs
    pretrained=True,               # Use pre-trained weights
    workers=4,                     # Number of dataloader workers

    # Logging & output
    project="/content/drive/MyDrive/detection/yolo_results",   # Folder for all experiments
    name="exp_foodie_augmented",  # Name of this training run
    exist_ok=True,                # Reuse the run folder if it already exists
    verbose=True,
    seed=42                       # Reproducibility
)

print("Training complete!")

In [None]:
# --- locations of the interrupted run (edit if your run folder differs) ---
RUN_DIR = "/content/drive/MyDrive/detection/yolo_results/exp_foodie_augmented"
LAST = f"{RUN_DIR}/weights/last.pt"   # last checkpoint of that run

import os, sys

# Fail early, with a readable message, if the run folder is incomplete.
checkpoint_present = os.path.exists(LAST)
assert checkpoint_present, f"Can't find: {LAST}"

run_args_present = os.path.exists(os.path.join(RUN_DIR, "args.yaml"))
assert run_args_present, f"args.yaml not found next to weights in {RUN_DIR} (needed for resume)"

from ultralytics import YOLO

# Re-open the checkpoint from that run and continue training in the same run folder.
model = YOLO(LAST)
model.train(resume=True)

# To extend the total number of epochs instead (e.g. to 100 in total):
# model.train(resume=True, epochs=100)


In [None]:
import os
import cv2
import albumentations as A
from pathlib import Path
from tqdm import tqdm
import random

# === CONFIG ===
image_dir = "/content/drive/MyDrive/detection/foodie/train/images"
label_dir = "/content/drive/MyDrive/detection/foodie/train/labels"
samples_per_class = 150   # augmentation attempts made for each target class

# Classes to oversample: class name -> class index used in the YOLO label files
class_name_to_id = {
    'String Bean Chicken Breast': 5,
    'chinese_sausage': 11,
    'curry': 16,
    'water_spinach': 21,
    'tostitos cheese dip sauce': 22,
    'mung_bean_sprouts': 20,
    'black pepper rice bowl': 23
}
target_class_ids = {*class_name_to_id.values()}

# === AUGMENTATION PIPELINE ===
# Geometric transforms only. Boxes are passed in YOLO format and their class ids
# travel alongside them in the 'class_labels' field.
geometric_steps = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.Rotate(limit=15, p=0.5),
    A.ShiftScaleRotate(shift_limit=0.04, scale_limit=0.04, rotate_limit=15, p=0.6)
]
bbox_settings = A.BboxParams(format='yolo', label_fields=['class_labels'], min_visibility=0.3)
transform = A.Compose(geometric_steps, bbox_params=bbox_settings)

# === FUNCTION TO LOAD FILES BY CLASS ===
def get_images_by_class(labels_root=None, wanted_ids=None):
    """Group label-file stems by the first target class found in each file.

    Args:
        labels_root: Directory holding YOLO ``.txt`` label files. Defaults to the
            module-level ``label_dir``.
        wanted_ids: Iterable of class ids to look for. Defaults to the
            module-level ``target_class_ids``.

    Returns:
        dict mapping every wanted class id to a list of file stems (file name
        without the trailing ``.txt``). A file is listed at most once, under the
        first wanted class that appears in it.
    """
    if labels_root is None:
        labels_root = label_dir
    if wanted_ids is None:
        wanted_ids = target_class_ids

    class_to_images = {cls_id: [] for cls_id in wanted_ids}

    # Sorted so the stem lists do not depend on the filesystem's listing order.
    for label_file in sorted(os.listdir(labels_root)):
        if not label_file.endswith(".txt"):
            continue
        # splitext strips only the final extension; str.replace would also eat
        # any ".txt" that happens to sit in the middle of the name.
        stem = os.path.splitext(label_file)[0]

        try:
            with open(os.path.join(labels_root, label_file), "r") as f:
                lines = f.readlines()
        except (OSError, UnicodeDecodeError):
            # Unreadable file: skip it, but let everything else propagate.
            continue

        for line in lines:
            fields = line.split()
            if not fields:
                # A blank line must not cause the whole file to be skipped.
                continue
            try:
                cls_id = int(float(fields[0]))  # tolerate ids written as "5.0"
            except (ValueError, OverflowError):
                continue
            if cls_id in wanted_ids:
                class_to_images[cls_id].append(stem)
                break
    return class_to_images

# === START AUGMENTATION ===
# For every target class, draw `samples_per_class` random source images, run them
# through `transform`, and write the result next to the originals as
# "<stem>_aug<i>" with a matching YOLO label file.
class_to_images = get_images_by_class()
augmented_total = 0
skipped_total = 0   # attempts that produced no output (unreadable input, no usable boxes, ...)

# Extensions tried, in order, when locating the image that belongs to a label stem.
image_exts = (".jpg", ".jpeg", ".png")

print("\n Starting YOLOv8-safe augmentation...\n")
for cls_id, stems in class_to_images.items():
    if len(stems) == 0:
        print(f" No images found for class {cls_id}. Skipping.\n")
        continue

    print(f"Augmenting class {cls_id}...")
    for i in tqdm(range(samples_per_class), desc=f"Class {cls_id}"):
        stem = random.choice(stems)
        lbl_path = os.path.join(label_dir, stem + ".txt")

        # Locate and load the source image.
        image = None
        img_ext = None
        for candidate_ext in image_exts:
            candidate_path = os.path.join(image_dir, stem + candidate_ext)
            if os.path.exists(candidate_path):
                image = cv2.imread(candidate_path)
                img_ext = candidate_ext
                break
        if image is None:
            skipped_total += 1
            continue

        try:
            with open(lbl_path, "r") as f:
                lines = f.readlines()
        except (OSError, UnicodeDecodeError):
            skipped_total += 1
            continue

        bboxes = []
        class_labels = []
        for line in lines:
            try:
                parts = list(map(float, line.strip().split()))
                if len(parts) != 5:
                    continue
                cls, x, y, bw, bh = parts
                # Convert the class id first: if this fails, nothing has been
                # appended yet, so bboxes and class_labels stay aligned.
                cls_index = int(cls)
            except (ValueError, OverflowError):
                continue

            # Convert to corner format to check the box lies inside the image
            x_min = x - bw / 2
            y_min = y - bh / 2
            x_max = x + bw / 2
            y_max = y + bh / 2
            if not (0 <= x_min <= 1 and 0 <= y_min <= 1 and 0 <= x_max <= 1 and 0 <= y_max <= 1):
                continue
            bboxes.append([x, y, bw, bh])
            class_labels.append(cls_index)

        if not bboxes:
            skipped_total += 1
            continue

        try:
            transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels)
        except Exception:
            # Best effort: a sample the pipeline rejects is skipped, not fatal.
            skipped_total += 1
            continue

        aug_img = transformed['image']
        aug_boxes = transformed['bboxes']
        aug_cls = transformed['class_labels']

        # If no box survived the transform, writing the sample would produce an
        # empty label file for an image that was picked for its objects.
        if len(aug_boxes) == 0:
            skipped_total += 1
            continue

        out_stem = f"{stem}_aug{i}"
        out_img_path = os.path.join(image_dir, out_stem + img_ext)
        out_lbl_path = os.path.join(label_dir, out_stem + ".txt")

        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(out_img_path, aug_img):
            skipped_total += 1
            continue

        with open(out_lbl_path, "w") as f:
            for box, cls in zip(aug_boxes, aug_cls):
                f.write(f"{int(cls)} {box[0]:.6f} {box[1]:.6f} {box[2]:.6f} {box[3]:.6f}\n")

        augmented_total += 1

print(f"\n Augmentation done! Total new images: {augmented_total}")
if skipped_total:
    print(f" Skipped attempts: {skipped_total}")


In [None]:
# yolo_v8_rebalance_train.py (Colab-ready paths)
from ultralytics import YOLO
import yaml
from collections import Counter
from pathlib import Path

# --- Paths in Colab environment ---
# Everything here lives on the Colab VM disk under /content, except `results`,
# which points at a Google Drive folder.
DATA = "/content/foodie/data.yaml"   # dataset YAML in foodie folder
PROJECT = "/content/yolo_results"    # save results in /content/yolo_results
RUN_NAME = "exp_foodie_rebalance_v1" # sub-folder name for this run (passed as `name=` to model.train)
START_WEIGHTS = "/content/best.pt"   # uploaded best.pt in /content
# Drive-side results folder. Note this is a different location from PROJECT above.
results="/content/drive/MyDrive/detection/yolo_results"
def load_names(cfg):
    """Return the class names stored under ``cfg["names"]``.

    A dict value is turned into a list by looking up the keys ``0 .. len - 1``
    in order; any other value is returned unchanged.
    """
    names = cfg["names"]
    if not isinstance(names, dict):
        return names

    ordered = []
    for idx in range(len(names)):
        ordered.append(names[idx])
    return ordered

def count_classes(data_yaml_path: str):
    """Tally label lines per class for the train split that sits next to a dataset YAML.

    Args:
        data_yaml_path: Path to the dataset YAML. Its ``names`` entry supplies the
            class list, and ``<yaml parent>/train/labels`` is the directory scanned.

    Returns:
        ``(counts, names)``: a ``Counter`` keyed by class index, and the class
        names as returned by ``load_names``.

    Raises:
        FileNotFoundError: If the ``train/labels`` directory does not exist.
    """
    with open(data_yaml_path, "r") as yaml_file:
        names = load_names(yaml.safe_load(yaml_file))

    labels_dir = Path(data_yaml_path).parent / "train" / "labels"
    if not labels_dir.exists():
        raise FileNotFoundError(f"Labels dir not found: {labels_dir}")

    counts = Counter()
    bad_lines = 0

    for label_file in labels_dir.glob("*.txt"):
        for raw in label_file.read_text().splitlines():
            fields = raw.split()
            if not fields:
                # blank / whitespace-only line
                continue
            try:
                cid = int(float(fields[0]))  # tolerate "4.0"
            except Exception:
                cid = None
            # Unparseable ids and ids outside the names list are both counted as bad lines.
            if cid is not None and 0 <= cid < len(names):
                counts[cid] += 1
            else:
                bad_lines += 1

    print("Class counts (train set):")
    for idx, label in enumerate(names):
        print(f"{idx:2d} {label:25s} -> {counts.get(idx,0)}")
    if bad_lines:
        print(f"Skipped {bad_lines} malformed/out-of-range lines while counting.")
    return counts, names

if __name__ == "__main__":
    # Count training labels per class, derive a classification-loss weight from
    # how many classes are under-represented, then fine-tune from START_WEIGHTS.
    counts, names = count_classes(DATA)

    total = max(1, sum(counts.values()))
    avg = total / max(1, len(names))
    # Share of classes whose label count is below 70% of the per-class average.
    rare_ratio = sum(1 for i in range(len(names)) if counts.get(i, 0) < 0.7 * avg) / max(1, len(names))
    # Scales linearly from 1.0 (no rare classes) to 1.6 (all classes rare).
    cls_weight = 1.0 + min(0.6, 0.6 * rare_ratio)

    print(f"\n Using cls loss weight = {cls_weight:.2f}  (rare_ratio={rare_ratio:.2f})")
    print(f"Starting from weights: {START_WEIGHTS}")

    model = YOLO(START_WEIGHTS)

    # Kept under its own name: rebinding `results` to the return value made the
    # final `Path(results)` call fail, because it is not a path.
    train_results = model.train(
        data=DATA,
        epochs=80,
        imgsz=704,
        batch=-1,
        device=0,
        workers=4,
        cache='ram',

        optimizer="AdamW",
        lr0=8e-4,
        lrf=0.01,
        cos_lr=True,
        warmup_epochs=5,

        box=7.5,
        cls=cls_weight,
        dfl=1.5,

        mosaic=1.0,
        close_mosaic=12,
        mixup=0.25,
        copy_paste=0.30,
        erasing=0.35,
        hsv_h=0.015, hsv_s=0.7, hsv_v=0.45,
        degrees=5.0, translate=0.12, scale=0.5, shear=2.0,
        fliplr=0.5, flipud=0.0,
        perspective=0.0,

        patience=30,
        deterministic=True,
        amp=True,

        project=PROJECT,
        name=RUN_NAME,
        exist_ok=True,
        save_period=10,
        val=True,
        pretrained=False,  # custom weights already loaded
        plots=True,
        seed=42
    )

    # The run is written to project/name (exist_ok=True keeps that exact folder name).
    print(f"\n Done. Results at: {Path(PROJECT) / RUN_NAME}")


In [None]:
from ultralytics import YOLO

DATA = "/content/foodie/data.yaml"  # current yaml
START = "/content/best.pt"

model = YOLO(START)

# Class indices 0..23 with index 5 taken out; passed as `classes=` so the
# label files themselves do not have to be edited.
keep = list(range(24))
keep.remove(5)

# All training settings collected in one place, then handed to model.train().
train_args = dict(
    data=DATA,
    classes=keep,
    epochs=60,
    imgsz=736,
    batch=-1,
    device=0,
    workers=4,
    cache='ram',
    # optimizer / schedule
    optimizer="AdamW",
    lr0=8e-4,
    lrf=0.01,
    cos_lr=True,
    warmup_epochs=3,
    # loss gains
    box=7.5,
    cls=1.20,
    dfl=1.5,
    # augmentation
    mosaic=1.0,
    close_mosaic=12,
    mixup=0.15,
    copy_paste=0.30,
    erasing=0.35,
    hsv_h=0.015,
    hsv_s=0.7,
    hsv_v=0.45,
    degrees=5.0,
    translate=0.12,
    scale=0.5,
    shear=2.0,
    fliplr=0.5,
    flipud=0.0,
    # run control / output
    patience=25,
    deterministic=True,
    amp=True,
    project="/content/yolo_results",
    name="exp_foodie_no_stringbean",
    exist_ok=True,
    save_period=10,
    val=True,
    pretrained=False,
)

results = model.train(**train_args)


In [None]:
# --- FINAL FIX: Remove String Bean Chicken Breast from model predictions ---
from ultralytics import YOLO
from pathlib import Path
import yaml
import torch
import cv2
import numpy as np

# ====== CONFIG ======
MODEL_PT   = "/content/best.pt"             # weights to evaluate
DATA_YAML  = "/content/foodie/data.yaml"    # dataset YAML; its parent folder is treated as the dataset root
PROJECT    = "/content/yolo_results"        # output root for evaluation runs and rendered predictions
CONF, IOU, IMGSZ = 0.25, 0.50, 640          # passed as conf / iou / imgsz to model.val and model.predict
RENDER_PREDS = True                         # set False to skip writing annotated test images

# The class to completely remove (it exists in model but not in yaml)
REMOVE_CLASS_NAME = "String Bean Chicken Breast"   # used in log messages only
REMOVE_CLASS_ID = 5  # class index filtered out of metrics and predictions — NOTE(review): confirm against model.names
# ==================

print("=== REMOVING STRING BEAN CHICKEN BREAST FROM ALL RESULTS ===")

# Load the weights and report how many classes the model knows about
model = YOLO(MODEL_PT)
model_class_count = len(model.names)
print(f"Model has {model_class_count} classes")
print(f"Will remove class '{REMOVE_CLASS_NAME}' (ID: {REMOVE_CLASS_ID})")

# Parse the dataset YAML
dy_src = Path(DATA_YAML)
with dy_src.open("r") as f:
    data = yaml.safe_load(f)

# `names` may be a list or an index -> name dict; normalise a dict to a list
yaml_classes = data.get("names")
yaml_classes = (
    [yaml_classes[i] for i in range(len(yaml_classes))]
    if isinstance(yaml_classes, dict)
    else yaml_classes
)

yaml_class_count = len(yaml_classes)
print(f"YAML has {yaml_class_count} classes (String Bean removed)")

# Setup paths
root = dy_src.parent                      # dataset root = folder containing the YAML
test_images = root / "test" / "images"

# Patch yaml for validation: point any missing or non-existent split at the
# test images, and write the result to a "<stem>_patched.yaml" copy if needed.
# NOTE(review): existence is checked on the value exactly as written in the
# YAML, so relative entries are resolved against the current working directory.
need_patch = False
val_key = "valid" if "valid" in data else ("val" if "val" in data else None)
if val_key is None:
    # No validation entry at all: create one. `val_key` must be updated too,
    # otherwise the lookup below is data[None] and raises KeyError.
    val_key = "val"
    data[val_key] = str(test_images)
    need_patch = True

val_dir_path = Path(str(data[val_key]))
if not val_dir_path.exists():
    data[val_key] = str(test_images)
    need_patch = True

# str() keeps a non-string entry (e.g. a list of folders) from crashing Path().
if "train" not in data or not Path(str(data["train"])).exists():
    data["train"] = str(test_images)
    need_patch = True

if need_patch:
    dy_patched = dy_src.with_name(dy_src.stem + "_patched.yaml")
    with dy_patched.open("w") as f:
        yaml.safe_dump(data, f)
    data_yaml_for_eval = str(dy_patched)
else:
    data_yaml_for_eval = str(dy_src)

# Clear caches: remove any labels.cache file sitting directly in a split folder
# under the dataset root. Best effort; a failure is reported, not fatal.
for split_name in ["train", "valid", "val", "test"]:
    cache_file = root / split_name / "labels.cache"
    if not cache_file.exists():
        continue
    try:
        cache_file.unlink()
    except OSError as err:
        print(f"Could not clear cache {cache_file}: {err}")
    else:
        print(f"Cleared cache: {cache_file}")

# === EVALUATION WITH STRING BEAN FILTERED OUT ===
print(f"\n=== RUNNING EVALUATION (FILTERING OUT CLASS {REMOVE_CLASS_ID}) ===")

# Every class index the model knows, then the same list without the removed class
all_class_ids = list(range(len(model.names)))
classes_to_eval = list(filter(lambda cid: cid != REMOVE_CLASS_ID, all_class_ids))

print(f"Evaluating {len(classes_to_eval)} classes (excluding String Bean)")

# Best effort: an evaluation failure is printed and the notebook carries on.
try:
    metrics = model.val(
        data=data_yaml_for_eval,
        split="test",
        imgsz=IMGSZ,
        conf=CONF,
        iou=IOU,
        classes=classes_to_eval,  # restricts evaluation to the kept class indices
        plots=True,
        project=PROJECT,
        name="eval_no_string_bean",
        workers=4,
        exist_ok=True,
        verbose=True
    )

    print("\n--- FILTERED EVALUATION RESULTS ---")
    # (label, attribute on metrics.box) pairs, printed in this order
    summary_fields = (
        ("mAP50-95", "map"),
        ("mAP50", "map50"),
        ("Precision", "mp"),
        ("Recall", "mr"),
    )
    for label, attr in summary_fields:
        print(f"{label}: {getattr(metrics.box, attr):.3f}")
    print("String Bean Chicken Breast completely excluded from metrics")

except Exception as e:
    print(f"Evaluation error: {e}")

# === CLEAN PREDICTIONS (NO STRING BEAN IN VISUALIZATIONS) ===
# For every test image: predict, drop detections of REMOVE_CLASS_ID, and save
# either the annotated image (if anything is left) or the untouched original.
if RENDER_PREDS:
    print(f"\n=== GENERATING CLEAN VISUALIZATIONS (NO STRING BEAN) ===")

    save_dir = Path(PROJECT) / "predictions_no_string_bean"
    save_dir.mkdir(parents=True, exist_ok=True)

    test_files = list(test_images.glob("*.jpg")) + list(test_images.glob("*.png")) + list(test_images.glob("*.jpeg"))

    total_images = len(test_files)
    string_bean_detections_removed = 0
    images_with_string_bean = 0

    print(f"Processing {total_images} images...")

    for i, img_path in enumerate(test_files):
        if i % 100 == 0 and i > 0:
            print(f"Processed {i}/{total_images} images")

        # Get predictions
        results = model.predict(
            source=str(img_path),
            conf=CONF,
            iou=IOU,
            imgsz=IMGSZ,
            save=False,
            verbose=False
        )

        if len(results) == 0:
            continue
        result = results[0]

        # Stays None unless at least one non-String-Bean detection is left to draw.
        annotated = None

        if result.boxes is not None and len(result.boxes) > 0:
            # Count String Bean detections
            string_bean_mask = result.boxes.cls == REMOVE_CLASS_ID
            string_bean_count = string_bean_mask.sum().item()

            if string_bean_count > 0:
                images_with_string_bean += 1
                string_bean_detections_removed += string_bean_count
                print(f"  {img_path.name}: Removed {string_bean_count} String Bean detection(s)")

            # Keep only non-String Bean predictions
            keep_mask = result.boxes.cls != REMOVE_CLASS_ID

            if keep_mask.any():
                # Filter the underlying box tensor, then draw what is left
                result.boxes.data = result.boxes.data[keep_mask]
                annotated = result.plot()

        if annotated is None:
            # No detections, or all of them were String Bean: save the original image.
            annotated = cv2.imread(str(img_path))
            if annotated is None:
                # cv2.imread signals failure by returning None; imwrite would raise on it.
                print(f"  {img_path.name}: could not be read, skipped")
                continue

        output_path = save_dir / img_path.name
        cv2.imwrite(str(output_path), annotated)

    print(f"\n--- CLEANING SUMMARY ---")
    print(f" Processed {total_images} images")
    print(f" {images_with_string_bean} images had String Bean detections")
    print(f" Removed {string_bean_detections_removed} String Bean detection(s) total")
    print(f" Clean images saved to: {save_dir}")

# === ANALYSIS: Show what we're filtering out ===
print(f"\n=== ANALYSIS: What String Bean predictions look like ===")

# Sample the test images to find String Bean predictions. The file patterns and
# the predict settings match the rendering pass so both describe the same detections.
test_files = (
    list(test_images.glob("*.jpg"))
    + list(test_images.glob("*.png"))
    + list(test_images.glob("*.jpeg"))
)
string_bean_examples = []

for img_path in test_files[:50]:  # Check first 50 images
    results = model.predict(
        source=str(img_path),
        conf=CONF,
        iou=IOU,
        imgsz=IMGSZ,
        save=False,
        verbose=False
    )

    if len(results) > 0 and results[0].boxes is not None:
        boxes = results[0].boxes
        string_bean_mask = boxes.cls == REMOVE_CLASS_ID

        if string_bean_mask.any():
            string_bean_confs = boxes.conf[string_bean_mask]
            string_bean_examples.append({
                'image': img_path.name,
                'count': string_bean_mask.sum().item(),
                'max_conf': string_bean_confs.max().item(),
                'avg_conf': string_bean_confs.mean().item()
            })

if string_bean_examples:
    print(f"Found String Bean predictions in {len(string_bean_examples)} sample images:")
    for example in string_bean_examples[:5]:  # Show first 5
        print(f"  {example['image']}: {example['count']} detections, max_conf={example['max_conf']:.3f}")

    total_removed = sum(ex['count'] for ex in string_bean_examples)
    # Mean of the per-image average confidences (not weighted by detection count).
    avg_conf = np.mean([ex['avg_conf'] for ex in string_bean_examples])
    print(f"  Total String Bean detections found in sample: {total_removed}")
    print(f"  Average confidence: {avg_conf:.3f}")
else:
    print("No String Bean predictions found in sample images")

print(f"\n COMPLETE! String Bean Chicken Breast has been completely removed from:")
print(f"    Evaluation metrics")
print(f"    Visual predictions")
print(f"    All outputs")
print(f"\n Our model results are now clean!")


In [None]:
import shutil
from google.colab import files

# Pack the whole results folder into a zip archive that sits next to it
results_dir = '/content/yolo_results'
shutil.make_archive(results_dir, 'zip', results_dir)

# Hand the archive to the browser for download
files.download('/content/yolo_results.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>