# Car Flaw Detection – YOLOv8 (Apple Silicon / MPS)  
End-to-end notebook to:
1) Configure Apple Silicon for PyTorch MPS  
2) Download dataset from Roboflow (YOLOv8 format)  
3) Train a YOLOv8 model  
4) Validate & run inference  
5) Apply business rules to compute defect scores from predicted bounding boxes  
6) Save annotated outputs and a CSV report of decisions

**Scoring rules**
- If total bbox area ≥ 20% of the image → **Critical** → always **defective**
- If 15% ≤ total bbox area < 20% → **Major** → **defective** if bbox count > 2, else **non-defective**
- If 10% ≤ total bbox area < 15% → **Minor** → **defective** if bbox count > 5, else **non-defective**
- If total bbox area < 10% → **Acceptable** → **defective** if bbox count > 10, else **non-defective**

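> Note: Box area is computed as the sum of (x2-x1)*(y2-y1) over the predicted boxes of each image, after clipping every box to the image bounds. Overlapping boxes are not de-duplicated, so overlapping regions are counted more than once and the total can exceed 100%. Modify the thresholds in the constants section of the scoring-helpers cell if needed.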


In [None]:
# %% [setup] Optional installs (uncomment as needed)
# !pip install --upgrade pip
# !pip install ultralytics roboflow torch torchvision torchaudio matplotlib pandas numpy opencv-python-headless


In [None]:
# %% Imports & device
import os, sys, math, time, json, random, shutil
from pathlib import Path
from dataclasses import dataclass

import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from ultralytics import YOLO

# Device preference order: Apple Metal (MPS) on Apple Silicon, then CUDA, then CPU
DEVICE = (
    "mps"
    if torch.backends.mps.is_available()
    else "cuda"
    if torch.cuda.is_available()
    else "cpu"
)
print(f"Using device: {DEVICE}")

# Reproducibility: seed the Python, NumPy and PyTorch generators with the same value
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(42)
if DEVICE == "cuda":
    # Also seed every CUDA device explicitly when training on CUDA
    torch.cuda.manual_seed_all(42)


In [None]:
# %% Dataset: Roboflow (YOLOv8 format)
# Provided credentials / dataset reference
# If roboflow isn't installed in your env, run the install cell above.

from roboflow import Roboflow

# NOTE(security): the API key below is committed in this notebook. Prefer
# exporting ROBOFLOW_API_KEY in the environment; the literal is kept only as a
# fallback so the notebook keeps running unchanged. Consider rotating the key
# and removing the literal.
rf = Roboflow(api_key=os.environ.get("ROBOFLOW_API_KEY") or "Hbe2laKcVL5gAzL7ntgo")
project = rf.workspace("shaheer-ahmad-7ipmv").project("car-defect-cgxsg")
version = project.version(1)
dataset = version.download("yolov8")

# The download result exposes the local dataset directory as `.location`;
# if that attribute is missing, the result itself is treated as the path.
DATA_DIR = Path(getattr(dataset, "location", dataset))
DATA_YAML = DATA_DIR / "data.yaml"
# Explicit raise instead of `assert`, so the check still runs under `python -O`
if not DATA_YAML.exists():
    raise FileNotFoundError(f"data.yaml not found at {DATA_YAML}")
print("Dataset at:", DATA_DIR)
print("data.yaml:", DATA_YAML)


In [None]:
# %% Training configuration
# Checkpoint to start from. Start small; try yolov8s.pt/m.pt later.
MODEL_NAME = "yolov8n.pt"

# Passed to model.train() as `project` / `name`
PROJECT_NAME, RUN_NAME = "car_flaw_detection", "yolov8n_mps_v1"

# Training hyper-parameters (reduce BATCH if you hit OOM on MPS)
EPOCHS, IMGSZ, BATCH = 50, 640, 16


In [None]:
# %% Train
# Load the starting checkpoint and train it on the downloaded dataset
model = YOLO(MODEL_NAME)
print("Starting training…")

# Training arguments gathered in one place so they are easy to review and tweak
train_args = {
    "data": str(DATA_YAML),
    "epochs": EPOCHS,
    "imgsz": IMGSZ,
    "batch": BATCH,
    "device": DEVICE,
    "project": PROJECT_NAME,
    "name": RUN_NAME,
    "pretrained": True,
    "optimizer": "auto",
}
train_results = model.train(**train_args)

# Path to best weights (Ultralytics stores under trainer.save_dir)
best_weights = Path(model.trainer.save_dir).joinpath("weights", "best.pt")
print("Best weights:", best_weights, "| exists:", best_weights.exists())


In [None]:
# %% Validate
# Load the best checkpoint written by training as a fresh model object
best_model = YOLO(str(best_weights))
print("Validating best model…")

# Validate on the dataset described by data.yaml, at the training image size
val_metrics = best_model.val(device=DEVICE, imgsz=IMGSZ, data=str(DATA_YAML))

# Last expression of the cell, so the notebook displays the metrics object
val_metrics


In [None]:
# %% Scoring helpers (business rules)
from dataclasses import dataclass

@dataclass
class DefectDecision:
    """Result of applying the business scoring rules to one image's predictions."""

    image_path: str  # path of the scored image ("" until the caller fills it in)
    bbox_count: int  # number of predicted boxes
    area_pct: float  # summed (clipped) box area as a percentage of the image area
    severity: str    # "Critical" | "Major" | "Minor" | "Acceptable"
    defective: bool  # final defective / non-defective verdict


# Area thresholds, in percent of the image area covered by predicted boxes.
# Only the lower bound of each band drives the decision (bands are evaluated
# from most to least severe), so the bands can never leave a gap. The *_HIGH
# names are kept for readability and backward compatibility.
CRITICAL_PCT = 20.0
MAJOR_LOW, MAJOR_HIGH = 15.0, 20.0
MINOR_LOW, MINOR_HIGH = 10.0, 15.0
ACCEPTABLE_HIGH = 10.0

# Box-count limits per severity band: an image is defective when its box count
# is strictly greater than the limit of the band it falls into.
MAJOR_MAX_BOXES = 2
MINOR_MAX_BOXES = 5
ACCEPTABLE_MAX_BOXES = 10


def score_from_boxes(boxes_xyxy: np.ndarray, img_w: int, img_h: int) -> DefectDecision:
    """
    Compute total bbox area %, box count and the defect decision for one image.

    Args:
        boxes_xyxy: (N,4) array-like of [x1,y1,x2,y2] boxes in pixel coords.
            ``None``, an empty input, a nested list, or a single flat
            [x1,y1,x2,y2] box are all accepted.
        img_w: image width in pixels.
        img_h: image height in pixels.

    Returns:
        A ``DefectDecision`` with ``image_path`` left as "" for the caller to fill.
        ``area_pct`` is rounded to 3 decimals. Boxes are clipped to the image
        bounds; overlapping boxes are not de-duplicated, so the percentage can
        exceed 100. A zero-area image yields ``area_pct == 0.0``.
    """
    # Coerce to float64 so lists work and areas are not accumulated in float32/int32
    boxes = None if boxes_xyxy is None else np.asarray(boxes_xyxy, dtype=np.float64)

    if boxes is None or boxes.size == 0:
        n = 0
        area_pct = 0.0
    else:
        if boxes.ndim == 1:
            # A single flat [x1,y1,x2,y2] box: promote to shape (1, 4)
            boxes = boxes[np.newaxis, :]
        n = int(boxes.shape[0])

        # Clip every corner to the image so out-of-frame parts do not count
        x1 = np.clip(boxes[:, 0], 0, img_w)
        y1 = np.clip(boxes[:, 1], 0, img_h)
        x2 = np.clip(boxes[:, 2], 0, img_w)
        y2 = np.clip(boxes[:, 3], 0, img_h)

        # Inverted boxes (x2 < x1 or y2 < y1) contribute zero area
        w = np.maximum(0, x2 - x1)
        h = np.maximum(0, y2 - y1)
        total_area = float(np.sum(w * h))
        img_area = float(img_w * img_h)
        area_pct = (total_area / img_area) * 100.0 if img_area > 0 else 0.0

    # Bands are checked from most to least severe; each one only needs its lower bound
    if area_pct >= CRITICAL_PCT:
        severity = "Critical"
        defective = True
    elif area_pct >= MAJOR_LOW:
        severity = "Major"
        defective = n > MAJOR_MAX_BOXES
    elif area_pct >= MINOR_LOW:
        severity = "Minor"
        defective = n > MINOR_MAX_BOXES
    else:
        severity = "Acceptable"
        defective = n > ACCEPTABLE_MAX_BOXES

    return DefectDecision(
        image_path="",
        bbox_count=n,
        area_pct=round(area_pct, 3),
        severity=severity,
        defective=bool(defective),
    )


In [None]:
# %% Batch inference & CSV report
from pathlib import Path

RESULTS_DIR = Path("results_inference")
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Validation images are the default inference source. The "valid" folder is
# tried first; "val" is the alternative split name, used only when "valid"
# yields no images.
val_images = []
for split_name in ("valid", "val"):
    images_dir = DATA_DIR / split_name / "images"
    # Order: all .jpg files (sorted), then .png, then .jpeg
    val_images = [
        image_file
        for pattern in ("*.jpg", "*.png", "*.jpeg")
        for image_file in sorted(images_dir.glob(pattern))
    ]
    if val_images:
        break

print(f"Found {len(val_images)} validation images for inference.")

# Run prediction over every image; annotated copies are saved under
# RESULTS_DIR / "pred_annotated" via project/name
pred = list(
    best_model.predict(
        source=list(map(str, val_images)),
        device=DEVICE,
        imgsz=IMGSZ,
        conf=0.25,
        iou=0.45,
        save=True,
        project=str(RESULTS_DIR),
        name="pred_annotated",
        exist_ok=True,
    )
)

# Score each result with the business rules and collect one CSV row per image
records = []
for result in pred:
    img_h, img_w = result.orig_shape
    if result.boxes is None:
        boxes_xyxy = np.zeros((0,4))
    else:
        boxes_xyxy = result.boxes.xyxy.cpu().numpy()

    decision = score_from_boxes(boxes_xyxy, img_w, img_h)
    decision.image_path = result.path

    row = {
        "image": decision.image_path,
        "bbox_count": decision.bbox_count,
        "area_pct": decision.area_pct,
        "severity": decision.severity,
        "defective": decision.defective,
    }
    records.append(row)

report_csv = RESULTS_DIR / "car_flaw_decisions.csv"
report_df = pd.DataFrame.from_records(records)
report_df.to_csv(report_csv, index=False)
print("Saved report:", report_csv)

# Last expression of the cell, so the notebook previews the first rows
report_df.head(10)


In [None]:
# %% Single-image example & visualization
# If OpenCV is missing, run: pip install opencv-python-headless
import cv2

def visualize_with_title(img_path: str, title: str):
    """Show the image stored at ``img_path`` in a matplotlib figure titled ``title``.

    If OpenCV cannot read the file, a message is printed and nothing is plotted.
    """
    bgr_pixels = cv2.imread(img_path)
    if bgr_pixels is None:
        print(f"Could not read {img_path}")
        return

    # Convert from OpenCV's BGR channel order to RGB before handing to matplotlib
    rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(8, 6))
    plt.imshow(rgb_pixels)
    plt.title(title)
    plt.axis("off")
    plt.show()

# Score and display the first validation image, if there is one
if val_images:
    example = str(val_images[0])
    example_pred = best_model(example, imgsz=IMGSZ, device=DEVICE, conf=0.25, iou=0.45)[0]
    H, W = example_pred.orig_shape

    if example_pred.boxes is None:
        boxes = np.zeros((0,4))
    else:
        boxes = example_pred.boxes.xyxy.cpu().numpy()

    ex_decision = score_from_boxes(boxes, W, H)
    title = f"%Area={ex_decision.area_pct:.2f}% | boxes={ex_decision.bbox_count} | {ex_decision.severity} | defective={ex_decision.defective}"

    # Prefer the annotated copy with the same file name, if it exists;
    # otherwise show the raw image
    annotated_dir = RESULTS_DIR / "pred_annotated"
    ann_candidate = annotated_dir / Path(example).name
    if ann_candidate.exists():
        visualize_with_title(str(ann_candidate), title)
    else:
        visualize_with_title(str(example), title)


In [None]:
# %% Optional export for deployment
# best_model.export(format="onnx")
# best_model.export(format="coreml")


In [None]:
# %% Inference utility for production use
def predict_and_score(image_path: str, model_path: "str | None" = None,
                      conf: float = 0.25, iou: float = 0.45) -> tuple:
    """Run the detector on one image and apply the business scoring rules.

    Args:
        image_path: path of the image to score.
        model_path: weights file to load; when omitted (or empty) the
            notebook's ``best_weights`` checkpoint is used.
        conf: ``conf`` value forwarded to the model call (default 0.25, the
            value used elsewhere in this notebook).
        iou: ``iou`` value forwarded to the model call (default 0.45, the
            value used elsewhere in this notebook).

    Returns:
        ``(decision, result)``: the ``DefectDecision`` for the image, with
        ``image_path`` filled in, and the first result object returned by the
        model for that image.
    """
    # NOTE: the model is loaded on every call; hoist this if calling in a loop
    m = YOLO(model_path or str(best_weights))
    r = m(image_path, device=DEVICE, imgsz=IMGSZ, conf=conf, iou=iou)[0]
    H, W = r.orig_shape
    # No boxes object means no detections: score an empty (0, 4) array
    boxes = r.boxes.xyxy.cpu().numpy() if r.boxes is not None else np.zeros((0, 4))
    decision = score_from_boxes(boxes, W, H)
    decision.image_path = image_path
    return decision, r

# Example usage:
# d, res = predict_and_score("/path/to/car_image.jpg")
# print(d)
# res.show()  # or use Ultralytics save options
