In [1]:
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import os

# Define your local dataset root path
DATA_ROOT = "/Users/vinaykasa/Documents/coding-practice/code/"

# Walk through the dataset directory and print all file paths
for dirname, _, filenames in os.walk(DATA_ROOT):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/Users/vinaykasa/Documents/coding-practice/code/code.ipynb
/Users/vinaykasa/Documents/coding-practice/code/train_coco.json
/Users/vinaykasa/Documents/coding-practice/code/val_coco.json
/Users/vinaykasa/Documents/coding-practice/code/finalproject.ipynb
/Users/vinaykasa/Documents/coding-practice/code/archive-4/0Val_via_annos.json
/Users/vinaykasa/Documents/coding-practice/code/archive-4/0Train_via_annos.json
/Users/vinaykasa/Documents/coding-practice/code/archive-4/image/image/03022020_161010image945451.jpg
/Users/vinaykasa/Documents/coding-practice/code/archive-4/image/image/10012020_095236image503936.jpg
/Users/vinaykasa/Documents/coding-practice/code/archive-4/image/image/30012020_104507image76197.jpg
/Users/vinaykasa/Documents/coding-practice/code/archive-4/image/image/14012020_111701image873706.jpg
/Users/vinaykasa/Documents/coding-practice/code/archive-4/image/image/16012020_081638image407983.jpg
/Users/vinaykasa/Documents/coding-practice/code/archive-4/image/image/05012021_114600i

In [2]:
import os
import json
from PIL import Image

# Paths
train_annotations = '/Users/vinaykasa/Documents/coding-practice/code/archive-4/0Train_via_annos.json'
train_images = '/Users/vinaykasa/Documents/coding-practice/code/archive-4/image/image'
val_annotations = '/Users/vinaykasa/Documents/coding-practice/code/archive-4/0Val_via_annos.json'
val_images = '/Users/vinaykasa/Documents/coding-practice/code/archive-4/validation/validation'

# Class mapping (COCO: 1-based index)
CLASSES = {
    'mat_bo_phan': 0,
    'rach': 1,
    'mop_lom': 2,
    'tray_son': 3,
    'thung': 4,
    'vo_kinh': 5,
    'be_den': 6
}

def polygon_to_bbox(x_points, y_points):
    """Convert polygon points to COCO bbox format."""
    x_min = max(0.0, float(min(x_points)))
    y_min = max(0.0, float(min(y_points)))
    x_max = float(max(x_points))
    y_max = float(max(y_points))
    width = max(1.0, x_max - x_min)
    height = max(1.0, y_max - y_min)
    return [x_min, y_min, width, height]

def convert_via_to_coco(via_json, image_dir, output_file):
    with open(via_json, 'r') as f:
        data = json.load(f)

    coco = {
        "images": [],
        "annotations": [],
        "categories": [{"id": v, "name": k} for k, v in CLASSES.items()]
    }

    ann_id = 1
    img_id = 1
    skipped = 0

    for filename, file_data in data.items():
        img_path = os.path.join(image_dir, filename)
        try:
            with Image.open(img_path) as img:
                width, height = img.size
        except Exception as e:
            print(f"Skipping image {filename}: {e}")
            continue

        coco["images"].append({
            "id": img_id,
            "file_name": filename,
            "width": width,
            "height": height
        })

        for region in file_data.get("regions", []):
            all_x = region.get("all_x")
            all_y = region.get("all_y")
            label = region.get("class")

            if not all_x or not all_y or label not in CLASSES:
                skipped += 1
                continue

            bbox = polygon_to_bbox(all_x, all_y)
            x, y, w, h = bbox
            x = min(x, width - 1)
            y = min(y, height - 1)
            w = min(w, width - x)
            h = min(h, height - y)

            if w <= 0 or h <= 0:
                skipped += 1
                continue

            coco["annotations"].append({
                "id": ann_id,
                "image_id": img_id,
                "category_id": CLASSES[label],
                "bbox": [x, y, w, h],
                "area": w * h,
                "iscrowd": 0
            })
            ann_id += 1

        img_id += 1

    with open(output_file, 'w') as f:
        json.dump(coco, f, indent=2)

    print(f"Saved: {output_file}")
    print(f"Total images: {img_id - 1}, Total annotations: {ann_id - 1}, Skipped: {skipped}")

# Run conversion
print("Processing training set...")
convert_via_to_coco(train_annotations, train_images, 'train_coco.json')

print("\nProcessing validation set...")
convert_via_to_coco(val_annotations, val_images, 'val_coco.json')


Processing training set...
Saved: train_coco.json
Total images: 11621, Total annotations: 30046, Skipped: 0

Processing validation set...
Saved: val_coco.json
Total images: 2324, Total annotations: 6035, Skipped: 0


In [3]:
!pip install albumentations



In [4]:
import os
import json
import cv2
import torch
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import albumentations as A
from albumentations.pytorch import ToTensorV2

import torch

if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print(f"[INFO] Using device: {device}")

# Constants
IMAGE_SIZE = 512
NUM_CLASSES = 8  # 7 classes + background

# ----------------------- Dataset Definition -----------------------
class VehicleDamageDataset(Dataset):
    def __init__(self, coco_json, img_dir, transforms=None):
        self.img_dir = img_dir
        self.transforms = transforms

        with open(coco_json, 'r') as f:
            coco = json.load(f)

        self.image_dict = {img['id']: img for img in coco['images']}
        self.annotations = {}
        for ann in coco['annotations']:
            self.annotations.setdefault(ann['image_id'], []).append(ann)

        self.image_ids = list(self.annotations.keys())

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        image_info = self.image_dict[image_id]
        image_path = os.path.join(self.img_dir, image_info['file_name'])

        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Image not found: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        anns = self.annotations.get(image_id, [])
        boxes, labels = [], []

        for ann in anns:
            x, y, w, h = ann['bbox']
            if w > 0 and h > 0:
                boxes.append([x, y, x + w, y + h])
                labels.append(ann['category_id'])

        boxes = np.array(boxes, dtype=np.float32)
        labels = np.array(labels, dtype=np.int64)

        if self.transforms:
            if len(boxes) > 0:
                transformed = self.transforms(image=image, bboxes=boxes, labels=labels)
                image = transformed['image']
                boxes = torch.as_tensor(transformed['bboxes'], dtype=torch.float32)
                labels = torch.as_tensor(transformed['labels'], dtype=torch.int64)
            else:
                image = self.transforms(image=image, bboxes=[], labels=[])['image']
                boxes = torch.zeros((0, 4), dtype=torch.float32)
                labels = torch.zeros((0,), dtype=torch.int64)
        else:
            image = torch.from_numpy(image.transpose((2, 0, 1))).float() / 255.0
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([image_id])
        }

        return image, target

# ----------------------- Model Definition -----------------------
def get_model(num_classes):
    backbone = resnet_fpn_backbone('resnet50', pretrained=False)
    model = FasterRCNN(backbone, num_classes=num_classes)
    return model

# ----------------------- Transforms -----------------------
def get_transforms(train=True):
    transforms = [
        A.Resize(IMAGE_SIZE, IMAGE_SIZE),
        A.Normalize(mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225)),
        ToTensorV2()
    ]
    if train:
        transforms.insert(0, A.HorizontalFlip(p=0.5))
        transforms.insert(1, A.RandomBrightnessContrast(p=0.2))
    return A.Compose(
        transforms,
        bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels'])
    )

# ----------------------- Data Loaders -----------------------
def get_data_loaders(train_json, val_json, train_img_dir, val_img_dir, batch_size=4):
    train_dataset = VehicleDamageDataset(train_json, train_img_dir, transforms=get_transforms(train=True))
    val_dataset = VehicleDamageDataset(val_json, val_img_dir, transforms=get_transforms(train=False))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                              num_workers=2, collate_fn=lambda b: tuple(zip(*b)))
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                            num_workers=2, collate_fn=lambda b: tuple(zip(*b)))

    print(f"[INFO] Train dataset: {len(train_dataset)} images")
    print(f"[INFO] Validation dataset: {len(val_dataset)} images")
    return train_loader, val_loader

# ----------------------- Execution -----------------------
if __name__ == "__main__":
    model = get_model(NUM_CLASSES).to(device)
    train_loader, val_loader = get_data_loaders(
        train_json='/Users/vinaykasa/Documents/coding-practice/code/train_coco.json',
        val_json='/Users/vinaykasa/Documents/coding-practice/code/val_coco.json',
        train_img_dir='/Users/vinaykasa/Documents/coding-practice/code/archive-4/image/image',
        val_img_dir='/Users/vinaykasa/Documents/coding-practice/code/archive-4/validation/validation',
        batch_size=4
    )

    print("[INFO] Model and DataLoaders are ready.")


[INFO] Using device: mps




[INFO] Train dataset: 11621 images
[INFO] Validation dataset: 2324 images
[INFO] Model and DataLoaders are ready.


In [5]:
!pip install torchmetrics




In [6]:
!pip install -q torchmetrics


In [7]:
#!/usr/bin/env python3
# -------------------------------------------------------------
# Faster R‑CNN (ResNet‑50‑FPN) on COCO‑style Vehide dataset
# macOS / CPU‑only version – no AMP, no MPS, no CUDA
# -------------------------------------------------------------
# ----------------------------------------------------
# place these two lines **before** importing torch
import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"   # CPU‑fallback for missing ops
# ----------------------------------------------------

import torch, torchvision   # ← now import the stack
device = torch.device("mps")  # or autodetect
import os, json, cv2, platform
from pathlib import Path
from tqdm import tqdm
import numpy as np

import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR

from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.ops import box_iou

from torchmetrics.detection.mean_ap import MeanAveragePrecision
import albumentations as A
from albumentations.pytorch import ToTensorV2

# --------------------- CONFIG ---------------------
CONFIG = {
    # update these three paths ↓↓↓
    "TRAIN_JSON": "/Users/vinaykasa/Documents/coding-practice/code/train_coco.json",
    "VAL_JSON"  : "/Users/vinaykasa/Documents/coding-practice/code/val_coco.json",
    "DATA_ROOT" : "/Users/vinaykasa/Documents/coding-practice/code/archive-4",  # contains image/ and validation/

    # training hyper‑params
    "num_classes"         : 8,           # background + 7 defect classes?
    "num_epochs"          : 50,
    "batch_size"          : 8,           # safe for CPU RAM
    "learning_rate"       : 1e-3,
    "weight_decay"        : 5e-4,
    "step_lr_patience"    : 3,
    "step_lr_gamma"       : 0.1,
    "max_steps_per_epoch" : 450,
    "img_size"            : 512,
    "best_ckpt"           : "best_model.pth",
}
# --------------------------------------------------

# ------------------ DATASET -----------------------
class VehicleDamageDataset(torch.utils.data.Dataset):
    """
    COCO‑style dataset wrapper.
    """

    def __init__(self, coco_json: str, img_dir: str, transforms=None):
        self.img_dir   = img_dir
        self.transforms = transforms

        with open(coco_json, 'r') as f:
            coco = json.load(f)

        self.img_map = {img["id"]: img for img in coco["images"]}
        self.ann_map = {}
        for ann in coco["annotations"]:
            self.ann_map.setdefault(ann["image_id"], []).append(ann)

        self.ids = list(self.ann_map.keys())

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        img_id   = self.ids[idx]
        img_info = self.img_map[img_id]
        img_path = os.path.join(self.img_dir, img_info["file_name"])

        # read + BGR→RGB
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # build boxes / labels
        boxes, labels = [], []
        for a in self.ann_map.get(img_id, []):
            x, y, w, h = a["bbox"]
            boxes.append([x, y, x + w, y + h])
            labels.append(a["category_id"])

        # Albumentations
        if self.transforms:
            tr = self.transforms(image=img, bboxes=boxes, labels=labels)
            img    = tr["image"]
            boxes  = torch.tensor(tr["bboxes"], dtype=torch.float32)
            labels = torch.tensor(tr["labels"], dtype=torch.int64)
        else:
            img = torch.from_numpy(img.transpose(2, 0, 1)).float() / 255.0
            boxes  = torch.tensor(boxes , dtype=torch.float32)
            labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            "boxes"   : boxes,
            "labels"  : labels,
            "image_id": torch.tensor([img_id])
        }
        return img, target

# --------------- TRANSFORMS -----------------------
def get_transforms(train: bool = True):
    base = [
        A.Resize(CONFIG["img_size"], CONFIG["img_size"]),
        A.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
        ToTensorV2()
    ]
    if train:
        return A.Compose(
            [A.HorizontalFlip(p=0.5),
             A.RandomBrightnessContrast(p=0.2)] + base,
            bbox_params=A.BboxParams(format="pascal_voc",
                                     label_fields=["labels"])
        )
    return A.Compose(base,
                     bbox_params=A.BboxParams(format="pascal_voc",
                                              label_fields=["labels"]))

# ------------------ MODEL -------------------------
def get_model(num_classes: int):
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_feats = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_feats, num_classes)
    return model

# -------------- UTILITIES -------------------------
def collate_fn(batch):
    return tuple(zip(*batch))

def train_one_epoch(model, loader, optimizer, device):
    model.train()
    total_loss, step = 0.0, 0
    pbar = tqdm(loader, total=min(len(loader), CONFIG["max_steps_per_epoch"]))
    for imgs, targs in pbar:
        if step >= CONFIG["max_steps_per_epoch"]:
            break
        imgs   = [x.to(device) for x in imgs]
        targs  = [{k: v.to(device) for k,v in t.items()} for t in targs]

        loss_d = model(imgs, targs)
        loss   = sum(loss_d.values())
        loss.backward()

        optimizer.step(); optimizer.zero_grad()

        total_loss += loss.item()
        pbar.set_description(f"loss: {loss.item():.4f}")
        step += 1
    return total_loss / max(step,1)

@torch.no_grad()
def evaluate(model, loader, device):
    model.eval()
    mAP = MeanAveragePrecision(box_format="xyxy", class_metrics=False).to(device)

    correct = tot_pred = tot_gt = 0
    for imgs, targs in tqdm(loader, desc="val"):
        imgs  = [x.to(device) for x in imgs]
        targs = [{k: v.to(device) for k,v in t.items()} for t in targs]
        outs  = model(imgs)

        preds = []
        gts   = []
        for o, t in zip(outs, targs):
            keep = o["scores"] > 0.3
            preds.append({
                "boxes" : o["boxes"][keep].cpu(),
                "scores": o["scores"][keep].cpu(),
                "labels": o["labels"][keep].cpu()
            })
            gts.append({
                "boxes" : t["boxes"].cpu(),
                "labels": t["labels"].cpu()
            })

            # simple precision/recall
            if len(preds[-1]["boxes"]) and len(gts[-1]["boxes"]):
                iou = box_iou(preds[-1]["boxes"], gts[-1]["boxes"])
                matches = (iou >= 0.3)
                for i in range(matches.size(0)):
                    for j in range(matches.size(1)):
                        if matches[i,j] and preds[-1]["labels"][i]==gts[-1]["labels"][j]:
                            correct += 1
                            matches[:,j] = False
                            break
            tot_pred += len(preds[-1]["labels"])
            tot_gt   += len(gts[-1]["labels"])

        mAP.update(preds, gts)

    prec = correct / max(tot_pred,1)
    rec  = correct / max(tot_gt,1)
    f1   = (2*prec*rec) / (prec+rec+1e-8)
    return mAP.compute()["map"], prec, rec, f1

# ------------------- MAIN -------------------------
def main():
    device = torch.device("cpu")
    print(f"[INFO] device: {device.type}")

    # datasets / loaders
    train_ds = VehicleDamageDataset(
        CONFIG["TRAIN_JSON"],
        os.path.join(CONFIG["DATA_ROOT"], "image", "image"),
        transforms=get_transforms(train=True)
    )
    val_ds   = VehicleDamageDataset(
        CONFIG["VAL_JSON"],
        os.path.join(CONFIG["DATA_ROOT"], "validation", "validation"),
        transforms=get_transforms(train=False)
    )
    train_dl = DataLoader(train_ds, batch_size=CONFIG["batch_size"],
                          shuffle=True, num_workers=0, collate_fn=collate_fn)
    val_dl   = DataLoader(val_ds, batch_size=CONFIG["batch_size"],
                          shuffle=False, num_workers=0, collate_fn=collate_fn)

    # model / optim
    model = get_model(CONFIG["num_classes"]).to(device)
    optim_ = optim.SGD(model.parameters(), lr=CONFIG["learning_rate"],
                       momentum=0.9, weight_decay=CONFIG["weight_decay"])
    sched  = StepLR(optim_, step_size=CONFIG["step_lr_patience"],
                    gamma=CONFIG["step_lr_gamma"])

    best_map = 0.0
    for epoch in range(CONFIG["num_epochs"]):
        print(f"\nEpoch {epoch+1}/{CONFIG['num_epochs']}")
        loss = train_one_epoch(model, train_dl, optim_, device)
        mAP, prec, rec, f1 = evaluate(model, val_dl, device)
        print(f" ↳ loss {loss:.4f} | mAP {mAP:.4f} | P {prec:.4f} | R {rec:.4f} | F1 {f1:.4f}")

        if mAP > best_map:
            best_map = mAP
            torch.save({
                "epoch": epoch,
                "model": model.state_dict(),
                "optim": optim_.state_dict(),
                "mAP"  : best_map
            }, CONFIG["best_ckpt"])
            print(f"[✓] saved new best checkpoint (mAP={best_map:.4f})")

        sched.step()

    print("\nTraining finished.")
    if Path(CONFIG["best_ckpt"]).exists():
        ckpt = torch.load(CONFIG["best_ckpt"], map_location=device)
        print(f"[INFO] best epoch = {ckpt['epoch']+1}, best mAP = {ckpt['mAP']:.4f}")

if __name__ == "__main__":
    main()

[INFO] device: cpu





Epoch 1/50


loss: 0.5275:  10%|█         | 46/450 [17:08<2:30:29, 22.35s/it]


KeyboardInterrupt: 

In [None]:
#!/usr/bin/env python3
# -------------------------------------------------------------
# 0️⃣  Set env var FIRST, before importing torch / torchvision
import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"      # CPU fallback

# 1️⃣  Now import torch / torchvision and monkey‑patch NMS
import torch, torchvision
from torchvision.ops import nms as _nms_cpu
def _nms_fallback(boxes, scores, iou_thr):
    if boxes.device.type != "cpu":                   # MPS / CUDA
        idx = _nms_cpu(boxes.cpu(), scores.cpu(), iou_thr)
        return idx.to(boxes.device)                  # send back
    return _nms_cpu(boxes, scores, iou_thr)
torchvision.ops.nms = _nms_fallback
torchvision.ops.batched_nms = _nms_fallback

# ----------------------------------------------------------------------
import json, cv2
from pathlib import Path
from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.ops import box_iou
from torchmetrics.detection.mean_ap import MeanAveragePrecision

# ----------------------- CONFIG ---------------------------------------
CONFIG = {
    "TRAIN_JSON": "/Users/vinaykasa/Documents/coding-practice/code/train_coco.json",
    "VAL_JSON"  : "/Users/vinaykasa/Documents/coding-practice/code/val_coco.json",
    "DATA_ROOT" : "/Users/vinaykasa/Documents/coding-practice/code/archive-4",
    "num_classes": 8,
    "num_epochs": 50,
    "batch_size": 8,
    "learning_rate": 1e-3,
    "weight_decay": 5e-4,
    "step_lr_patience": 3,
    "step_lr_gamma": 0.1,
    "max_steps_per_epoch": 450,
    "img_size": 512,
    "best_ckpt": "best_model_mps.pth",
}
# ----------------------------------------------------------------------

# -------------------- DATASET ----------------------------------------
class VehicleDamageDataset(torch.utils.data.Dataset):
    def __init__(self, coco_json, img_dir, transforms=None):
        self.img_dir, self.transforms = img_dir, transforms
        with open(coco_json, "r") as f:
            coco = json.load(f)
        self.img_map = {img["id"]: img for img in coco["images"]}
        self.ann_map = {}
        for ann in coco["annotations"]:
            self.ann_map.setdefault(ann["image_id"], []).append(ann)
        self.ids = list(self.ann_map.keys())

    def __len__(self): return len(self.ids)

    def __getitem__(self, idx):
        img_id = self.ids[idx]
        meta   = self.img_map[img_id]
        path   = os.path.join(self.img_dir, meta["file_name"])
        img = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)

        boxes, labels = [], []
        for a in self.ann_map.get(img_id, []):
            x, y, w, h = a["bbox"]
            boxes.append([x, y, x + w, y + h])
            labels.append(a["category_id"])

        if self.transforms:
            tr  = self.transforms(image=img, bboxes=boxes, labels=labels)
            img = tr["image"]
            boxes  = torch.tensor(tr["bboxes"], dtype=torch.float32)
            labels = torch.tensor(tr["labels"], dtype=torch.int64)
        else:
            img = torch.from_numpy(img.transpose(2,0,1)).float() / 255.0
            boxes  = torch.tensor(boxes, dtype=torch.float32)
            labels = torch.tensor(labels, dtype=torch.int64)

        return img, {"boxes": boxes, "labels": labels, "image_id": torch.tensor([img_id])}

def get_transforms(train=True):
    base = [
        A.Resize(CONFIG["img_size"], CONFIG["img_size"]),
        A.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
        ToTensorV2()
    ]
    if train:
        return A.Compose([A.HorizontalFlip(p=0.5),
                          A.RandomBrightnessContrast(p=0.2)]+base,
                         bbox_params=A.BboxParams(format="pascal_voc",
                                                  label_fields=["labels"]))
    return A.Compose(base,bbox_params=A.BboxParams(format="pascal_voc",
                                                   label_fields=["labels"]))

# ------------------- MODEL -------------------------------------------
def get_model(nc):
    model = fasterrcnn_resnet50_fpn(pretrained=True)
    in_f  = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_f, nc)
    return model

def collate_fn(b): return tuple(zip(*b))

# ------------------- TRAIN / EVAL ------------------------------------
def train_one_epoch(model, dl, opt, dev):
    model.train()
    tot, step = 0., 0
    pbar = tqdm(dl, total=min(len(dl), CONFIG["max_steps_per_epoch"]))
    for imgs, targs in pbar:
        if step >= CONFIG["max_steps_per_epoch"]: break
        imgs  = [i.to(dev) for i in imgs]
        targs = [{k:v.to(dev) for k,v in t.items()} for t in targs]

        loss = sum(model(imgs, targs).values())
        loss.backward()
        opt.step(); opt.zero_grad()

        tot += loss.item(); step += 1
        pbar.set_description(f"loss {loss.item():.4f}")
    return tot / max(step,1)

@torch.no_grad()
def evaluate(model, dl, dev):
    model.eval()
    mAP = MeanAveragePrecision(box_format="xyxy").to(dev)
    corr = tot_p = tot_t = 0

    for imgs, targs in tqdm(dl, desc="val"):
        imgs = [i.to(dev) for i in imgs]
        targs = [{k:v.to(dev) for k,v in t.items()} for t in targs]
        outs  = model(imgs)

        preds, gts = [], []
        for o, t in zip(outs, targs):
            keep = o["scores"] > 0.3
            preds.append({"boxes": o["boxes"][keep].cpu(),
                          "scores": o["scores"][keep].cpu(),
                          "labels": o["labels"][keep].cpu()})
            gts.append({"boxes": t["boxes"].cpu(),
                        "labels": t["labels"].cpu()})

            if preds[-1]["boxes"].numel() and gts[-1]["boxes"].numel():
                iou = box_iou(preds[-1]["boxes"], gts[-1]["boxes"])
                m   = (iou >= 0.3)
                for i in range(m.size(0)):
                    for j in range(m.size(1)):
                        if m[i,j] and preds[-1]["labels"][i]==gts[-1]["labels"][j]:
                            corr += 1; m[:,j]=False; break
            tot_p += len(preds[-1]["labels"]); tot_t += len(gts[-1]["labels"])
        mAP.update(preds, gts)

    P = corr/max(tot_p,1); R = corr/max(tot_t,1)
    F1 = 2*P*R/(P+R+1e-8)
    return mAP.compute()["map"], P, R, F1

# -------------------- MAIN -------------------------------------------
def main():
    # ❸ Device selection: prefer MPS, else CPU
    device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
    print(f"[INFO] training on: {device}")

    # data
    root = Path(CONFIG["DATA_ROOT"])
    tr_ds = VehicleDamageDataset(CONFIG["TRAIN_JSON"], root/"image"/"image",
                                 get_transforms(True))
    va_ds = VehicleDamageDataset(CONFIG["VAL_JSON"], root/"validation"/"validation",
                                 get_transforms(False))
    tr_dl = DataLoader(tr_ds, batch_size=CONFIG["batch_size"],
                       shuffle=True,  collate_fn=collate_fn)
    va_dl = DataLoader(va_ds, batch_size=CONFIG["batch_size"],
                       shuffle=False, collate_fn=collate_fn)

    # model / optim
    model = get_model(CONFIG["num_classes"]).to(device)
    opt   = torch.optim.SGD(model.parameters(), lr=CONFIG["learning_rate"],
                            momentum=0.9, weight_decay=CONFIG["weight_decay"])
    sch   = StepLR(opt, CONFIG["step_lr_patience"], CONFIG["step_lr_gamma"])

    best = 0.0
    for epoch in range(CONFIG["num_epochs"]):
        print(f"\nEpoch {epoch+1}/{CONFIG['num_epochs']}")
        tloss = train_one_epoch(model, tr_dl, opt, device)
        mAP, P, R, F1 = evaluate(model, va_dl, device)
        print(f" ↳ loss {tloss:.4f} | mAP {mAP:.4f} | P {P:.4f} | R {R:.4f} | F1 {F1:.4f}")

        if mAP > best:
            best = mAP
            torch.save({"epoch":epoch,
                        "model":model.state_dict(),
                        "optim":opt.state_dict(),
                        "mAP":best}, CONFIG["best_ckpt"])
            print(f"[✓] new best checkpoint (mAP={best:.4f}) saved")
        sch.step()

    print(f"\nDone. Best mAP={best:.4f}")

if __name__ == "__main__":
    main()

[INFO] device: mps
[INFO] training on: mps





Epoch 1/50


  0%|          | 0/450 [00:07<?, ?it/s]


NotImplementedError: The operator 'torchvision::nms' is not currently implemented for the MPS device. If you want this op to be considered for addition please comment on https://github.com/pytorch/pytorch/issues/141287 and mention use-case, that resulted in missing op as well as commit hash Unknown. As a temporary fix, you can set the environment variable `PYTORCH_ENABLE_MPS_FALLBACK=1` to use the CPU as a fallback for this op. WARNING: this will be slower than running natively on MPS.