In [4]:



# train_frcnn_pennfudan.py
import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# ---- Albumentations (optional) ----
import albumentations as A
from albumentations.pytorch import ToTensorV2

# ---- Import helper functions from references/detection ----
# Make sure engine.py, utils.py, coco_eval.py, coco_utils.py are in ./references/detection/
import sys
sys.path.append(os.getcwd())  # if notebook is in same folder

from engine import train_one_epoch, evaluate
from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator

# ------------------ Dataset ------------------
class PennFudanDataset(Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(os.path.join(root, "PNGImages"))))
        self.masks = list(sorted(os.listdir(os.path.join(root, "PedMasks"))))

    def __getitem__(self, idx):
        # Load image and mask
        img_path = os.path.join(self.root, "PNGImages", self.imgs[idx])
        mask_path = os.path.join(self.root, "PedMasks", self.masks[idx])
        img = np.array(Image.open(img_path).convert("RGB"))
        mask = np.array(Image.open(mask_path))

        # Instances are encoded as different colors
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[1:]  # remove background

        # Split into binary masks
        masks = mask == obj_ids[:, None, None]
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        # Get bounding boxes
        boxes = []
        for m in masks:
            pos = np.where(m)
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        labels = torch.ones((len(obj_ids),), dtype=torch.int64)
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((len(obj_ids),), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "masks": masks,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd
        }

        if self.transforms:
            augmented = self.transforms(image=img, masks=[m.numpy() for m in masks])
            img = augmented['image']
            masks = torch.stack([torch.as_tensor(m, dtype=torch.uint8) for m in augmented['masks']])
            target["masks"] = masks

        img = torchvision.transforms.functional.to_tensor(img)
        return img, target

    def __len__(self):
        return len(self.imgs)

# ------------------ Transforms ------------------
train_transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=10, p=0.5),
    ToTensorV2()
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

val_transform = A.Compose([ToTensorV2()])

# ------------------ Load Datasets ------------------
train_dataset = PennFudanDataset(root="PennFudanPed/train", transforms=train_transform)
val_dataset = PennFudanDataset(root="PennFudanPed/val", transforms=val_transform)

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True, collate_fn=utils.collate_fn)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False, collate_fn=utils.collate_fn)

# ------------------ Model ------------------
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
num_classes = 2  # person + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)

# ------------------ Optimizer ------------------
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=1e-4)

# ------------------ Training Loop ------------------
num_epochs = 10
for epoch in range(num_epochs):
    print(f"Epoch [{epoch+1}/{num_epochs}]")
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=10)
    evaluate(model, val_loader, device=device)

# ------------------ Save model ------------------
torch.save(model.state_dict(), "frcnn_pennfudan.pth")
print("Model saved successfully.")


ModuleNotFoundError: No module named 'utils'