In [1]:
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
class CocoTransform:
    def __call__(self, image, target):
        image = F.to_tensor(image)    # Converts PIL image to tensor (C,H,W) float in [0,1]
        return image, target

In [3]:
def get_coco_dataset(img_dir, ann_file):
    return CocoDetection(
        root=img_dir,
        annFile=ann_file,
        transforms=CocoTransform()
    )

In [4]:
train_dataset = get_coco_dataset(
    img_dir="/Users/swaksharbora/Documents/ML Assingment/Data/train",
    ann_file="/Users/swaksharbora/Documents/ML Assingment/Datatrain/annotations/pothhole_annotate_coco.json"
)

val_dataset = get_coco_dataset(
    img_dir="/Users/swaksharbora/Documents/ML Assingment/Data/val",
    ann_file="/Users/swaksharbora/Documents/ML Assingment/Data/val/annotations/annotations_coco.json"
)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [5]:
# collate_fn returns tuple(list_of_images, list_of_targets)
collate_fn = lambda x: tuple(zip(*x))
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)


In [6]:
def get_model(num_classes):
    # Load pre-trained Faster R-CNN
    # NOTE: depending on torchvision version you may want weights="DEFAULT" instead of pretrained=True
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the pre-trained head with a new one for our num_classes
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

# For a single target class 'pothole' use 2 (background + pothole)
num_classes = 2
model = get_model(num_classes)

# Debug: check model wasn't None
print("Model created:", model.__class__.__name__)



Model created: FasterRCNN


In [7]:
# ---------- Device ----------
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = model.to(device)
print("Using device:", device)

# ---------- Optimizer and scheduler ----------
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


Using device: cpu


In [8]:
def train_one_epoch(model, optimizer, data_loader, device, epoch):
    model.train()
    running_loss = None

    for images, targets in data_loader:
        # images: tuple of tensors (C,H,W), targets: tuple of lists/dicts
        # Move images to device
        images = [img.to(device) for img in images]

        # Validate and process targets
        processed_targets = []
        valid_images = []
        for i, target in enumerate(targets):
            boxes = []
            labels = []
            # Each 'target' is a list of annotation dicts for that image
            for obj in target:
                bbox = obj.get("bbox", None)  # [x, y, width, height]
                if bbox is None:
                    continue
                x, y, w, h = bbox
                if w > 0 and h > 0:
                    # append a 4-element list per box
                    boxes.append([x, y, x + w, y + h])
                    labels.append(obj.get("category_id", 1))  # fallback if missing

            if boxes:
                processed_target = {
                    "boxes": torch.tensor(boxes, dtype=torch.float32).to(device),
                    "labels": torch.tensor(labels, dtype=torch.int64).to(device),
                }
                processed_targets.append(processed_target)
                valid_images.append(images[i])

        # Skip if no valid targets in the batch
        if not processed_targets:
            continue

        # Forward pass (model expects list[tensor], list[dict])
        loss_dict = model(valid_images, processed_targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backprop
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        running_loss = losses.item()

    if running_loss is None:
        print(f"Epoch [{epoch}] - No valid training samples processed this epoch.")
    else:
        print(f"Epoch [{epoch}] Loss: {running_loss:.4f}")


In [15]:
num_epochs = 25
for epoch in range(num_epochs):
    train_one_epoch(model, optimizer, train_loader, device, epoch)
    lr_scheduler.step()

    # Save model state dict
    model_path = f"fasterrcnn_resnet50_epoch{epoch + 1}.pth"
    torch.save(model.state_dict(), model_path)
    print(f"Model saved: {model_path}")

Epoch [0] Loss: 3.7457
Model saved: fasterrcnn_resnet50_epoch1.pth
Epoch [1] Loss: 0.4428
Model saved: fasterrcnn_resnet50_epoch2.pth
Epoch [2] Loss: 0.6232
Model saved: fasterrcnn_resnet50_epoch3.pth
Epoch [3] Loss: 0.5354
Model saved: fasterrcnn_resnet50_epoch4.pth
Epoch [4] Loss: 0.3215
Model saved: fasterrcnn_resnet50_epoch5.pth
Epoch [5] Loss: 0.2675
Model saved: fasterrcnn_resnet50_epoch6.pth
Epoch [6] Loss: 0.3568
Model saved: fasterrcnn_resnet50_epoch7.pth
Epoch [7] Loss: 0.4296
Model saved: fasterrcnn_resnet50_epoch8.pth
Epoch [8] Loss: 0.4719
Model saved: fasterrcnn_resnet50_epoch9.pth
Epoch [9] Loss: 0.5845
Model saved: fasterrcnn_resnet50_epoch10.pth
Epoch [10] Loss: 0.4850
Model saved: fasterrcnn_resnet50_epoch11.pth
Epoch [11] Loss: 0.6499
Model saved: fasterrcnn_resnet50_epoch12.pth
Epoch [12] Loss: 0.5318
Model saved: fasterrcnn_resnet50_epoch13.pth


KeyboardInterrupt: 