In [1]:
import platform, torch
print("OS:", platform.platform())
print("PyTorch:", torch.__version__)
print("torch.version.cuda:", torch.version.cuda)
print("GPU count:", torch.cuda.device_count())
print("MPS available:", getattr(torch.backends, "mps", None) and torch.backends.mps.is_available())
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA device count:", torch.cuda.device_count())
    print("Device 0:", torch.cuda.get_device_name(0))
    print("CUDA runtime:", torch.version.cuda)

OS: Windows-11-10.0.26200-SP0
PyTorch: 2.8.0+cu129
torch.version.cuda: 12.9
GPU count: 1
MPS available: False
CUDA available: True
CUDA device count: 1
Device 0: NVIDIA GeForce RTX 5080
CUDA runtime: 12.9


In [2]:
# rcnn_experiments.ipynb – Cell 1
import torch
import torchvision.transforms as T
from torch.utils.data import DataLoader
from torchvision.ops import box_iou

from rcnn_dataset import FruitDetectionDataset
from rcnn_model import get_faster_rcnn_model

import os
from PIL import Image, ImageDraw

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

# CHANGE THIS if you have a different number of fruit classes
NUM_FRUIT_CLASSES = 10
NUM_CLASSES = NUM_FRUIT_CLASSES + 1   # +1 for background

# Fill in your actual class names in order
CLASS_NAMES = [
    "background",
    "apple",
    "avocado",
    "banana",
    "kiwi",
    "lemon",
    "orange",
    "pear",
    "pomegranate",
    "strawberry",
    "watermelon",
]

Using device: cuda


In [3]:
train_imgs   = "../dataset/split/train/images"
train_labels = "../dataset/split/train/labels"

val_imgs     = "../dataset/split/val/images"
val_labels   = "../dataset/split/val/labels"

print("Train images dir:", train_imgs)
print("Train labels dir:", train_labels)
print("Val images dir:", val_imgs)
print("Val labels dir:", val_labels)

import torchvision.transforms as T
from torch.utils.data import DataLoader

def get_transforms(train=True):
    transforms = [T.ToTensor()]
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

def collate_fn(batch):
    return tuple(zip(*batch))

# create datasets
train_dataset = FruitDetectionDataset(
    images_dir=train_imgs,
    labels_dir=train_labels,
    transforms=get_transforms(train=True),
)

val_dataset = FruitDetectionDataset(
    images_dir=val_imgs,
    labels_dir=val_labels,
    transforms=get_transforms(train=False),
)

print("Train samples:", len(train_dataset))
print("Val samples:", len(val_dataset))

# data loaders
train_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_fn,
)


Train images dir: ../dataset/split/train/images
Train labels dir: ../dataset/split/train/labels
Val images dir: ../dataset/split/val/images
Val labels dir: ../dataset/split/val/labels
Train samples: 967
Val samples: 278


In [4]:
model = get_faster_rcnn_model(num_classes=NUM_CLASSES)
model.to(DEVICE)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=5,
    gamma=0.1,
)

print("Model ready.")

Model ready.


In [5]:
def train_one_epoch(model, optimizer, data_loader, epoch):
    model.train()
    total_loss = 0.0

    for images, targets in data_loader:
        images = [img.to(DEVICE) for img in images]
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        total_loss += losses.item()

    avg_loss = total_loss / len(data_loader)
    print(f"Epoch {epoch}: train loss = {avg_loss:.4f}")
    return avg_loss


@torch.no_grad()
def evaluate_simple(model, data_loader):
    """
    Very simple evaluation: computes mean IoU between
    predicted boxes and ground-truth boxes.
    """
    model.eval()
    total_iou = 0.0
    n_boxes = 0

    for images, targets in data_loader:
        images = [img.to(DEVICE) for img in images]
        outputs = model(images)

        for out, tgt in zip(outputs, targets):
            if len(out["boxes"]) == 0 or len(tgt["boxes"]) == 0:
                continue

            ious = box_iou(out["boxes"].cpu(), tgt["boxes"])
            max_iou, _ = ious.max(dim=0)   # best IoU per GT box
            total_iou += max_iou.sum().item()
            n_boxes += len(max_iou)

    if n_boxes > 0:
        mean_iou = total_iou / n_boxes
        print(f"Mean IoU over GT boxes: {mean_iou:.3f}")
        return mean_iou
    else:
        print("No boxes to evaluate (check labels).")
        return 0.0

In [6]:
max_epochs = 200          
patience = 10             
min_delta = 1e-3          

train_losses = []
val_ious = []

best_loss = float("inf")
best_state = None
epochs_no_improve = 0

for epoch in range(1, max_epochs + 1):
    print(f"\n===== Epoch {epoch}/{max_epochs} =====")
    train_loss = train_one_epoch(model, optimizer, train_loader, epoch)
    train_losses.append(train_loss)

    # LR schedule step
    lr_scheduler.step()

    # simple evaluation (IoU)
    val_iou = evaluate_simple(model, val_loader)
    val_ious.append(val_iou)

    if train_loss < best_loss - min_delta:
        best_loss = train_loss
        epochs_no_improve = 0
        best_state = model.state_dict()  # keep best weights
        print(f"New best loss: {best_loss:.4f} (model state saved)")
    else:
        epochs_no_improve += 1
        print(f"No significant improvement for {epochs_no_improve} epoch(s).")

    if epochs_no_improve >= patience:
        print(f"\n⏹ Early stopping triggered: "
              f"no improvement > {min_delta} for {patience} epochs.")
        break

print(f"\nTraining finished. Best train loss: {best_loss:.4f}")


===== Epoch 1/200 =====




Epoch 1: train loss = 0.6739
Mean IoU over GT boxes: 0.802
New best loss: 0.6739 (model state saved)

===== Epoch 2/200 =====
Epoch 2: train loss = 0.5454
Mean IoU over GT boxes: 0.783
New best loss: 0.5454 (model state saved)

===== Epoch 3/200 =====
Epoch 3: train loss = 0.5099
Mean IoU over GT boxes: 0.805
New best loss: 0.5099 (model state saved)

===== Epoch 4/200 =====
Epoch 4: train loss = 0.4695
Mean IoU over GT boxes: 0.806
New best loss: 0.4695 (model state saved)

===== Epoch 5/200 =====
Epoch 5: train loss = 0.4420
Mean IoU over GT boxes: 0.811
New best loss: 0.4420 (model state saved)

===== Epoch 6/200 =====
Epoch 6: train loss = 0.3652
Mean IoU over GT boxes: 0.821
New best loss: 0.3652 (model state saved)

===== Epoch 7/200 =====
Epoch 7: train loss = 0.3442
Mean IoU over GT boxes: 0.824
New best loss: 0.3442 (model state saved)

===== Epoch 8/200 =====
Epoch 8: train loss = 0.3446
Mean IoU over GT boxes: 0.822
No significant improvement for 1 epoch(s).

===== Epoch 9/2

In [7]:
save_path = "../models/faster_rcnn_fruits.pth"
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# if we used early stopping and have best_state, save that
if "best_state" in globals() and best_state is not None:
    torch.save(best_state, save_path)
else:
    torch.save(model.state_dict(), save_path)

print("Saved model to:", save_path)

Saved model to: ../models/faster_rcnn_fruits.pth


In [8]:
test_imgs_dir = "../dataset/test_images/test"
output_dir = "../test_results/faster_rcnn"
os.makedirs(output_dir, exist_ok=True)

score_thresh = 0.5  # confidence threshold
transform = T.ToTensor()

model.eval()

for fname in os.listdir(test_imgs_dir):
    if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
        continue

    img_path = os.path.join(test_imgs_dir, fname)
    img = Image.open(img_path).convert("RGB")
    img_tensor = transform(img).to(DEVICE)

    with torch.no_grad():
        output = model([img_tensor])[0]

    boxes = output["boxes"].cpu()
    scores = output["scores"].cpu()
    labels = output["labels"].cpu()

    draw = ImageDraw.Draw(img)

    for box, score, label in zip(boxes, scores, labels):
        if score < score_thresh:
            continue

        x1, y1, x2, y2 = box.tolist()
        cls_name = CLASS_NAMES[int(label)]

        # Draw box
        draw.rectangle([x1, y1, x2, y2], outline="red", width=3)

        # Draw text (class + score)
        text = f"{cls_name} {score:.2f}"
        draw.text((x1 + 3, y1 + 3), text, fill="yellow")

    out_path = os.path.join(output_dir, fname)
    img.save(out_path)
    print("Saved:", out_path)

Saved: ../test_results/faster_rcnn\apple_77.jpg
Saved: ../test_results/faster_rcnn\apple_78.jpg
Saved: ../test_results/faster_rcnn\apple_79.jpg
Saved: ../test_results/faster_rcnn\apple_80.jpg
Saved: ../test_results/faster_rcnn\apple_81.jpg
Saved: ../test_results/faster_rcnn\apple_82.jpg
Saved: ../test_results/faster_rcnn\apple_83.jpg
Saved: ../test_results/faster_rcnn\apple_84.jpg
Saved: ../test_results/faster_rcnn\apple_85.jpg
Saved: ../test_results/faster_rcnn\apple_86.jpg
Saved: ../test_results/faster_rcnn\apple_87.jpg
Saved: ../test_results/faster_rcnn\apple_88.jpg
Saved: ../test_results/faster_rcnn\apple_89.jpg
Saved: ../test_results/faster_rcnn\apple_90.jpg
Saved: ../test_results/faster_rcnn\apple_91.jpg
Saved: ../test_results/faster_rcnn\apple_92.jpg
Saved: ../test_results/faster_rcnn\apple_93.jpg
Saved: ../test_results/faster_rcnn\apple_94.jpg
Saved: ../test_results/faster_rcnn\apple_95.jpg
Saved: ../test_results/faster_rcnn\banana_77.jpg
Saved: ../test_results/faster_rcnn\bana