In [2]:
import platform, torch
print("OS:", platform.platform())
print("PyTorch:", torch.__version__)
print("torch.version.cuda:", torch.version.cuda)
print("GPU count:", torch.cuda.device_count())
print("MPS available:", getattr(torch.backends, "mps", None) and torch.backends.mps.is_available())
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA device count:", torch.cuda.device_count())
    print("Device 0:", torch.cuda.get_device_name(0))
    print("CUDA runtime:", torch.version.cuda)

OS: Windows-11-10.0.26200-SP0
PyTorch: 2.8.0+cu129
torch.version.cuda: 12.9
GPU count: 1
MPS available: False
CUDA available: True
CUDA device count: 1
Device 0: NVIDIA GeForce RTX 5080
CUDA runtime: 12.9


In [3]:
# rcnn_experiments.ipynb – Cell 1
import torch
import torchvision.transforms as T
from torch.utils.data import DataLoader
from torchvision.ops import box_iou
from torch.cuda.amp import autocast, GradScaler

from rcnn_dataset import FruitDetectionDataset
from rcnn_model import get_faster_rcnn_model

import os
from PIL import Image, ImageDraw

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

# CHANGE THIS if you have a different number of fruit classes
NUM_FRUIT_CLASSES = 10
NUM_CLASSES = NUM_FRUIT_CLASSES + 1   # +1 for background

# Fill in your actual class names in order
CLASS_NAMES = [
    "background",
    "apple",
    "avocado",
    "banana",
    "kiwi",
    "lemon",
    "orange",
    "pear",
    "pomegranate",
    "strawberry",
    "watermelon",
]

Using device: cuda


In [11]:
train_imgs   = "../dataset/input/images"
train_labels = "../dataset/input/labels"

val_imgs     = "../dataset/split/val/images"
val_labels   = "../dataset/split/val/labels"

print("Train images dir:", train_imgs)
print("Train labels dir:", train_labels)
print("Val images dir:", val_imgs)
print("Val labels dir:", val_labels)

import torchvision.transforms as T
from torch.utils.data import DataLoader

def get_transforms(train=True):
    transforms = [T.ToTensor()]
    if train:
        transforms.extend([
            T.RandomHorizontalFlip(0.5),
            T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            T.RandomAdjustSharpness(sharpness_factor=2, p=0.5),
            # Can also add: RandomRotation, RandomAffine, GaussianBlur
        ])
    return T.Compose(transforms)

def collate_fn(batch):
    return tuple(zip(*batch))

# create datasets
train_dataset = FruitDetectionDataset(
    images_dir=train_imgs,
    labels_dir=train_labels,
    transforms=get_transforms(train=True),
)

val_dataset = FruitDetectionDataset(
    images_dir=val_imgs,
    labels_dir=val_labels,
    transforms=get_transforms(train=False),
)

print("Train samples:", len(train_dataset))
print("Val samples:", len(val_dataset))

# data loaders
train_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=2,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_fn,
)

Train images dir: ../dataset/input/images
Train labels dir: ../dataset/input/labels
Val images dir: ../dataset/split/val/images
Val labels dir: ../dataset/split/val/labels
Train samples: 1504
Val samples: 229


In [7]:
model = get_faster_rcnn_model(num_classes=NUM_CLASSES)
model.to(DEVICE)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.AdamW(
    params,
    lr=0.0001,
    weight_decay=0.0001
)

lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=5
)

print("Model ready.")

Model ready.


In [8]:
scaler = GradScaler()

def train_one_epoch(model, optimizer, data_loader, epoch):
    model.train()
    total_loss = 0.0

    for images, targets in data_loader:
        images = [img.to(DEVICE) for img in images]
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

        with autocast():
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        scaler.scale(losses).backward()
        scaler.step(optimizer)
        scaler.update()

        total_loss += losses.item()

    avg_loss = total_loss / len(data_loader)
    print(f"Epoch {epoch}: train loss = {avg_loss:.4f}")
    return total_loss / len(data_loader)


@torch.no_grad()
def evaluate_simple(model, data_loader):
    """
    Very simple evaluation: computes mean IoU between
    predicted boxes and ground-truth boxes.
    """
    model.eval()
    total_iou = 0.0
    n_boxes = 0

    for images, targets in data_loader:
        images = [img.to(DEVICE) for img in images]
        outputs = model(images)

        for out, tgt in zip(outputs, targets):
            if len(out["boxes"]) == 0 or len(tgt["boxes"]) == 0:
                continue

            ious = box_iou(out["boxes"].cpu(), tgt["boxes"])
            max_iou, _ = ious.max(dim=0)   # best IoU per GT box
            total_iou += max_iou.sum().item()
            n_boxes += len(max_iou)

    if n_boxes > 0:
        mean_iou = total_iou / n_boxes
        print(f"Mean IoU over GT boxes: {mean_iou:.3f}")
        return mean_iou
    else:
        print("No boxes to evaluate (check labels).")
        return 0.0

  scaler = GradScaler()


In [9]:
max_epochs = 100          
patience = 10             
min_delta = 1e-3          

train_losses = []
val_ious = []

best_loss = float("inf")
best_state = None
epochs_no_improve = 0
best_val_iou = 0.0

for epoch in range(1, max_epochs + 1):
    print(f"\n===== Epoch {epoch}/{max_epochs} =====")
    train_loss = train_one_epoch(model, optimizer, train_loader, epoch)
    train_losses.append(train_loss)

    # simple evaluation (IoU)
    val_iou = evaluate_simple(model, val_loader)
    val_ious.append(val_iou)

    # LR schedule step
    lr_scheduler.step(val_iou)

    # Early stopping on validation performance
    if val_iou > best_val_iou + min_delta:
        best_val_iou = val_iou
        epochs_no_improve = 0
        best_state = model.state_dict()
        print(f"New best val IoU: {best_val_iou:.4f} (model state saved)")
    #elif train_loss < best_loss - min_delta:
    #    best_loss = train_loss
    #    epochs_no_improve = 0
    #    best_state = model.state_dict()  # keep best weights
    #    print(f"New best loss: {best_loss:.4f} (model state saved)")
    else:
        epochs_no_improve += 1
        print(f"No significant improvement for {epochs_no_improve} epoch(s).")

    if epochs_no_improve >= patience:
        print(f"\n⏹ Early stopping triggered: "
              f"no improvement > {min_delta} for {patience} epochs.")
        break

print(f"\nTraining finished. Best train loss: {best_loss:.4f}")


===== Epoch 1/100 =====


  with autocast():


Epoch 1: train loss = 0.7174
Mean IoU over GT boxes: 0.691
New best val IoU: 0.6905 (model state saved)

===== Epoch 2/100 =====
Epoch 2: train loss = 0.6138
Mean IoU over GT boxes: 0.794
New best val IoU: 0.7939 (model state saved)

===== Epoch 3/100 =====
Epoch 3: train loss = 0.5907
Mean IoU over GT boxes: 0.772
No significant improvement for 1 epoch(s).

===== Epoch 4/100 =====
Epoch 4: train loss = 0.5707
Mean IoU over GT boxes: 0.770
No significant improvement for 2 epoch(s).

===== Epoch 5/100 =====


KeyboardInterrupt: 

In [9]:
save_path = "../models/faster_rcnn_fruits.pth"
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# if we used early stopping and have best_state, save that
if "best_state" in globals() and best_state is not None:
    torch.save(best_state, save_path)
else:
    torch.save(model.state_dict(), save_path)

print("Saved model to:", save_path)

Saved model to: ../models/faster_rcnn_fruits.pth


In [21]:
import os
import torchvision.transforms as T
import torchvision.ops as ops
from PIL import Image, ImageDraw

test_imgs_dir = "../dataset/test_images/test"
output_dir = "../test_results/faster_rcnn"
os.makedirs(output_dir, exist_ok=True)

# thresholds
score_thresh = 0.7          # confidence threshold (higher = fewer false positives)
nms_iou_thresh = 0.5

transform = T.ToTensor()

model.eval()

for fname in os.listdir(test_imgs_dir):
    if not fname.lower().endswith((".jpg", ".jpeg", ".png")):
        continue

    img_path = os.path.join(test_imgs_dir, fname)
    img = Image.open(img_path).convert("RGB")
    img_tensor = transform(img).to(DEVICE)

    with torch.no_grad():
        output = model([img_tensor])[0]

    boxes = output["boxes"].cpu()
    scores = output["scores"].cpu()
    labels = output["labels"].cpu()

    keep = scores >= score_thresh
    boxes = boxes[keep]
    scores = scores[keep]
    labels = labels[keep]

    if len(scores) == 0:
        out_path = os.path.join(output_dir, fname)
        img.save(out_path)
        print(f"{fname}: no detections above threshold, saved original.")
        continue

    keep_idx = ops.nms(boxes, scores, nms_iou_thresh)
    boxes = boxes[keep_idx]
    scores = scores[keep_idx]
    labels = labels[keep_idx]

    if len(scores) > 1:
        top_idx = scores.argmax()
        boxes = boxes[top_idx:top_idx+1]
        scores = scores[top_idx:top_idx+1]
        labels = labels[top_idx:top_idx+1]

    draw = ImageDraw.Draw(img)

    for box, score, label in zip(boxes, scores, labels):
        x1, y1, x2, y2 = box.tolist()
        cls_name = CLASS_NAMES[int(label)]

        draw.rectangle([x1, y1, x2, y2], outline="red", width=3)

        text = f"{cls_name} {score:.2f}"
        draw.text((x1 + 3, y1 + 3), text, fill="yellow")

    out_path = os.path.join(output_dir, fname)
    img.save(out_path)
    print(f"{fname}: saved with {len(boxes)} detection(s) -> {out_path}")


apple_77.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_77.jpg
apple_78.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_78.jpg
apple_79.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_79.jpg
apple_80.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_80.jpg
apple_81.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_81.jpg
apple_82.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_82.jpg
apple_83.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_83.jpg
apple_84.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_84.jpg
apple_85.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_85.jpg
apple_86.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_86.jpg
apple_87.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple_87.jpg
apple_88.jpg: saved with 1 detection(s) -> ../test_results/faster_rcnn\apple