Turning the ground truth text file into a CSV

In [4]:
import pandas as pd
from pathlib import Path

# read lines, split on ';'
lines = Path(r"C:\Users\eren\Desktop\AI Traffic sign detect\RCNN\TrainIJCNN2013\gt.txt").read_text().strip().splitlines()
records = []
for L in lines:
    fname, x1, y1, x2, y2, cls = L.split(';')
    records.append({
        "filename": fname,
        "x1": int(x1),
        "y1": int(y1),
        "x2": int(x2),
        "y2": int(y2),
        "class_id": int(cls),
    })

df = pd.DataFrame(records)
df.to_csv(r"C:\Users\eren\Desktop\AI Traffic sign detect\RCNN\annotations.csv", index=False)

GTSDB Dataset & DataLoader Setup

In [1]:
import torch, torchvision
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import torch
import torchvision
from torchvision.ops import box_iou
from torchmetrics.detection.mean_ap import MeanAveragePrecision


class GTSDBDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transforms=None):
        import pandas as pd
        self.df = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transforms = transforms
        # group annotations by image
        self.grouped = self.df.groupby("filename")

        # list of unique image names
        self.images = list(self.grouped.groups.keys())

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]
        annots = self.grouped.get_group(img_name)

        # load image
        img_path = os.path.join(self.img_dir, img_name)
        img = Image.open(img_path).convert("RGB")

        # boxes & labels
        boxes = torch.tensor(
            annots[["x1","y1","x2","y2"]].values,
            dtype=torch.float32
        )
        labels = torch.tensor(annots["class_id"].values, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
        }

        if self.transforms:
            img = self.transforms(img)

        return img, target

# transforms & loaders
import torchvision.transforms as T
train_transforms = T.Compose([
    T.ToTensor(),
    T.RandomHorizontalFlip(0.5),
])
val_transforms = T.ToTensor()

train_ds = GTSDBDataset(r"C:\Users\eren\Desktop\AI Traffic sign detect\RCNN\annotations.csv",r"C:\Users\eren\Desktop\AI Traffic sign detect\RCNN\TrainIJCNN2013" , transforms=train_transforms)
val_ds   = GTSDBDataset(r"C:\Users\eren\Desktop\AI Traffic sign detect\RCNN\annotations.csv",r"C:\Users\eren\Desktop\AI Traffic sign detect\RCNN\TrainIJCNN2013", transforms=val_transforms)

def collate_fn(batch):
    return tuple(zip(*batch))

train_loader = DataLoader(train_ds, batch_size=4, shuffle=True, collate_fn=collate_fn)
val_loader   = DataLoader(val_ds,   batch_size=4, shuffle=False, collate_fn=collate_fn)


Prepare Resnet50 for transfer learning

In [7]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# load COCO‐pretrained model
model = fasterrcnn_resnet50_fpn(pretrained=True)

# replace the head
num_classes = 1 + df["class_id"].nunique()  
in_feats = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_feats, num_classes)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)




FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

Train Model

In [None]:
from tqdm import tqdm
import time

num_epochs = 10

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    start_epoch = time.time()

    # wrap loader in tqdm, but we'll use enumerate() to get batch_i
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
    for batch_i, (imgs, targets) in enumerate(pbar, start=1):
        # move data
        imgs = [img.to(device) for img in imgs]
        tgt  = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # forward + loss
        loss_dict = model(imgs, tgt)
        loss = sum(loss for loss in loss_dict.values())

        # backward + step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accumulate
        running_loss += loss.item()
        avg_loss = running_loss / batch_i  # use batch_i, not pbar.n

        # update bar with our avg_loss
        pbar.set_postfix(avg_loss=f"{avg_loss:.4f}")

    # scheduler step & epoch summary
    lr_scheduler.step()
    epoch_time = time.time() - start_epoch
    print(f"→ Epoch {epoch+1} done in {epoch_time:.1f}s  Avg Loss: {avg_loss:.4f}\n")


Epoch 1/10: 100%|██████████| 127/127 [1:24:33<00:00, 39.95s/batch, avg_loss=0.3374]


→ Epoch 1 done in 5073.2s  Avg Loss: 0.3374



Epoch 2/10: 100%|██████████| 127/127 [1:20:04<00:00, 37.83s/batch, avg_loss=0.2846]


→ Epoch 2 done in 4804.6s  Avg Loss: 0.2846



Epoch 3/10: 100%|██████████| 127/127 [1:26:57<00:00, 41.08s/batch, avg_loss=0.2526]


→ Epoch 3 done in 5217.3s  Avg Loss: 0.2526



Epoch 4/10: 100%|██████████| 127/127 [1:26:42<00:00, 40.97s/batch, avg_loss=0.2337]


→ Epoch 4 done in 5202.9s  Avg Loss: 0.2337



Epoch 5/10: 100%|██████████| 127/127 [1:26:49<00:00, 41.02s/batch, avg_loss=0.2290]


→ Epoch 5 done in 5209.6s  Avg Loss: 0.2290



Epoch 6/10: 100%|██████████| 127/127 [1:20:58<00:00, 38.26s/batch, avg_loss=0.2246]


→ Epoch 6 done in 4858.6s  Avg Loss: 0.2246



Epoch 7/10: 100%|██████████| 127/127 [1:21:05<00:00, 38.31s/batch, avg_loss=0.2223]


→ Epoch 7 done in 4865.6s  Avg Loss: 0.2223



Epoch 8/10: 100%|██████████| 127/127 [1:22:02<00:00, 38.76s/batch, avg_loss=0.2211]


→ Epoch 8 done in 4922.5s  Avg Loss: 0.2211



Epoch 9/10: 100%|██████████| 127/127 [1:21:42<00:00, 38.60s/batch, avg_loss=0.2203]


→ Epoch 9 done in 4902.1s  Avg Loss: 0.2203



Epoch 10/10: 100%|██████████| 127/127 [1:20:43<00:00, 38.14s/batch, avg_loss=0.2244]

→ Epoch 10 done in 4843.7s  Avg Loss: 0.2244






Save Model

In [12]:
# Save only the model’s parameters
torch.save(model.state_dict(), "fasterrcnn_gtsdb_weights.pth")
print("→ Weights saved to fasterrcnn_gtsdb_weights.pth")

→ Weights saved to fasterrcnn_gtsdb_weights.pth


Load Model

In [5]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch

# 1. Rebuild the exact same architecture
num_classes = 1 + 43   # change if you have a different number of classes
model = fasterrcnn_resnet50_fpn(pretrained=False)
in_feats = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_feats, num_classes)

# 2. Load your saved weights
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load("fasterrcnn_gtsdb_weights.pth", map_location=device))
model.to(device).eval()
print("→ Model loaded, ready for inference")



→ Model loaded, ready for inference


Function to evaluate overall precision & recall @ (score_threshold, iou_threshold)

In [None]:
def evaluate_pr_iou(model, data_loader, device,
                    iou_threshold=0.5, score_threshold=0.05):

    model.eval()
    tp = fp = fn = 0
    iou_accum = []
    with torch.no_grad():
        for imgs, targets in data_loader:
            imgs = [img.to(device) for img in imgs]
            outputs = model(imgs)

            for out, tgt in zip(outputs, targets):
                gt_boxes = tgt["boxes"].to(device)
                # filter predictions by score
                keep = out["scores"] > score_threshold
                pred_boxes = out["boxes"][keep]

                if len(pred_boxes) == 0:
                    fn += len(gt_boxes)
                    continue
                if len(gt_boxes) == 0:
                    fp += len(pred_boxes)
                    continue

                # IoU matrix
                ious = box_iou(pred_boxes, gt_boxes)

                # True positives: preds matching any gt
                matched_pred = (ious >= iou_threshold).any(dim=1)
                tp += matched_pred.sum().item()
                fp += (~matched_pred).sum().item()

                # False negatives: gt not matched by any pred
                matched_gt = (ious >= iou_threshold).any(dim=0)
                fn += (~matched_gt).sum().item()

                # record IoUs of matched preds for mean‑IoU
                # for each pred, take its best IoU
                best_ious, _ = ious.max(dim=1)
                iou_accum.extend(best_ious[matched_pred].cpu().tolist())

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall    = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    mean_iou  = sum(iou_accum) / len(iou_accum) if iou_accum else 0.0

    return {
        "precision": precision,
        "recall": recall,
        "mean_iou": mean_iou
    }


Use torchmetrics to compute mAP@0.5 for all classes.

In [None]:
def evaluate_map(model, data_loader, device):

    metric = MeanAveragePrecision(
        iou_type='bbox',
        iou_thresholds=[0.5],
        class_metrics=True
    ).to(device)

    model.eval()
    with torch.no_grad():
        for imgs, targets in data_loader:
            imgs = [img.to(device) for img in imgs]
            targs = [{k: v.to(device) for k, v in t.items()} for t in targets]
            preds = model(imgs)
            metric.update(preds, targs)

    return metric.compute()


Run Evaluation Functions

In [None]:


print("→ Running quick PR/IoU evaluation on validation set…")
pr_iou = evaluate_pr_iou(model, val_loader, device,
                         iou_threshold=0.5, score_threshold=0.05)
print(f"Precision@0.05/IoU0.5: {pr_iou['precision']:.4f}")
print(f"Recall   @0.05/IoU0.5: {pr_iou['recall']:.4f}")
print(f"Mean IoU          : {pr_iou['mean_iou']:.4f}\n")

print("→ Running mAP@0.5 evaluation via torchmetrics…")
map_results = evaluate_map(model, val_loader, device)
print(f"mAP@0.5: {map_results['map_50']:.4f}\n")




→ Running quick PR/IoU evaluation on validation set…
Precision@0.05/IoU0.5: 0.9237
Recall   @0.05/IoU0.5: 0.9981
Mean IoU          : 0.8339

→ Running mAP@0.5 evaluation via torchmetrics…
mAP@0.5: 0.1841



NameError: name 'class_names' is not defined