## Agritrack - Faster R-CNN Sagemaker Model Training Code

Note: This notebook assumes that there is a .zip dataset file at the root directory of your runtime for use in its execution as a dependency.

Please update the .zip filename in the extraction cell below (for example `dataset_5.zip`) so that it points to your dataset before executing the code.



Extract uploaded dataset

In [2]:
# Extract dataset .zip - modify the archive filename below to change which dataset is extracted.
# Each archive is unpacked into its own extracted_dataset_<n> directory.
# NOTE(review): the download URLs below embed access keys - confirm they are safe to share.
# # Dataset 1
# !curl -L "https://universe.roboflow.com/ds/Cztxji5jQE?key=be41mb5A7U" > dataset_1.zip
# !ls
# !unzip dataset_1.zip -d extracted_dataset_1
# # Dataset 2
# !curl -L "https://universe.roboflow.com/ds/cAuiHPSNbg?key=mqvek1KwT2" > dataset_2.zip
# !ls
# !unzip dataset_2.zip -d extracted_dataset_2
# # Dataset 3
# !curl -L "https://universe.roboflow.com/ds/sUnQ0kXD5f?key=i403GGBAZM" > dataset_3.zip
# !ls
# !unzip dataset_3.zip -d extracted_dataset_3
# Custom dataset upload extractions
#!unzip dataset_4.zip -d extracted_dataset_4
!unzip dataset_5.zip -d extracted_dataset_5

Archive:  dataset_5.zip
   creating: extracted_dataset_5/sheep_video_02/
  inflating: extracted_dataset_5/__MACOSX/._sheep_video_02  
  inflating: extracted_dataset_5/sheep_video_02/.DS_Store  
  inflating: extracted_dataset_5/__MACOSX/sheep_video_02/._.DS_Store  
   creating: extracted_dataset_5/sheep_video_02/test/
  inflating: extracted_dataset_5/__MACOSX/sheep_video_02/._test  
   creating: extracted_dataset_5/sheep_video_02/train/
  inflating: extracted_dataset_5/__MACOSX/sheep_video_02/._train  
   creating: extracted_dataset_5/sheep_video_02/val/
  inflating: extracted_dataset_5/__MACOSX/sheep_video_02/._val  
  inflating: extracted_dataset_5/sheep_video_02/test/frame_00155_rot180.jpg  
  inflating: extracted_dataset_5/__MACOSX/sheep_video_02/test/._frame_00155_rot180.jpg  
  inflating: extracted_dataset_5/sheep_video_02/test/frame_00155_rot180.xml  
  inflating: extracted_dataset_5/__MACOSX/sheep_video_02/test/._frame_00155_rot180.xml  
  inflating: extracted_dataset_5/sheep_vi

### Faster-RCNN Model 1

Import dependencies

In [1]:
import os
import xml.etree.ElementTree as ET
import torch
from torchvision.transforms import functional as F
from PIL import Image
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.ops import box_iou
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader, ConcatDataset
import matplotlib.pyplot as plt

Initialise directory vars

In [2]:
# Train / validation / test directories for each extracted dataset.
# NOTE(review): the (commented-out) download cell unpacks dataset 1 into
# 'extracted_dataset_1', while the paths below use 'extracted_dataset' - confirm.
train_dir = 'extracted_dataset/train'
valid_dir = 'extracted_dataset/valid'
test_dir = 'extracted_dataset/test'
train_dir_2 = 'extracted_dataset_2/train'
valid_dir_2 = 'extracted_dataset_2/valid'
test_dir_2 = 'extracted_dataset_2/test'
train_dir_3 = 'extracted_dataset_3/train'
valid_dir_3 = 'extracted_dataset_3/valid'
test_dir_3 = 'extracted_dataset_3/test'
# NOTE(review): dataset 4 presumably shares dataset 5's layout - confirm whether
# its validation folder is named 'valid' or 'val'.
train_dir_4 = 'extracted_dataset_4/sheep_video_01/train'
valid_dir_4 = 'extracted_dataset_4/sheep_video_01/valid'
test_dir_4 = 'extracted_dataset_4/sheep_video_01/test'
train_dir_5 = 'extracted_dataset_5/sheep_video_02/train'
# The dataset_5 archive unpacks its validation split into 'val/' (see the unzip
# output above), not 'valid/'.
valid_dir_5 = 'extracted_dataset_5/sheep_video_02/val'
test_dir_5 = 'extracted_dataset_5/sheep_video_02/test'

Initialise class vars

In [3]:
# Detection label names; a name's position in the list is its integer class id,
# with id 0 reserved for the background.
classes = [
    '__background__',
    'sheep',
]

# Total number of classes, background included.
num_classes = len(classes)

Define VOCDataset class to parse VOC xml

In [4]:
"""
Creates a VOC (Visual Object Classes) dataset from formated object detection directories at a given path.
"""
class VOCDataset(Dataset):
    """
    Dataset of Pascal VOC style object-detection samples stored in one directory.

    Every file ending in `.jpg` directly inside `root_dir` is one sample; its
    annotation is read from the `.xml` file with the same base name in the same
    directory.

    Args:
      root_dir: the directory holding the images and their XML annotations.
      transforms: optional callable applied to the loaded PIL image only (the
        boxes are not transformed). When omitted the image is converted with
        `F.to_tensor`.
      class_names: optional list of class names; a name's position is its label
        id. When omitted, the module-level `classes` list is used.
    """
    def __init__(self, root_dir, transforms=None, class_names=None):
        self.root_dir = root_dir
        self.transforms = transforms
        self.class_names = class_names

        # Sorted so that an index always refers to the same image across runs.
        self.images = sorted(f for f in os.listdir(root_dir) if f.endswith('.jpg'))

    def __getitem__(self, idx):
        """
        Loads the image at position `idx` together with its annotation.

        Args:
          idx: the index of the sample to get.

        Returns:
          A tuple `(img, target)` where `target` is a dict with keys "boxes"
          (float32 tensor of shape (N, 4), [xmin, ymin, xmax, ymax]), "labels"
          (int64 tensor of shape (N,)) and "image_id" (tensor holding `idx`).
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.root_dir, img_name)

        # Swap only the file extension; str.replace would also rewrite any
        # other '.jpg' occurrence in the directory or file name.
        xml_path = os.path.splitext(img_path)[0] + '.xml'

        img = Image.open(img_path).convert("RGB")
        boxes, labels = self.parse_voc_xml(xml_path)

        # reshape keeps the (N, 4) layout even when the image has no boxes,
        # which would otherwise produce a 1-D tensor of shape (0,).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx])
        }

        if self.transforms is not None:
            img = self.transforms(img)
        else:
            img = F.to_tensor(img)

        return img, target

    def __len__(self):
        """Returns the number of images found in `root_dir`."""
        return len(self.images)

    def parse_voc_xml(self, xml_file):
        """
        Parses one VOC formatted XML annotation file.

        Objects whose name is not one of the known class names are skipped.

        Args:
          xml_file: the path to the XML file to parse.

        Returns:
          A tuple `(boxes, labels)`: a list of [xmin, ymin, xmax, ymax] float
          lists and the list of matching integer class ids.
        """
        # Resolved at call time so that the module-level `classes` list is only
        # needed when no explicit class names were given.
        names = self.class_names if self.class_names is not None else classes

        root = ET.parse(xml_file).getroot()
        boxes, labels = [], []

        for obj in root.findall("object"):
            label = obj.find("name").text
            if label not in names:
                continue

            bbox = obj.find("bndbox")
            box = [
                float(bbox.find("xmin").text),  # left
                float(bbox.find("ymin").text),  # top
                float(bbox.find("xmax").text),  # right
                float(bbox.find("ymax").text)   # bottom
            ]

            # Appended together so boxes and labels can never get out of step.
            labels.append(names.index(label))
            boxes.append(box)

        return boxes, labels


Load Datasets

In [5]:
# Build one VOCDataset per source directory and merge them into a single dataset per split.

# Training split: datasets 1-5
(train_dataset, train_dataset_2, train_dataset_3,
 train_dataset_4, train_dataset_5) = [
    VOCDataset(path)
    for path in (train_dir, train_dir_2, train_dir_3, train_dir_4, train_dir_5)
]
combined_train_dataset = ConcatDataset(
    [train_dataset, train_dataset_2, train_dataset_3, train_dataset_4, train_dataset_5]
)

# Test split: datasets 1-4 (dataset 5 is only used for training here)
test_dataset, test_dataset_2, test_dataset_3, test_dataset_4 = [
    VOCDataset(path)
    for path in (test_dir, test_dir_2, test_dir_3, test_dir_4)
]
combined_test_dataset = ConcatDataset(
    [test_dataset, test_dataset_2, test_dataset_3, test_dataset_4]
)

# Validation split: datasets 1-4 (dataset 5 is only used for training here)
valid_dataset, valid_dataset_2, valid_dataset_3, valid_dataset_4 = [
    VOCDataset(path)
    for path in (valid_dir, valid_dir_2, valid_dir_3, valid_dir_4)
]
combined_valid_dataset = ConcatDataset(
    [valid_dataset, valid_dataset_2, valid_dataset_3, valid_dataset_4]
)

# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Define CustomFastRCNNPredictor and FRCNNObjectDetector Classes

In [6]:
"""
A CustomFastRCNNPredictor class to be used as the predictor head for the FRCNN model in the FRCNNObjectDetector class.
"""
class CustomFastRCNNPredictor(nn.Module):
    """
    Box predictor head with an MLP classifier and a linear box regressor.

    The classifier stacks two hidden blocks (Linear -> ReLU -> BatchNorm1d ->
    Dropout(0.3)) of widths 512 and 256 before the final class-score layer.
    The regressor is a single linear layer producing four values per class.

    Args:
      in_channels: the size of each input feature vector.
      num_classes: the number of output classes.
    """
    def __init__(self, in_channels, num_classes):
        super().__init__()

        # Assemble the classifier layer by layer; the resulting nn.Sequential
        # keeps the same layer order and indices (0..8).
        stack = []
        width = in_channels
        for hidden in (512, 256):
            stack.extend([
                nn.Linear(width, hidden),
                nn.ReLU(),
                nn.BatchNorm1d(hidden),
                nn.Dropout(0.3),
            ])
            width = hidden
        stack.append(nn.Linear(width, num_classes))

        self.classifier = nn.Sequential(*stack)
        self.bbox_regressor = nn.Linear(in_channels, num_classes * 4)

    def forward(self, x):
        """Returns the tuple `(class scores, box regression deltas)` for `x`."""
        return self.classifier(x), self.bbox_regressor(x)


"""
A FRCNNObjectDetector class which can be configured to use Faster-RCNN.
"""
class FRCNNObjectDetector():
  """
  Trains, checkpoints and evaluates a torchvision Faster R-CNN object detector.

  The model uses a ResNet-50 FPN backbone with `CustomFastRCNNPredictor` as its
  box predictor head.

  Args:
    model_name: the name of the current model to train (used in checkpoint file names).
    classes: the class names to detect, background included.
    num_classes: the number of classes, background included.
    train_dataset: the training dataset.
    test_dataset: the testing dataset.
    valid_dataset: the validation dataset.
    device: the torch device the model and the data are moved to.
    epochs: the epoch number at which training stops.
  """
  def __init__(self, model_name, classes, num_classes, train_dataset, test_dataset, valid_dataset, device, epochs=10):
    self.model_name = model_name
    self.classes = classes
    self.num_classes = num_classes
    self.train_dataset = train_dataset
    self.test_dataset = test_dataset
    self.valid_dataset = valid_dataset
    self.device = device
    self.epochs = epochs
    self.model = self.get_model()
    self.start_epoch = 0

  def train_model(self, model_load_filepath=None):
    """
    Trains the model, validating and checkpointing after every epoch.

    Args:
        model_load_filepath: optional path to a checkpoint written by `save_model`.
            When given, model and optimizer state are restored and training
            resumes from the checkpoint's epoch.
    """
    print(f"####### Training Object Detection model - classes: {self.classes} #######")

    # Custom collate function: detection targets differ in size per image, so
    # a batch is kept as a tuple of images and a tuple of targets.
    data_loader = DataLoader(
        self.train_dataset,
        batch_size=2,
        shuffle=True,
        collate_fn=lambda x: tuple(zip(*x))
    )

    # Move model to run on GPU if available
    model = self.model.to(self.device)

    # Define the optimizer
    optimizer = torch.optim.SGD(self.model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

    # Load model checkpoint if file is given
    if model_load_filepath:
        optimizer, self.start_epoch = self.load_model(model_load_filepath, optimizer)
    else:
        self.start_epoch = 0

    #### Logging and Checkpointing Settings
    print_every = 1        # Print loss every N batches
    save_every = 1         # Save model every N epochs
    val_every = 1          # Validate every N epochs
    save_dir = 'models/object_detection' # Path to save model checkpoints
    os.makedirs(save_dir, exist_ok=True)

    # Per-epoch metrics used for the plots at the end of training
    epoch_losses = []
    val_maps = []

    # Nothing to do when the checkpoint is already at (or past) the target epoch;
    # without this guard the final save below would reference an unset `epoch`.
    if self.start_epoch >= self.epochs:
        print(f"Model already trained for {self.start_epoch} epochs (target: {self.epochs}); skipping training.")
        return

    for epoch in range(self.start_epoch, self.epochs):
        # evaluate_map() switches the model to eval mode, so re-enable training mode every epoch
        model.train()
        total_loss = 0

        for i, (images, targets) in enumerate(data_loader):

            # Drop degenerate boxes / samples and move the rest to the device
            clean_images = []
            clean_targets = []
            for img, tgt in zip(images, targets):
                img, tgt = self.remove_invalid_boxes(img, tgt)
                if img is not None and tgt is not None:
                    clean_images.append(img.to(self.device))
                    clean_targets.append({k: v.to(self.device) for k, v in tgt.items()})

            # The model cannot be called on an empty batch
            if not clean_images:
                print(f"  [Epoch {epoch+1}, Iter {i+1}] No valid samples in batch, skipping.")
                continue

            # In training mode the model returns a dict of losses
            loss_dict = model(clean_images, clean_targets)

            # Combine all losses into a single scalar
            losses = sum(loss for loss in loss_dict.values())

            # Backward pass and optimizer step
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            batch_loss = losses.item()
            total_loss += batch_loss

            # Print batch loss every few iterations
            if (i + 1) % print_every == 0:
                print(f"  [Epoch {epoch+1}, Iter {i+1}] Loss: {batch_loss:.4f}")

        # Print total loss at the end of the epoch and store it for the loss curve
        print(f"Epoch [{epoch+1}/{self.epochs}], Total Loss: {total_loss:.4f}")
        epoch_losses.append(total_loss)

        if (epoch + 1) % val_every == 0:
            # Evaluate on validation set
            val_results = self.evaluate_map(model, self.valid_dataset)
            val_map = val_results["map"].item()
            val_maps.append(val_map)
            print(f"📈 Validation mAP at epoch {epoch+1}: {val_map:.4f}")

        # Save checkpoint every few epochs
        if (epoch + 1) % save_every == 0:
            self.save_model(model, optimizer, epoch+1, filename=str(os.path.join(save_dir, f"{self.model_name}_checkpoint_epoch_{epoch+1}.pth")))

    # Save final model after training is complete
    self.save_model(model, optimizer, epoch+1, filename=str(os.path.join(save_dir, f"{self.model_name}_final_epoch_{epoch+1}.pth")))

    # Plot training metrics using matplotlib
    self.plot_training_metrics(epoch_losses, val_maps)

  def test_model(self, dataset_test_type='validate'):
    """
    Evaluates the model on the validation or test dataset and prints the result metrics.

    Args:
        dataset_test_type: the dataset to evaluate against, either 'test' or 'validate'.
            Defaults to 'validate'.

    Raises:
        ValueError: if `dataset_test_type` is neither 'validate' nor 'test'.
    """
    # Make sure dataset_test_type input is correct
    if dataset_test_type not in ['validate', 'test']:
        raise ValueError("Please select either 'validate' or 'test' as dataset_test_type.")

    # Set selected dataset
    dataset = self.valid_dataset if dataset_test_type == 'validate' else self.test_dataset

    # evaluate_map() moves the images to self.device, so the model must live there too
    # (it may not if train_model() was never called on this instance).
    self.model.to(self.device)

    # Evaluate the model on the selected dataset
    test_results = self.evaluate_map(self.model, dataset)

    # Print overall mAP/mAR results
    print("\n📊 Test set mAP/mAR results:")
    for k, v in test_results.items():
        # Convert torch tensors to NumPy arrays for cleaner display
        if isinstance(v, torch.Tensor):
            print(f"{k}: {v.cpu().numpy()}")
        else:
            print(f"{k}: {v}")

    # Print table header for per-class AP and AR
    print(f"\nAP / AR per class on {dataset_test_type} dataset\n" + "-"*73)
    print(f"| {'ID':<3} | {'Class':<20} | {'AP':<18} | {'AR':<18} |")
    print("-"*73)

    # Loop through each class (excluding background)
    for i, cls in enumerate(self.classes[1:], 1):
        ap = test_results['map_per_class'][i-1].item()
        ar = test_results['mar_100_per_class'][i-1].item()
        print(f"| {i:<3} | {cls:<20} | {ap:<18.3f} | {ar:<18.3f} |")

    # Print average AP and AR across all classes
    print("-"*73)
    print(f"| {'Avg':<24} | {test_results['map'].item():<18.3f} | {test_results['mar_100'].item():<18.3f} |")
    print("-"*73)

  def plot_training_metrics(self, epoch_losses, val_maps):
    """
    Plots the training loss and the validation mAP collected during training.

    Args:
        epoch_losses: a list with the total loss of each training epoch.
        val_maps: a list of mean average precision values on the validation dataset.
    """
    print("Creating plots for epoch loss and mAP metrics!")
    # Plot training loss per epoch
    plt.figure(figsize=(10, 4))
    plt.plot(epoch_losses, marker='o')
    plt.title("Training Loss per Epoch")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.grid(True)
    plt.show()

    # Plot validation mAP per epoch; the plotted 'map' value is computed at IoU 0.5
    plt.figure(figsize=(10, 4))
    plt.plot(val_maps, marker='s', color='green')
    plt.title("Validation mAP per Epoch")
    plt.xlabel("Epoch")
    plt.ylabel("mAP (IoU=0.5)")
    plt.grid(True)
    plt.show()

  def get_model(self):
    """
    Builds the Faster R-CNN model used by the detector.

    Returns:
        A `FasterRCNN` with a ResNet-50 FPN backbone (default pretrained weights)
        whose box predictor is a `CustomFastRCNNPredictor`.
    """
    # backbone_name is passed by keyword: positional use is deprecated in torchvision
    backbone = resnet_fpn_backbone(backbone_name='resnet50', weights='DEFAULT')

    # Create Faster R-CNN model with custom number of classes
    model = FasterRCNN(backbone, num_classes=self.num_classes)

    # Get the number of input features for the classifier head
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Replace the default head with the custom one for our number of classes
    model.roi_heads.box_predictor = CustomFastRCNNPredictor(in_features, self.num_classes)

    return model

  def remove_invalid_boxes(self, image, target):
    """
    Drops boxes with zero (or negative) width or height from a sample's target.

    Args:
        image: the image the target belongs to (returned unchanged).
        target: the target dict with "boxes" and optionally "labels".

    Returns:
        `(image, cleaned_target)`, or `(None, None)` when the sample has no
        valid box left. The input `target` dict is not modified.
    """
    boxes = target["boxes"]

    # If there are no boxes, skip this sample
    if boxes.numel() == 0 or boxes.shape[0] == 0:
        return None, None

    # A valid box has xmax > xmin and ymax > ymin
    valid = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])

    if not valid.any():
        print("All boxes invalid, skipping image.")
        return None, None

    if not valid.all():
        print(f"Dropping {(~valid).sum().item()} invalid boxes")

    # Work on a shallow copy so the caller's dict is left untouched
    cleaned = dict(target)
    cleaned["boxes"] = boxes[valid]
    if "labels" in cleaned:
        cleaned["labels"] = cleaned["labels"][valid]

    return image, cleaned

  def evaluate_map(self, model, dataset, iou_thresholds=(0.5, 0.75)):
    """
    Runs the model over a dataset and computes mAP / AR metrics with `compute_map_ar`.

    Args:
        model: The trained object detection model to evaluate.
        dataset: A dataset of image and target pairs.
        iou_thresholds: Accepted for backward compatibility but currently unused;
            the thresholds that are applied are fixed inside `compute_map_ar`.

    Returns:
        The results dict produced by `compute_map_ar`.
    """
    # Set model to evaluation mode so it returns predictions instead of losses
    model.eval()

    # Lists to store predictions and ground truth for all images
    all_preds, all_targets = [], []

    # Run inference without gradients
    with torch.no_grad():
        for img, target in dataset:
            # The model takes a list of images and returns one prediction dict per image
            pred = model([img.to(self.device)])[0]

            # Filter out predictions with low confidence scores
            keep = pred['scores'] > 0.05

            all_preds.append({
                'boxes': pred['boxes'][keep].cpu(),
                'labels': pred['labels'][keep].cpu(),
                'scores': pred['scores'][keep].cpu()
            })

            all_targets.append({
                'boxes': target['boxes'].cpu(),
                'labels': target['labels'].cpu()
            })

    # The background class is excluded from the metrics
    return self.compute_map_ar(all_preds, all_targets, (self.num_classes - 1))

  @staticmethod
  def _image_ap_ar(ious, iou_threshold):
    """
    Scores one image's predictions of one class against its ground truth.

    Args:
        ious: 2-D IoU tensor of shape (num_predictions, num_ground_truth) whose
            rows are ordered by descending prediction score.
        iou_threshold: a prediction is a true positive when the IoU with its
            best-overlapping ground truth box is above this value and that box
            has not been matched by an earlier prediction.

    Returns:
        `(ap, ar)` as Python floats. Both are 0.0 when there are no predictions
        or no ground truth boxes.
    """
    num_preds, num_gt = ious.shape
    if num_preds == 0 or num_gt == 0:
        return 0.0, 0.0

    # Greedy matching in score order; each ground truth box can be matched once
    tp = torch.zeros(num_preds)
    matched = set()
    for i in range(num_preds):
        max_iou, max_j = torch.max(ious[i], dim=0)
        if max_iou > iou_threshold and max_j.item() not in matched:
            tp[i] = 1
            matched.add(max_j.item())

    cum_tp = torch.cumsum(tp, dim=0)
    recalls = cum_tp / num_gt
    # After k predictions, precision is TP / k
    precisions = cum_tp / torch.arange(1, num_preds + 1, dtype=tp.dtype)

    # Area under the precision-recall curve with all-point interpolation: pad
    # the curve so it spans recall 0..1, replace each precision by the maximum
    # precision at any higher recall, then sum the rectangles. Integrating only
    # between the observed recall points would ignore the area before the first one.
    zero = torch.zeros(1)
    mrec = torch.cat([zero, recalls, torch.ones(1)])
    mpre = torch.cat([zero, precisions, zero])
    mpre = torch.flip(torch.cummax(torch.flip(mpre, dims=[0]), dim=0).values, dims=[0])
    ap = torch.sum((mrec[1:] - mrec[:-1]) * mpre[1:])

    # AR is the final recall value
    return ap.item(), recalls[-1].item()

  def compute_map_ar(self, preds, targets, num_classes):
    """
    Computes mean Average Precision (mAP) and Average Recall (AR) for a set of
    predictions and ground truth targets across all classes.

    AP and AR are computed per image and per class, then averaged over the
    images in which the class has at least one ground truth box or prediction.

    Args:
        preds: A list of prediction dicts ('boxes', 'labels', 'scores'), one per image.
        targets: A list of ground truth dicts ('boxes', 'labels'), one per image.
        num_classes: The number of object classes (excluding background).

    Returns:
        A dict with 'map' and 'map_50' (IoU 0.5), 'map_75' (IoU 0.75),
        'map_per_class', 'mar_1' / 'mar_10' / 'mar_100' (IoU 0.5, using at most
        1 / 10 / 100 highest-scoring predictions per image and class) and
        'mar_100_per_class'.
    """
    # (IoU threshold, max predictions per image and class) behind each reported metric
    settings = {
        'primary': (0.5, 100),
        'iou_75': (0.75, 100),
        'top_1': (0.5, 1),
        'top_10': (0.5, 10),
    }
    aps = {name: [[] for _ in range(num_classes)] for name in settings}
    ars = {name: [[] for _ in range(num_classes)] for name in settings}

    for pred, target in zip(preds, targets):
        # Loop through each class (excluding background)
        for class_idx in range(1, num_classes + 1):
            gt_boxes = target['boxes'][target['labels'] == class_idx]
            pred_mask = pred['labels'] == class_idx
            pred_boxes = pred['boxes'][pred_mask]
            pred_scores = pred['scores'][pred_mask]

            # The class does not occur in this image at all
            if len(gt_boxes) == 0 and len(pred_boxes) == 0:
                continue

            # Matching must go from the most to the least confident prediction
            _, order = torch.sort(pred_scores, descending=True, stable=True)
            pred_boxes = pred_boxes[order]

            if len(gt_boxes) > 0 and len(pred_boxes) > 0:
                ious = box_iou(pred_boxes, gt_boxes)
            else:
                ious = torch.zeros((len(pred_boxes), len(gt_boxes)))

            for name, (iou_threshold, max_dets) in settings.items():
                ap, ar = self._image_ap_ar(ious[:max_dets], iou_threshold)
                aps[name][class_idx-1].append(ap)
                ars[name][class_idx-1].append(ar)

    def class_means(per_class):
        # Classes that never occurred get 0
        return torch.tensor([float(np.mean(values)) if values else 0. for values in per_class])

    ap_avg = class_means(aps['primary'])
    ar_avg = class_means(ars['primary'])

    return {
        'map': ap_avg.mean(),
        'map_50': ap_avg.mean(),
        'map_75': class_means(aps['iou_75']).mean(),
        'map_per_class': ap_avg,
        'mar_1': class_means(ars['top_1']).mean(),
        'mar_10': class_means(ars['top_10']).mean(),
        'mar_100': ar_avg.mean(),
        'mar_100_per_class': ar_avg,
    }

  def save_model(self, model, optimizer, epoch, filename="checkpoint.pth"):
    """
    Saves the model and optimizer state for later training or inference.

    Args:
        model (torch.nn.Module): The model to save.
        optimizer (torch.optim.Optimizer): The optimizer used during training.
        epoch (int): Number of epochs completed so far.
        filename (str): File name to save the checkpoint.
    """
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, filename)
    print(f"✅ Saved model checkpoint to: {filename}")

  def load_model(self, filename, optimizer=None):
    """
    Loads a previously checkpointed model from a .pth file.

    Args:
        filename: the path/name of the file to load.
        optimizer: optional optimizer whose state is restored from the checkpoint.

    Returns:
        `(optimizer, start_epoch)` where `start_epoch` is the epoch stored in
        the checkpoint (0 when absent).
    """
    print(f"Loading checkpointed model at filepath: {filename}")
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all save_model() writes; it avoids executing arbitrary pickled
    # code from an untrusted checkpoint file.
    checkpoint = torch.load(filename, map_location=self.device, weights_only=True)

    self.model.load_state_dict(checkpoint['model_state_dict'])
    self.model.to(self.device)

    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    start_epoch = checkpoint.get('epoch', 0)
    print(f"✅ Loaded model. Resuming from epoch {start_epoch}")
    return optimizer, start_epoch


Initialise and train Faster RCNN model

In [None]:
# Initialise RCNN Object Detector
rcnn_object_detector = FRCNNObjectDetector(
    model_name="model_01_20251602",
    classes=classes,
    num_classes=num_classes,
    train_dataset=combined_train_dataset,
    test_dataset=combined_test_dataset,
    valid_dataset=combined_valid_dataset,
    device=device,
    epochs=14,
)

# Resume from the epoch-10 checkpoint when it exists; otherwise train from
# scratch so the notebook still runs on a fresh environment.
resume_checkpoint = 'models/object_detection/model_01_20251602_checkpoint_epoch_10.pth'
if not os.path.isfile(resume_checkpoint):
    print(f"Checkpoint not found at {resume_checkpoint}; training from scratch.")
    resume_checkpoint = None

# Train Faster RCNN Object detector
rcnn_object_detector.train_model(model_load_filepath=resume_checkpoint)

# Test Faster RCNN Object detector on validation dataset
rcnn_object_detector.test_model(dataset_test_type='validate')

# Test Faster RCNN Object detector on test dataset
rcnn_object_detector.test_model(dataset_test_type='test')

####### Training Object Detection model - classes: ['__background__', 'sheep'] #######
Loading checkpointed model at filepath: models/object_detection/model_01_20251602_checkpoint_epoch_10.pth


  checkpoint = torch.load(filename, map_location=self.device)


✅ Loaded model. Resuming from epoch 10
Dropping 1 invalid boxes
  [Epoch 11, Iter 1] Loss: 0.6321
  [Epoch 11, Iter 2] Loss: 0.5730
  [Epoch 11, Iter 3] Loss: 0.2680
  [Epoch 11, Iter 4] Loss: 0.5271
  [Epoch 11, Iter 5] Loss: 0.5805
  [Epoch 11, Iter 6] Loss: 0.7106
  [Epoch 11, Iter 7] Loss: 0.4873
  [Epoch 11, Iter 8] Loss: 0.4874
  [Epoch 11, Iter 9] Loss: 0.4690
  [Epoch 11, Iter 10] Loss: 0.3952
  [Epoch 11, Iter 11] Loss: 0.5419
  [Epoch 11, Iter 12] Loss: 0.5275
  [Epoch 11, Iter 13] Loss: 0.3062
  [Epoch 11, Iter 14] Loss: 0.6410
  [Epoch 11, Iter 15] Loss: 0.5160
  [Epoch 11, Iter 16] Loss: 0.3291
  [Epoch 11, Iter 17] Loss: 0.4550
  [Epoch 11, Iter 18] Loss: 0.4159
  [Epoch 11, Iter 19] Loss: 0.6077
  [Epoch 11, Iter 20] Loss: 0.9595
Dropping 1 invalid boxes
  [Epoch 11, Iter 21] Loss: 0.4159
  [Epoch 11, Iter 22] Loss: 0.5886
  [Epoch 11, Iter 23] Loss: 0.4352
  [Epoch 11, Iter 24] Loss: 0.5383
  [Epoch 11, Iter 25] Loss: 0.6531
  [Epoch 11, Iter 26] Loss: 0.7148
  [Epoch 1

In [None]:
##### Inference of Model on Sample Image #####

# Only predictions scoring above this confidence are drawn
score_threshold = 0.90

# Make sure the model is on the detector's device and in evaluation mode
rcnn_object_detector.model.to(rcnn_object_detector.device)
rcnn_object_detector.model.eval()

# Pick the first image of the training set. The detector's training set may be a
# ConcatDataset, which has no `images` list of its own, so fall back to its
# first underlying dataset in that case.
sample_dataset = rcnn_object_detector.train_dataset
if isinstance(sample_dataset, ConcatDataset):
    sample_dataset = sample_dataset.datasets[0]
img_path = os.path.join(sample_dataset.root_dir, sample_dataset.images[0])

# Open the image and convert to RGB
img = Image.open(img_path).convert("RGB")

# Convert the image to a tensor and add a batch dimension
img_tensor = F.to_tensor(img).unsqueeze(0).to(rcnn_object_detector.device)

# Run the model in inference mode without computing gradients
with torch.no_grad():
    output = rcnn_object_detector.model(img_tensor)[0]  # Get predictions for the first image

# Create a figure to display the original image
fig, ax = plt.subplots(figsize=(10, 6))
ax.imshow(img)

# Loop through predicted boxes, labels, and scores
for box, label, score in zip(output['boxes'], output['labels'], output['scores']):
    if score > score_threshold:
        # Extract box coordinates
        x1, y1, x2, y2 = box.cpu().numpy()

        # Draw the bounding box
        ax.add_patch(plt.Rectangle(
            (x1, y1), x2 - x1, y2 - y1,
            edgecolor='red', facecolor='none', linewidth=2
        ))

        # Draw the label and score
        ax.text(
            x1, y1,
            f"{classes[int(label)]}: {float(score):.2f}",
            color='white',
            bbox=dict(facecolor='red', alpha=0.5)
        )

# Hide axis ticks
ax.axis('off')
plt.show()
