<a href="https://www.kaggle.com/code/ivanderwijaya/cars-detection-using-faster-r-cnn?scriptVersionId=246459958" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Cars Detection using Faster R-CNN

In this notebook, we perform object detection on the [Self Driving Cars Dataset](https://www.kaggle.com/datasets/alincijov/self-driving-cars/data) to detect cars in images using a Faster R-CNN model.

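In this notebook, we use a Faster R-CNN model with the following configuration:

- Model: torchvision `fasterrcnn_resnet50_fpn` with pre-trained weights and a replaced box-predictor head
- Optimizer: SGD (learning rate 0.005, momentum 0.9, weight decay 0.0005)
- Learning-rate schedule: `StepLR` (step size 3, gamma 0.1)
- Batch size: 4 for training, 1 for testing
- Epochs: 5
- Data split: 80% training / 20% testing (random seed 42)
- Evaluation: IoU threshold of 0.5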


## 1. Import Self Driving Cars Dataset from Kaggle

In [None]:
# Install required dependencies

!pip install opendatasets
!pip install pandas

In [None]:
import opendatasets as od
import pandas as pd

# od.download("https://www.kaggle.com/datasets/alincijov/self-driving-cars/data")  # Uncomment this line if you are running outside of a Kaggle Notebook

import kagglehub

# Fetch the latest version of the dataset through kagglehub and report where it was stored
path = kagglehub.dataset_download(
    "alincijov/self-driving-cars"
)

print("Path to dataset files:", path)

In [None]:
# Location of the training labels CSV; adjust this path when running outside of Kaggle
file = '/kaggle/input/self-driving-cars/labels_train.csv'
newData = pd.read_csv(filepath_or_buffer=file)
newData.head()

## 2. Data Processing

In [None]:
# Report whether the label table contains any rows at all
if not newData.empty:
  print("Dataset loaded successfully with", len(newData), "rows.")
else:
  print("Dataset is empty. No data to process or train.")

In [None]:
# Detect fully duplicated annotation rows.
# keep=False flags every member of a duplicate group, not only the repeated occurrences.
duplicate_mask = newData.duplicated(keep=False)
duplicate_count = duplicate_mask.sum()

if duplicate_count == 0:
  print("No duplicate rows found.")
else:
  print("Found", duplicate_count, "duplicate rows. Removing duplicates.")
  print("Showing duplicate rows:")
  # display() renders the frame as a rich table in the notebook
  display(newData.loc[duplicate_mask])
  # Keep the first occurrence of each duplicated row
  newData.drop_duplicates(inplace=True)
  print("Dataset now has", len(newData), "rows after removing duplicates.")

In [None]:
# Count null entries per column and list only the columns that have any
missing_values = newData.isna().sum()
if missing_values.sum() == 0:
  print("\nNo missing values found.")
else:
  print("\nMissing values per column:")
  print(missing_values[missing_values > 0])

In [None]:
# Flag boxes whose width or height is not strictly positive
invalid_bbox_mask = newData['xmax'].le(newData['xmin']) | newData['ymax'].le(newData['ymin'])
invalid_bbox_count = invalid_bbox_mask.sum()

if invalid_bbox_count == 0:
    print("\nNo rows with invalid bounding box dimensions found.")
else:
    print("\nFound", invalid_bbox_count, "rows with invalid bounding box dimensions.")
    print("Showing invalid rows:")
    display(newData.loc[invalid_bbox_mask])
    print("Removing rows with invalid bounding box dimensions.")
    # .copy() detaches the filtered frame so later in-place edits do not trigger SettingWithCopyWarning
    newData = newData.loc[~invalid_bbox_mask].copy()
    print("Dataset now has", len(newData), "rows after removing invalid bounding boxes.")

In [None]:
print("\nAnalyzing class distribution...")

# Number of annotated boxes per class, most frequent first
class_counts = newData['class_id'].value_counts()

print("Class distribution:")
print(class_counts)

# Bar chart of the same counts
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
class_counts.plot.bar()
plt.xlabel('Class')
plt.ylabel('Count')
plt.title('Distribution of Classes')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# Flag imbalance when the rarest class has under 10% of the most common class's examples
# (0.1 is an example threshold, not a tuned value)
if (class_counts.min() / class_counts.max()) < 0.1:
    print("\nPotential class imbalance detected. Some classes have significantly fewer examples.")
    print("Consider data augmentation or re-sampling techniques during training.")

## 3. Data Preparation

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
import torchvision.transforms as T
from PIL import Image
import os
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split
import random
import numpy as np

In [None]:
# Seed every random number generator used in this notebook so runs are repeatable
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
# CUDA keeps separate generators per device; seed them all when a GPU is present
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

In [None]:
# Define custom dataset class
class SelfDrivingCarsDataset(Dataset):
    """Detection dataset that yields one image together with all of its annotated boxes.

    Args:
        dataframe: Annotation table with one row per box. The columns read here are
            'frame' (image file name), 'xmin', 'ymin', 'xmax', 'ymax' and 'class_id'.
        img_dir: Directory that contains the image files named in 'frame'.
        transforms: Optional iterable of torchvision transform objects. Only
            ``T.ToTensor`` and ``T.RandomHorizontalFlip`` are applied; any other
            transform is skipped because an image-only transform would leave the
            boxes out of sync with the pixels.

    Each item is ``(img, target)`` where ``target`` is a dict with the keys
    'boxes', 'labels', 'image_id', 'area', 'iscrowd' and 'filename'.
    """

    def __init__(self, dataframe, img_dir, transforms=None):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transforms = transforms
        self.image_filenames = self.dataframe['frame'].unique()
        # Map each file name to the positional row indices of its annotations once,
        # so __getitem__ does not have to rescan the whole table for every image.
        self._rows_by_frame = self.dataframe.groupby('frame', sort=False).indices

    def __len__(self):
        """Return the number of distinct images (not the number of boxes)."""
        return len(self.image_filenames)

    def _apply_transforms(self, img, boxes):
        """Apply the supported transforms to the image and keep the boxes consistent."""
        for t in self.transforms:
            if isinstance(t, T.ToTensor):
                img = t(img)
            elif isinstance(t, T.RandomHorizontalFlip):
                # Flip image and boxes together with probability t.p
                if torch.rand(1).item() < t.p:
                    width = img.shape[-1] if isinstance(img, torch.Tensor) else img.width
                    img = F.hflip(img)
                    boxes = boxes.clone()
                    # Mirrored xmin is (width - old xmax) and mirrored xmax is (width - old xmin)
                    boxes[:, [0, 2]] = width - boxes[:, [2, 0]]
        return img, boxes

    def __getitem__(self, idx):
        img_filename = self.image_filenames[idx]
        img_path = os.path.join(self.img_dir, img_filename)
        img = Image.open(img_path).convert("RGB")

        # All annotation rows that belong to the current image
        img_annotations = self.dataframe.iloc[self._rows_by_frame[img_filename]]

        # Boxes are stored as [xmin, ymin, xmax, ymax]; class_id is used directly as the label
        boxes = torch.as_tensor(
            img_annotations[['xmin', 'ymin', 'xmax', 'ymax']].to_numpy(dtype='float32'),
            dtype=torch.float32,
        )
        labels = torch.as_tensor(
            img_annotations['class_id'].to_numpy(copy=True),
            dtype=torch.int64,
        )

        if self.transforms is not None:
            img, boxes = self._apply_transforms(img, boxes)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            # height * width of every box (unchanged by a horizontal flip)
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            "iscrowd": torch.zeros((len(boxes),), dtype=torch.int64),
            "filename": img_filename,  # kept so predictions can be traced back to the image
        }

        return img, target

In [None]:
# Define transformation function (can be expanded for data augmentation)
def get_transform(train):
    """Return the list of torchvision transforms for a dataset.

    Args:
        train: When truthy, a random horizontal flip (probability 0.5) is
            appended after the tensor conversion.

    Returns:
        A list of transform objects, always starting with ``T.ToTensor()``.
    """
    # PIL image -> tensor conversion is needed for both training and evaluation
    pipeline = [T.ToTensor()]
    if train:
        # Training-only augmentation
        pipeline.append(T.RandomHorizontalFlip(0.5))
    return pipeline


In [None]:
# Helper function to collate data
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per field.

    A batch ``[(img1, tgt1), (img2, tgt2)]`` becomes ``((img1, img2), (tgt1, tgt2))``.
    Nothing is stacked, so the samples in a batch may differ in size.
    """
    transposed = zip(*batch)
    return tuple(transposed)

In [None]:
# Split data into training and testing sets.
# The split is done on image file names rather than on annotation rows: a row-level
# split would scatter the boxes of one image across both sets, which leaks test images
# into training and leaves the test ground truth for those images incomplete.
unique_frames = newData['frame'].unique()
train_frames, test_frames = train_test_split(unique_frames, test_size=0.2, random_state=SEED)
train_df = newData[newData['frame'].isin(train_frames)].copy()
test_df = newData[newData['frame'].isin(test_frames)].copy()

# Declare image directory
img_directory = '/kaggle/input/self-driving-cars/images' # Update this path if the images are stored in a different directory

# Create datasets
train_dataset = SelfDrivingCarsDataset(train_df, img_directory, transforms=get_transform(train=True))
test_dataset = SelfDrivingCarsDataset(test_df, img_directory, transforms=get_transform(train=False))

## 4. Model Setup

In [None]:
# Create data loaders
# Training loader: shuffled mini-batches; tune batch_size / num_workers to your hardware
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

# Test loader: one image at a time, in a fixed order
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=2,
    collate_fn=collate_fn,
)

# Release cached GPU memory before the model is created
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
# Load a pre-trained Faster R-CNN model
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the classifier with a new one sized for this dataset.
# class_id values are passed to the model as labels unchanged and index 0 is reserved
# for background, so the head needs one output per index up to the largest class id.
# Using max() instead of nunique() keeps every label in range even if the ids are not
# a contiguous 1..N sequence.
num_classes = int(newData['class_id'].max()) + 1 # Add 1 for background class
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move model to the appropriate device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

In [None]:
# Define optimizer and learning rate scheduler
# Only parameters that require gradients are handed to the optimizer
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# Multiply the learning rate by 0.1 after every 3 scheduler steps
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

# Summary of the training setup
print(f"Training on {device}")
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of testing samples: {len(test_dataset)}")
print(f"Number of classes (including background): {num_classes}")

## 5. Faster R-CNN Training

In [None]:
import gc

def train_model(model, optimizer, data_loader, device, epoch):
    """Train the detection model for one epoch and print the averaged losses.

    Args:
        model: Detection model that returns a dict of losses when called with
            ``(images, targets)`` in training mode. The dict must contain
            'loss_classifier', 'loss_box_reg', 'loss_objectness' and 'loss_rpn_box_reg'.
        optimizer: Optimizer that updates the model's parameters.
        data_loader: Iterable of ``(images, targets)`` batches.
        device: Device that images and target tensors are moved to.
        epoch: Zero-based epoch index (only used for the printed messages).

    Returns:
        dict with the per-batch average of the total loss ('loss') and of each
        loss component. All values are 0.0 when the loader yields no batches.
    """
    # Progress-bar dependency is imported lazily, as in the rest of this notebook cell
    from tqdm import tqdm

    model.train()

    component_keys = ('loss_classifier', 'loss_box_reg', 'loss_objectness', 'loss_rpn_box_reg')
    running_loss = 0.0
    running = dict.fromkeys(component_keys, 0.0)

    print(f"Epoch {epoch+1}")

    data_loader_tqdm = tqdm(data_loader, desc=f"Training Epoch {epoch+1}")

    num_batches = 0
    for num_batches, (images, targets) in enumerate(data_loader_tqdm, start=1):
        images = [image.to(device) for image in images]

        # Move every tensor in each target to the device; non-tensor entries
        # (such as the file name) are carried over unchanged.
        targets_on_device = [
            {
                key: value.to(device) if isinstance(value, torch.Tensor) else value
                for key, value in t.items()
            }
            for t in targets
        ]

        loss_dict = model(images, targets_on_device)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Read the scalar once; each .item() call forces a device synchronisation
        batch_loss = losses.item()
        running_loss += batch_loss
        for key in component_keys:
            running[key] += loss_dict[key].item()

        # Show the current batch loss and the running averages on the progress bar
        data_loader_tqdm.set_postfix({
            'batch_loss': batch_loss,
            'avg_epoch_loss': running_loss / num_batches,
            'avg_cls_loss': running['loss_classifier'] / num_batches,
            'avg_box_loss': running['loss_box_reg'] / num_batches,
            'avg_obj_loss': running['loss_objectness'] / num_batches,
            'avg_rpn_box_loss': running['loss_rpn_box_reg'] / num_batches
        })

    # Average over the batches actually processed; guard against an empty loader
    divisor = max(num_batches, 1)
    epoch_loss = running_loss / divisor
    epoch_classifier_loss = running['loss_classifier'] / divisor
    epoch_box_reg_loss = running['loss_box_reg'] / divisor
    epoch_objectness_loss = running['loss_objectness'] / divisor
    epoch_rpn_box_reg_loss = running['loss_rpn_box_reg'] / divisor

    print(f"Epoch {epoch+1} finished:")
    print(f"  Average Total Loss: {epoch_loss:.4f}")
    print(f"  Average Classifier Loss: {epoch_classifier_loss:.4f}")
    print(f"  Average Bounding Box Regression Loss: {epoch_box_reg_loss:.4f}")
    print(f"  Average Objectness Loss: {epoch_objectness_loss:.4f}")
    print(f"  Average RPN Bounding Box Regression Loss: {epoch_rpn_box_reg_loss:.4f}")

    return {
        'loss': epoch_loss,
        'loss_classifier': epoch_classifier_loss,
        'loss_box_reg': epoch_box_reg_loss,
        'loss_objectness': epoch_objectness_loss,
        'loss_rpn_box_reg': epoch_rpn_box_reg_loss,
    }

# Announce the target device, then release cached GPU memory and run the
# Python garbage collector before training starts
print(f"Starting training... on {device}")
if torch.cuda.is_available():
    torch.cuda.empty_cache()
gc.collect()

In [None]:
# Number of passes over the training set; raising it is recommended
num_epochs = 5

for epoch in range(num_epochs):
    train_model(
        model=model,
        optimizer=optimizer,
        data_loader=train_loader,
        device=device,
        epoch=epoch,
    )
    # Free cached GPU memory and unreachable objects between epochs
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()
    # Advance the learning-rate schedule once per epoch
    lr_scheduler.step()

print("Training complete.")

## 6. Evaluation

In [None]:
import torch
from tqdm import tqdm
import gc
import json # Or use pandas and save to CSV/Parquet

def evaluate_and_save_predictions(model, data_loader, device, output_path="inference_results.json"):
    """Run inference over a data loader and write the predictions to a JSON file.

    Args:
        model: Detection model; in eval mode it must return, per image, a dict
            with 'boxes', 'labels' and 'scores' tensors.
        data_loader: Iterable of ``(images, targets)`` batches. Every target
            must carry a 'filename' entry.
        device: Device the images are moved to for inference.
        output_path: Destination of the JSON file (a single list of per-image
            prediction dicts).

    Returns:
        ``(predictions, ground_truths)``. ``predictions`` is the list that was
        written to ``output_path``. ``ground_truths`` holds, per image, the file
        name plus the 'boxes' and 'labels' of its target as plain lists (empty
        lists when the target has no such entry).
    """
    model.eval()
    all_results = []     # per-image predictions, written to disk at the end
    ground_truths = []   # per-image annotations, returned alongside the predictions

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        gc.collect()

    print("Starting inference and saving results...")

    for images, targets in tqdm(data_loader, desc="Performing Inference and Saving"):
        images = [img.to(device) for img in images]

        with torch.no_grad():
            output = model(images)

        # Convert tensors to plain lists on the CPU so the results are JSON-serialisable
        all_results.extend(
            {
                'image_filename': tgt['filename'],
                'boxes': pred['boxes'].cpu().tolist(),
                'labels': pred['labels'].cpu().tolist(),
                'scores': pred['scores'].cpu().tolist()
            }
            for pred, tgt in zip(output, targets)
        )
        ground_truths.extend(
            {
                'image_filename': tgt['filename'],
                'boxes': tgt['boxes'].cpu().tolist() if 'boxes' in tgt else [],
                'labels': tgt['labels'].cpu().tolist() if 'labels' in tgt else []
            }
            for tgt in targets
        )

        # Drop the batch tensors before the next iteration to keep GPU memory low
        del images, targets, output
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    # A JSON array cannot be appended to incrementally, so the file is written once
    with open(output_path, 'w') as f:
        json.dump(all_results, f)

    print(f"Inference complete. Results saved to {output_path}")
    return all_results, ground_truths

In [None]:
# Run inference on the test set after training.
# The predictions are written to the JSON file and read back from disk in the next
# step, so the call's return value is not unpacked here.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
gc.collect()
evaluate_and_save_predictions(model, test_loader, device)

In [None]:
import json
import pandas as pd

import os

# Load predictions from the saved file.
# Prefer the file written by evaluate_and_save_predictions in the working directory
# (its default output path); fall back to the previously uploaded copy when this
# session has not produced one.
local_results_path = 'inference_results.json'
uploaded_results_path = '/kaggle/input/inference-result/inference_results.json'
results_path = local_results_path if os.path.exists(local_results_path) else uploaded_results_path

with open(results_path, 'r') as f:
    all_predictions = json.load(f)
print(f"Loaded {len(all_predictions)} image predictions from {results_path}")

# Load the ground truth data for the test set
ground_truth_df = test_df.copy() # Make a copy to avoid modifying the original

In [None]:
import torch
from torchvision.ops import box_iou
import pandas as pd
import numpy as np # Import numpy

def _match_record(image_filename, outcome, predicted=None, ground_truth=None):
    """Build one result row; ``outcome`` is 'tp', 'fp' or 'fn'.

    ``predicted`` is ``(box, label, score)`` and ``ground_truth`` is ``(box, label)``;
    whichever side is missing is recorded as ``None``.
    """
    pred_box, pred_label, pred_score = predicted if predicted is not None else (None, None, None)
    gt_box, gt_label = ground_truth if ground_truth is not None else (None, None)
    return {
        'image_filename': image_filename,
        'predicted_box': pred_box,
        'predicted_label': pred_label,
        'predicted_score': pred_score,
        'actual_label': gt_label,
        'is_tp': outcome == 'tp',
        'is_fn': outcome == 'fn',
        'is_fp': outcome == 'fp',
        'matched_gt_box': gt_box,
    }


def match_predictions_to_ground_truth(predictions, ground_truth_df, iou_threshold=0.5):
    """Greedily match predicted boxes to ground-truth boxes, image by image.

    Within an image, predictions are visited in descending score order and each
    one claims the still-unmatched ground-truth box with the highest IoU,
    provided that IoU is positive and at least ``iou_threshold``. Matching looks
    at box overlap only: the predicted and actual labels are both recorded but
    are not compared.

    Args:
        predictions: Iterable of dicts with 'image_filename', 'boxes', 'labels'
            and 'scores' (plain lists).
        ground_truth_df: DataFrame with the columns 'frame', 'xmin', 'ymin',
            'xmax', 'ymax' and 'class_id'.
        iou_threshold: Minimum IoU for a prediction to be matched.

    Returns:
        DataFrame with one row per matched prediction ('is_tp'), unmatched
        prediction ('is_fp') and unmatched ground-truth box ('is_fn').
    """
    matched_results = []

    # Positional row indices per image, computed once instead of filtering the
    # whole table for every prediction entry.
    rows_by_frame = ground_truth_df.groupby('frame', sort=False).indices

    for prediction in predictions:
        image_filename = prediction['image_filename']
        predicted_boxes = torch.tensor(prediction['boxes'], dtype=torch.float32)
        predicted_labels = torch.tensor(prediction['labels'], dtype=torch.int64)
        predicted_scores = torch.tensor(prediction['scores'], dtype=torch.float32)

        img_ground_truth = ground_truth_df.iloc[rows_by_frame.get(image_filename, [])]

        gt_boxes = torch.tensor(img_ground_truth[['xmin', 'ymin', 'xmax', 'ymax']].values, dtype=torch.float32)
        gt_labels = torch.tensor(img_ground_truth['class_id'].values, dtype=torch.int64)

        # Plain-Python views of the tensors, used to fill the result rows
        pred_box_list = predicted_boxes.tolist()
        pred_label_list = predicted_labels.tolist()
        pred_score_list = predicted_scores.tolist()
        gt_box_list = gt_boxes.tolist()
        gt_label_list = gt_labels.tolist()

        # gt_claimed[k] becomes True once ground-truth box k has been matched
        gt_claimed = [False] * len(gt_boxes)

        if len(predicted_boxes) > 0 and len(gt_boxes) > 0:
            iou_matrix = box_iou(predicted_boxes, gt_boxes)

            # Visit predictions from most to least confident
            _, sorted_indices = predicted_scores.sort(descending=True)

            for i in sorted_indices.tolist():
                predicted = (pred_box_list[i], pred_label_list[i], pred_score_list[i])

                # Best still-unmatched ground-truth box for this prediction
                best_iou = 0
                best_gt_idx = -1
                for gt_idx, current_iou in enumerate(iou_matrix[i].tolist()):
                    if gt_claimed[gt_idx]:
                        continue
                    if current_iou > best_iou and current_iou >= iou_threshold:
                        best_iou = current_iou
                        best_gt_idx = gt_idx

                if best_gt_idx != -1:
                    gt_claimed[best_gt_idx] = True
                    matched_results.append(_match_record(
                        image_filename, 'tp', predicted,
                        (gt_box_list[best_gt_idx], gt_label_list[best_gt_idx]),
                    ))
                else:
                    # No sufficiently overlapping ground truth is left: false positive
                    matched_results.append(_match_record(image_filename, 'fp', predicted))

        elif len(predicted_boxes) > 0:
            # Predictions but no ground truth for this image: all are false positives
            for i in range(len(predicted_boxes)):
                matched_results.append(_match_record(
                    image_filename, 'fp',
                    (pred_box_list[i], pred_label_list[i], pred_score_list[i]),
                ))

        # Ground-truth boxes that no prediction claimed are false negatives
        for gt_idx, claimed in enumerate(gt_claimed):
            if not claimed:
                matched_results.append(_match_record(
                    image_filename, 'fn',
                    ground_truth=(gt_box_list[gt_idx], gt_label_list[gt_idx]),
                ))
        # If both predictions and ground truth are empty, the image contributes no rows

    return pd.DataFrame(matched_results)

In [None]:
# Match every prediction against the test-set ground truth at an IoU threshold of 0.5
matched_results_df = match_predictions_to_ground_truth(
    predictions=all_predictions,
    ground_truth_df=ground_truth_df,
    iou_threshold=0.5,
)

In [None]:
# Show the match table (one row per true positive, false positive or false negative)
print(matched_results_df)

In [None]:
import numpy as np

def calculate_ap(matched_results_df, class_id):
    """Compute the 11-point interpolated Average Precision for one class.

    The match table is produced by class-agnostic box matching, so a row flagged
    'is_tp' only says that the prediction overlaps some ground-truth box. For
    the AP of ``class_id`` a prediction therefore counts as a true positive only
    when it is flagged 'is_tp' AND its matched 'actual_label' equals
    ``class_id``; every other prediction of this class counts as a false
    positive. This keeps recall within [0, 1].

    Args:
        matched_results_df: DataFrame with the columns 'predicted_label',
            'actual_label', 'predicted_score', 'is_tp', 'is_fp' and 'is_fn'.
        class_id: Class whose AP is computed.

    Returns:
        AP in [0, 1]; 0.0 when the class has no ground-truth boxes or no predictions.
    """
    is_prediction = matched_results_df['is_tp'] | matched_results_df['is_fp']
    has_ground_truth = matched_results_df['is_tp'] | matched_results_df['is_fn']

    # Every ground-truth box appears in exactly one row: as the matched box of a
    # prediction or as a false negative.
    total_ground_truth = int(
        ((matched_results_df['actual_label'] == class_id) & has_ground_truth).sum()
    )
    if total_ground_truth == 0:
        # AP is undefined without ground truth; 0.0 is used by convention
        return 0.0

    detections = matched_results_df[
        (matched_results_df['predicted_label'] == class_id) & is_prediction
    ].sort_values(by='predicted_score', ascending=False, kind='stable')
    if detections.empty:
        return 0.0

    # Correct detections: matched to a box whose label is this class
    correct = (detections['is_tp'] & (detections['actual_label'] == class_id)).to_numpy(dtype=bool)
    tp_cumulative = np.cumsum(correct)
    fp_cumulative = np.cumsum(~correct)

    # tp + fp equals the 1-based rank of each detection, so it is never zero
    precision = tp_cumulative / (tp_cumulative + fp_cumulative)
    recall = tp_cumulative / total_ground_truth

    # 11-point interpolation: at each recall level take the best precision
    # achieved at that recall or higher (0 when the level is never reached)
    ap = 0.0
    for r_level in np.linspace(0, 1, 11):
        reachable = precision[recall >= r_level]
        if reachable.size > 0:
            ap += reachable.max()
    return float(ap / 11)

# Classes that occur in the test-set ground truth, in ascending order
unique_classes = sorted(ground_truth_df['class_id'].unique().tolist())

# Average Precision for every ground-truth class
ap_per_class = {
    class_id: calculate_ap(matched_results_df, class_id)
    for class_id in unique_classes
}

print("Average Precision (AP) per class (at IoU=0.5):")
for class_id, ap_score in ap_per_class.items():
    print(f"  Class {class_id}: {ap_score:.4f}")

In [None]:
# mAP is the unweighted mean of the per-class AP values
if not ap_per_class:
    print("\nCould not calculate mAP as there are no classes with ground truth data.")
else:
    mean_ap = sum(ap_per_class.values()) / len(ap_per_class)
    print(f"\nMean Average Precision (mAP at IoU=0.5): {mean_ap:.4f}")

In [None]:
# Totals over all classes and images
total_tp = matched_results_df['is_tp'].sum()
total_fp = matched_results_df['is_fp'].sum()
total_fn = matched_results_df['is_fn'].sum()

# Precision: share of predictions that were matched (0.0 when there are no predictions)
if (total_tp + total_fp) > 0:
    overall_precision = total_tp / (total_tp + total_fp)
else:
    overall_precision = 0.0

# Recall: share of ground-truth boxes that were matched (0.0 when there is no ground truth)
if (total_tp + total_fn) > 0:
    overall_recall = total_tp / (total_tp + total_fn)
else:
    overall_recall = 0.0

# F1: harmonic mean of precision and recall (0.0 when both are zero)
if (overall_precision + overall_recall) > 0:
    overall_f1_score = 2 * (overall_precision * overall_recall) / (overall_precision + overall_recall)
else:
    overall_f1_score = 0.0

print("\nOverall Metrics (at IoU=0.5):")
print(f"  Total True Positives: {total_tp:.4f}")
print(f"  Total False Positives: {total_fp:.4f}")
print(f"  Total False Negatives: {total_fn:.4f}")
print(f"  Overall Precision: {overall_precision:.4f}")
print(f"  Overall Recall: {overall_recall:.4f}")
print(f"  Overall F1-score: {overall_f1_score:.4f}")

In [None]:
# Classes that occur in the test-set ground truth
unique_actual_classes = sorted(ground_truth_df['class_id'].unique().tolist())

# Size the matrix from every label that can be indexed: the ground-truth classes plus
# any label found in the match table. The model may predict a class that has no
# ground-truth box in this split, and that label still needs a row/column.
observed_labels = pd.concat(
    [matched_results_df['actual_label'], matched_results_df['predicted_label']]
).dropna()
max_class_id = int(max([*unique_actual_classes, *observed_labels.tolist()], default=0))
all_possible_class_ids = list(range(max_class_id + 1)) # Includes 0 for background

# Initialize confusion matrix with zeros
# Rows: Actual Class (including Background)
# Columns: Predicted Class (including Background)
confusion_matrix_counts = np.zeros((len(all_possible_class_ids), len(all_possible_class_ids)), dtype=int)

# Populate the confusion matrix.
# all_possible_class_ids is range(max_class_id + 1), so a class id is its own index.
# Missing labels are stored by pandas as NaN (not None), hence the pd.notna checks.
for row in matched_results_df.itertuples(index=False):
    has_actual = pd.notna(row.actual_label)
    has_predicted = pd.notna(row.predicted_label)

    if row.is_tp:
        # Matched prediction: actual class vs. predicted class (off-diagonal when they differ)
        if not (has_actual and has_predicted):
            continue
        actual_idx, predicted_idx = int(row.actual_label), int(row.predicted_label)
    elif row.is_fp:
        # Unmatched prediction: actual is 'Background' (index 0)
        if not has_predicted:
            continue
        actual_idx, predicted_idx = 0, int(row.predicted_label)
    elif row.is_fn:
        # Unmatched ground truth: predicted is 'Background' (index 0)
        if not has_actual:
            continue
        actual_idx, predicted_idx = int(row.actual_label), 0
    else:
        continue

    confusion_matrix_counts[actual_idx, predicted_idx] += 1

# Note: True Negatives (background correctly left undetected) are not counted in
# object detection, so cell [0, 0] stays zero and the 'Background' row holds only
# false positives.

print("\nConfusion Matrix Counts:")
print(confusion_matrix_counts)

In [None]:
# Create labels for the confusion matrix axes.
# One label per matrix index: index 0 is 'Background' and every other index is a class
# id. Building the list from all_possible_class_ids (instead of only the classes seen
# in the ground truth) keeps the labels aligned with the matrix when class ids are
# not contiguous. Map the ids to meaningful names here if you have them.
confusion_matrix_labels = ['Background'] + [f'Class {c}' for c in all_possible_class_ids[1:]]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Render the confusion matrix as an annotated heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(
    confusion_matrix_counts,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=confusion_matrix_labels,
    yticklabels=confusion_matrix_labels,
)
plt.title('Object Detection Confusion Matrix (IoU=0.5)')
plt.xlabel('Predicted Class')
plt.ylabel('Actual Class')
plt.show()

In [None]:
# File that receives the trained weights
model_save_path = 'faster_rcnn_cars.pth'

# Persist only the state dictionary (parameters and buffers), not the whole model object
torch.save(obj=model.state_dict(), f=model_save_path)

print(f"Model saved to {model_save_path}")

## 7. Load Model (Optional)

In [None]:
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

file = '/kaggle/input/self-driving-cars/labels_train.csv' # Adjust the file path if you are running outside of Kaggle
newData = pd.read_csv(file)

# Define a path to load the model
model_save_path = 'faster_rcnn_cars.pth'

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# --- To load the model later for inference ---

# First, instantiate a model with the same architecture as the trained one
loaded_model = fasterrcnn_resnet50_fpn(pretrained=False) # No need for pre-training weights

# The box predictor must have the same number of outputs as the saved weights.
# class_id values are used directly as label indices and index 0 is background, so
# the head needs one output per index up to the largest class id.
num_classes = int(newData['class_id'].max()) + 1
in_features = loaded_model.roi_heads.box_predictor.cls_score.in_features
loaded_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the saved state dictionary into the model; map_location lets weights saved on
# a GPU be loaded on a CPU-only machine
loaded_model.load_state_dict(torch.load(model_save_path, map_location=device))

# Move the loaded model to the appropriate device (e.g., CPU or GPU)
loaded_model.to(device)

# Set the model to evaluation mode
loaded_model.eval()

# Make the loaded model available under the name used by the rest of the notebook
model = loaded_model

print("Model loaded successfully for inference.")

# Now 'loaded_model' can be used to make predictions on new images.
# For inference, you would pass new images through this loaded_model
# and process the outputs (boxes, labels, scores).