# Object Detection Using Faster RCNN
This notebook focuses on improving faster rcnn to get a batter result

In [1]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import cv2
from torchvision import transforms
import os
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import torchvision.transforms as T
import pandas as pd
from matplotlib import pyplot as plt
from torchvision.models.detection.rpn import RPNHead, RegionProposalNetwork
from torchvision.models.detection.backbone_utils import mobilenet_backbone
import glob
from PIL import Image, ImageDraw
import csv
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models.detection import FasterRCNN
import torchvision
from torchvision.ops import nms

We created dataframes for both train and validation datasets in the previous notebook and now we just use those dataframes and the given images to build a model and evaluate it

## Loading and Preprocessing Images

In [2]:
def preprocess_image(image_path, bbox, label):
    image = Image.open(image_path).convert("RGB")
    transform = transforms.Compose([transforms.ToTensor(),])
    image = transform(image)
    boxes = torch.tensor([bbox], dtype=torch.float32)
    labels = torch.tensor([label], dtype=torch.int64)
    target = {}
    target["boxes"] = boxes
    target["labels"] = labels
    return image, target

def load_dataset(csv_file, start, end):
    df = pd.read_csv(csv_file, names=['directory', 'x1', 'y1', 'x2', 'y2', 'label'])
    df = df.iloc[start:end]
    df['label'] = [1 for i in range(len(df))]
    dataset = []
    for index, row in df.iterrows():
        image_path = row['directory']
        bbox = [row['x1'], row['y1'], row['x2'], row['y2']]
        label = row['label']
        image, target = preprocess_image(image_path, bbox, label)
        dataset.append((image, target))
    return dataset

In [3]:
train_dataset = load_dataset('/content/drive/MyDrive/train_annotations.csv', 0, 364)
val_dataset = load_dataset('/content/drive/MyDrive/validation_annotations.csv', 0, 152)

In [4]:
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True,
                          num_workers=0, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False,
                        num_workers=0, collate_fn=lambda x: tuple(zip(*x)))

## Building a Faster RCNN with Custom Settings
* ResNet50 as backbone
* Custom anchor sizes
* Adjusting NMS threshold


In [25]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [26]:
anchor_generator = AnchorGenerator(sizes=((32,), (64,), (128,), (256,), (512,)),
                                   aspect_ratios=tuple([(0.5, 1.0, 2.0) for _ in range(5)]))

In [27]:
rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])

rpn = RegionProposalNetwork(
    anchor_generator=anchor_generator,
    head=rpn_head,
    fg_iou_thresh=0.7,
    bg_iou_thresh=0.3,
    batch_size_per_image=256,
    positive_fraction=0.5,
    pre_nms_top_n=dict(training=2000, testing=1000),
    post_nms_top_n=dict(training=2000, testing=300),
    nms_thresh=0.45  # Custom NMS threshold
)

In [28]:
model.rpn = rpn

In [29]:
num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Want our model to be more aggressive and reduce false positives so we lower NMS threshold.

---
Run this cell if model is available

In [30]:
model.load_state_dict(torch.load('/content/faster_rcnn_v3.pth'))

<All keys matched successfully>

---

In [31]:
%%capture
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

Optimizer

In [32]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [13]:
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    for images, targets in train_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()
    lr_scheduler.step()
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {losses.item()}")

Epoch [1/5], Loss: 0.018605565652251244
Epoch [2/5], Loss: 0.021705161780118942
Epoch [3/5], Loss: 0.01977173611521721
Epoch [4/5], Loss: 0.011389948427677155
Epoch [5/5], Loss: 0.008227230049669743


In [14]:
torch.save(model.state_dict(), 'faster_rcnn_v3.pth')

Validating model

In [12]:
val_loss = 0.0
num_batches = len(val_loader)
model.train()
with torch.no_grad():
    for images, targets in val_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        val_loss += losses.item()
val_loss /= num_batches
print(f'Validation Loss: {val_loss}')

Validation Loss: 0.020328189025780086


## Validation Fine-Tuning

In [33]:
num_epochs = 4
for epoch in range(num_epochs):
    model.train()
    for images, targets in val_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()
    lr_scheduler.step()
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {losses.item()}")

Epoch [1/4], Loss: 0.017667774111032486
Epoch [2/4], Loss: 0.010981985367834568
Epoch [3/4], Loss: 0.00985522661358118
Epoch [4/4], Loss: 0.0065686507150530815


In [34]:
torch.save(model.state_dict(), 'faster_rcnn_v3.pth')

## Further Analysis: testing new images

In [37]:
def preprocess_image(image_path):
    image = Image.open(image_path)
    transform = T.Compose([
        T.ToTensor(),
    ])
    image_tensor = transform(image).unsqueeze(0)
    return image_tensor, image

In [43]:
def predict_and_save(model, device, image_tensor, image, output_dir, threshold=0.3):
    model.eval()
    image_tensor = image_tensor.to(device)
    with torch.no_grad():
        outputs = model(image_tensor)
    predictions = outputs[0]
    boxes = predictions['boxes'].cpu().numpy()
    scores = predictions['scores'].cpu().numpy()
    filtered_boxes = []
    for box, score in zip(boxes, scores):
        if score >= threshold:
            filtered_boxes.append(box)
    for box in filtered_boxes:
        draw = ImageDraw.Draw(image)
        draw.rectangle(box.tolist(), outline="red")
    image_name = os.path.basename(image_path)
    output_path = os.path.join(output_dir, image_name)
    image.save(output_path)

In [44]:
image_directory = '/content/drive/MyDrive/Q4/test/'
output_directory = '/content/drive/MyDrive/outputs_v3/'
os.makedirs(output_directory, exist_ok=True)
image_paths = glob.glob(os.path.join(image_directory, '*.jpg'))
# Process each image in the directory
for image_path in image_paths:
    image_tensor, image = preprocess_image(image_path)
    predict_and_save(model, device, image_tensor, image, output_directory)