In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights

In [None]:
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Build Faster R-CNN (ResNet-50 + FPN backbone) with torchvision's default
# pretrained weights; the checkpoint is downloaded on first use.
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)

# Move the model to the selected device.
model = model.to(device)

# Save the pretrained weights to a file (optional). Only the state_dict is
# written. The file name previously read "faster_rcon_..."; it is now spelled
# "rcnn" so it matches the other checkpoints this notebook writes.
torch.save(model.state_dict(), "faster_rcnn_resnet50_fpn.pth")
print("Model downloaded and saved successfully!")

Using device: cuda


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 154MB/s]


Model downloaded and saved successfully!


In [None]:
# Number of classes the new head predicts (background included).
num_classes = 91

# The replacement head has to accept the same feature width as the head it
# replaces, so read that width from the existing classification layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features
print(f"Number of input features for the classifier: {in_features}")

# Swap in a newly constructed box predictor sized for `num_classes`.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
print(f"Classifier head replaced to accommodate {num_classes} classes.")

# The new head is a fresh module, so move the whole model to the device again.
model = model.to(device)

# Write the modified model's state_dict to disk (optional).
torch.save(model.state_dict(), "faster_rcnn_resnet50_fpn_modified.pth")
print("Model modified successfully!")

Number of input features for the classifier: 1024
Classifier head replaced to accommodate 91 classes.
Model modified successfully!


In [None]:
# Print the module tree of the modified detector so its layers can be inspected.
print(model)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import CocoDetection
import numpy as np
import os
from PIL import Image
from google.colab import drive
# Make Google Drive available under /content/drive (the dataset lives there).
drive.mount('/content/drive')

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Build the pretrained Faster R-CNN (ResNet-50 + FPN) directly on the device.
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights).to(device)

# Keep a copy of the untouched pretrained weights (optional).
torch.save(model.state_dict(), "faster_rcnn_resnet50_fpn.pth")
print("Model downloaded and saved successfully!")

# Feature width expected by the box classifier; the new head must match it.
in_features = model.roi_heads.box_predictor.cls_score.in_features
print(f"Number of input features for the classifier: {in_features}")

# Number of classes in the dataset, background included.
# Change this to the number of classes in your dataset.
num_classes = 91

# Replace the pretrained head with a newly constructed one for `num_classes`.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
print(f"Classifier head replaced to accommodate {num_classes} classes.")

# The new head is a fresh module, so move the whole model to the device again.
model = model.to(device)

# Save the modified model's state_dict (optional).
torch.save(model.state_dict(), "faster_rcnn_resnet50_fpn_modified.pth")
print("Model modified successfully!")

# Define a custom dataset class for loading images and annotations
class CustomDataset(torch.utils.data.Dataset):
    """COCO-annotated detection dataset that yields ``(image, target)`` pairs.

    Wraps ``torchvision.datasets.CocoDetection`` and converts each sample's
    annotation list into the target dict consumed by torchvision's
    Faster R-CNN: ``{"boxes": FloatTensor[N, 4], "labels": Int64Tensor[N]}``.

    Args:
        root: Directory containing the images.
        ann_file: Path to the COCO-format annotation JSON file.
        transform: Optional callable applied to the image tensor only
            (the target is not passed to it).
    """

    def __init__(self, root, ann_file, transform=None):
        self.root = root
        self.transforms = transform
        self.coco = torchvision.datasets.CocoDetection(root=root, annFile=ann_file)

    def __getitem__(self, idx):
        """Return the image tensor and detection target for sample ``idx``.

        Boxes are converted from the COCO ``[x, y, width, height]`` layout to
        the ``[x1, y1, x2, y2]`` corner layout the detector expects.
        Annotations whose width or height is not positive are skipped because
        the detector rejects zero-area boxes.
        """
        img, annotations = self.coco[idx]
        img = F.to_tensor(img)

        boxes = []
        labels = []
        for ann in annotations:
            x, y, w, h = ann['bbox']  # COCO layout: top-left corner + size
            if w <= 0 or h <= 0:
                continue  # degenerate box; would fail the model's target validation
            boxes.append([x, y, x + w, y + h])
            labels.append(ann['category_id'])

        # reshape(-1, 4) keeps the tensor two-dimensional, shape (0, 4), even
        # when the image has no usable annotations.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}

        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Return the number of samples in the wrapped COCO dataset."""
        return len(self.coco)

# Define paths to your dataset
root = "/content/drive/My Drive/val2017/"  # Replace with your image directory
ann_file = "/content/drive/My Drive/instances_val2017/instances_val2017.json"  # Replace with your annotation file


def collate_detection_batch(batch):
    """Regroup a list of ``(image, target)`` samples into ``(images, targets)`` tuples.

    The default collate function tries to stack samples into one tensor, which
    does not work for detection: images differ in size and each target holds a
    different number of boxes. Keeping the samples as tuples lets the training
    loop hand lists of images and targets to the model.
    """
    return tuple(zip(*batch))


# Create the dataset and data loader
dataset = CustomDataset(root=root, ann_file=ann_file)
data_loader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    num_workers=4,
    collate_fn=collate_detection_batch,
)

# Training loop
num_epochs = 10  # Set the number of epochs
model.train()  # Training mode: the model returns a dict of losses

# Optimise only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

for epoch in range(num_epochs):
    # Accumulate the loss over the epoch so the log reports the epoch mean
    # rather than whatever the final mini-batch happened to produce.
    running_loss = 0.0
    num_batches = 0

    for images, targets in data_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass: returns the individual loss terms
        loss_dict = model(images, targets)

        # Total loss is the sum of all loss terms
        losses = sum(loss for loss in loss_dict.values())

        # Backward pass and parameter update
        losses.backward()
        optimizer.step()

        running_loss += losses.item()
        num_batches += 1

    # An empty loader yields NaN instead of a NameError on `losses`.
    epoch_loss = running_loss / num_batches if num_batches else float("nan")
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}")

# Save the trained model. Only the state_dict (parameter tensors) is written,
# so loading it later requires rebuilding the same architecture first.
torch.save(model.state_dict(), "faster_rcnn_resnet50_fpn_trained.pth")
print("Trained model saved successfully!")

# Set the model to evaluation mode before running inference below.
model.eval()

# Function to evaluate the model on a single image
def evaluate_model(image_path, score_threshold=0.5):
    """Run the detector on one image and print the confident detections.

    Uses the notebook-level ``model`` and ``device``. The model is expected to
    already be in evaluation mode; the notebook calls ``model.eval()`` before
    this function is used.

    Args:
        image_path: Path to the image file; it is opened and converted to RGB.
        score_threshold: Detections are printed only when their score is
            strictly greater than this value. Defaults to 0.5.

    Returns:
        None. Results are written to stdout.
    """
    image = Image.open(image_path).convert("RGB")
    image_tensor = F.to_tensor(image).unsqueeze(0).to(device)  # Add batch dimension

    with torch.no_grad():
        predictions = model(image_tensor)

    # One image was passed in, so only the first result is relevant.
    prediction = predictions[0]

    print("Predictions:")
    detections = zip(prediction['boxes'], prediction['labels'], prediction['scores'])
    for i, (box, label, score) in enumerate(detections):
        # `score` is already the scalar for detection i; indexing it again
        # (as in `scores[i]`) fails on a 0-dim tensor.
        if score > score_threshold:
            print(f"Object {i + 1}:")
            print(f"  Bounding Box: {box.cpu().numpy()}")
            print(f"  Label: {label.cpu().numpy()}")
            print(f"  Score: {score.cpu().numpy()}")

# Example usage: Evaluate the model on a test image.
# NOTE(review): this path spells the Drive folder "MyDrive", while the dataset
# root defined earlier in this cell uses "My Drive" — confirm both resolve to
# the same mounted folder.
test_image_path = "/content/drive/MyDrive/val2017/000000000139.jpg"  # Replace with your test image path
evaluate_model(test_image_path)