In [1]:
import torch
from torchvision import models, transforms
from PIL import Image

In [2]:
# Build Faster R-CNN (ResNet-50 + FPN backbone) with pretrained weights; on first
# use the checkpoint is downloaded into the local torch hub cache.
model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# Put the model in evaluation mode. Being the last expression of the cell, this
# call is also what echoes the module summary in the cell output.
model.eval()

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:08<00:00, 20.8MB/s]


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [3]:
# Preprocessing pipeline applied to every input image: a single PIL -> tensor
# conversion step (the detector carries its own normalize/resize transform).
transform = transforms.Compose([transforms.ToTensor()])

In [4]:
def detect_objects(image_path):
    """Run the detector on a single image file and return its predictions.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The first (and only) element of the model output for a one-image
        batch. Elsewhere in this notebook it is indexed with the keys
        ``'boxes'``, ``'labels'`` and ``'scores'``.
    """
    # Use the image as a context manager so the underlying file is closed once
    # the pixels have been decoded. convert("RGB") forces that decode and
    # guarantees three channels; grayscale, RGBA or palette files would
    # otherwise reach the model with the wrong channel count.
    with Image.open(image_path) as image:
        image_tensor = transform(image.convert("RGB"))

    # Inference only: disable autograd so no graph is recorded and the returned
    # tensors do not require grad.
    with torch.no_grad():
        predictions = model([image_tensor])[0]
    return predictions

In [5]:
import cv2

In [6]:
def draw_boxes(image, predictions):
    """Draw every predicted bounding box onto ``image`` and return it.

    Args:
        image: Image handed straight to ``cv2.rectangle``, which draws on it
            directly (presumably a NumPy array; a PIL image would need
            converting first, so confirm against the caller).
        predictions: Mapping whose ``'boxes'`` entry iterates over
            ``(x1, y1, x2, y2)`` corner coordinates.

    Returns:
        The same ``image`` object that was passed in.
    """
    for box in predictions['boxes']:
        # Box coordinates arrive as float tensor elements, but OpenCV only
        # accepts plain Python ints for points, so truncate each corner first.
        x1, y1, x2, y2 = (int(coord) for coord in box)
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
    return image

In [10]:
import numpy as np

In [None]:
# Rebuild the pretrained Faster R-CNN detector and put it straight into
# evaluation mode (eval() hands back the module itself, so it can be chained).
model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True).eval()

# Input preprocessing: a single PIL -> tensor conversion step
transform = transforms.Compose([transforms.ToTensor()])

def detect_objects(image_path):
    """Load an image file, run the detector on it, and return both.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A ``(image, predictions)`` tuple: the decoded RGB PIL image and the
        first (only) element of the model output for the one-image batch.
    """
    # Decode inside a context manager so the file handle is released, and force
    # three channels. RGBA / grayscale / palette files would otherwise hand the
    # model the wrong channel count and break the RGB -> BGR conversion that
    # draw_boxes applies to the returned image.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    image_tensor = transform(image)

    # Make predictions without recording an autograd graph
    with torch.no_grad():
        predictions = model([image_tensor])[0]

    return image, predictions

def draw_boxes(image, predictions, score_threshold=0.5):
    """Render confident detections onto a BGR copy of ``image``.

    Args:
        image: RGB image convertible with ``np.array`` (e.g. the PIL image
            returned by ``detect_objects``).
        predictions: Mapping with tensor entries ``'boxes'``, ``'labels'`` and
            ``'scores'``, aligned by index.
        score_threshold: Detections are drawn only when their score is
            strictly greater than this value. Defaults to 0.5.

    Returns:
        A new BGR array with a rectangle and a ``Label: <id> (<score>)``
        caption for every detection above the threshold.
    """
    # Convert the image to a format suitable for OpenCV
    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    color = (255, 0, 0)

    # Pull everything out of the tensors once instead of indexing per detection.
    # .int() truncates the float coordinates to the integers OpenCV expects.
    boxes = predictions['boxes'].int().tolist()
    labels = predictions['labels'].tolist()
    scores = predictions['scores'].tolist()

    for (x1, y1, x2, y2), label, score in zip(boxes, labels, scores):
        # Only draw boxes for predictions with a high confidence score
        if score <= score_threshold:
            continue

        cv2.rectangle(image_cv, (x1, y1), (x2, y2), color, 2)

        # putText anchors at the text's bottom-left corner, so for a box
        # touching the top edge the usual "10 px above the box" position lands
        # off the image; drop the caption just inside the box in that case.
        label_y = y1 - 10 if y1 - 10 >= 10 else y1 + 15
        cv2.putText(image_cv, f'Label: {label} ({score:.2f})', (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return image_cv


# Example usage
image_path = './dog.jpeg'  # Replace with your image path
image, predictions = detect_objects(image_path)
result_image = draw_boxes(image, predictions)

# Display the result inline. cv2.imshow / cv2.waitKey need a desktop GUI session:
# on a headless or hosted kernel they fail, and locally waitKey(0) blocks the
# kernel until a key is pressed in the popup window. draw_boxes returns a BGR
# array, so flip the channels back to RGB and let the notebook render it as a
# PIL image instead.
result_rgb = cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB)
display(Image.fromarray(result_rgb))  # `display` is provided by IPython inside notebook cells