In [1]:
import torch
import torchvision
import os
import numpy as np

In [2]:
# Device selection is left disabled in this cell.
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Relative path to the sample zebra image used as detector input
# (read with cv2.imread in the OpenCV-based detection cell).
zebra_1 = "../ui/public/data/zebra-1.jpeg"

In [2]:
# https://voxel51.com/docs/fiftyone/tutorials/evaluate_detections.html

from PIL import Image
from torchvision.transforms import functional as func

# Run the model on GPU if it is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load a pre-trained Faster R-CNN model and switch it to inference mode
# (eval() makes the detector return predictions instead of training losses).
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.to(device)
model.eval()

# Reuse the notebook-level image path instead of repeating the literal.
# The context manager closes the file handle; convert("RGB") guarantees a
# 3-channel image even when the file is grayscale, CMYK or has alpha.
with Image.open(zebra_1) as image_file:
    image_raw = image_file.convert("RGB")

# to_tensor yields a float tensor laid out as (channels, height, width).
image = func.to_tensor(image_raw).to(device)
c, h, w = image.shape

# Inference only: disable autograd so no graph is built for the forward pass.
with torch.no_grad():
    preds = model([image])[0]

# Move the predictions to host memory as NumPy arrays.
labels = preds["labels"].cpu().numpy()
scores = preds["scores"].cpu().numpy()
boxes = preds["boxes"].cpu().numpy()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [3]:
# https://pyimagesearch.com/2021/08/02/pytorch-object-detection-with-pre-trained-networks/

from torchvision.models import detection
import cv2

# Load a pre-trained Faster R-CNN detector. This cell keeps the model and the
# input tensor on the CPU.
model = detection.fasterrcnn_resnet50_fpn(pretrained=True, progress=True, pretrained_backbone=True)
model.eval()

# cv2.imread does not raise on a missing/unreadable file; it returns None.
image = cv2.imread(zebra_1)
if image is None:
    raise FileNotFoundError(f"Could not read image: {zebra_1}")
orig = image.copy()  # untouched BGR copy used as the drawing canvas

# Convert BGR HxWxC uint8 -> RGB 1xCxHxW float in [0, 1] for the model.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = image.transpose((2, 0, 1))
image = np.expand_dims(image, axis=0)
image = image / 255.0
image = torch.FloatTensor(image)

# Inference only: disable autograd so no graph is built for the forward pass.
with torch.no_grad():
    detections = model(image)[0]

min_confidence = 0.5
box_color = (255, 0, 0)  # applied to a BGR canvas
for i in range(len(detections["boxes"])):
    # Plain Python float for comparison and string formatting.
    confidence = float(detections["scores"][i])
    if confidence < min_confidence:
        continue

    idx = int(detections["labels"][i])
    box = detections["boxes"][i].cpu().numpy()
    # tolist() yields built-in ints, which OpenCV drawing calls accept reliably.
    startX, startY, endX, endY = box.astype("int").tolist()
    # No category-name table is available in this notebook, so show the
    # numeric label id predicted by the model rather than a fixed placeholder.
    label = f"class {idx}: {confidence * 100:.2f}%"
    cv2.rectangle(orig, (startX, startY), (endX, endY), box_color, 2)
    # Put the caption above the box unless that would run off the top edge.
    y = startY - 15 if startY - 15 > 15 else startY + 15
    cv2.putText(orig, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2)

# NOTE(review): cv2.imshow opens a native window and normally needs a following
# cv2.waitKey(...) to render - confirm a later line or cell provides it.
cv2.imshow("Output", orig)