In [1]:
import fiftyone as fo
import numpy as np

dataset = fo.zoo.load_zoo_dataset(
              "open-images-v7",
              split="train",
              label_types=["detections"],
              classes=["Cat"],
            #   max_samples=10,
          )

Downloading split 'train' to '/Users/ryan.wong/fiftyone/open-images-v7/train' if necessary
Necessary images already downloaded
Existing download of split 'train' is sufficient
Loading existing dataset 'open-images-v7-train'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
import cv2

In [3]:
# try fewer samples
# n_samples = len(dataset)
n_samples = 50


x_size = 267
y_size = 326

In [4]:
# batching is not necessary if we're using fasterrcnn_resnet50_fpn.

# batch_size = 100
# # (267 x 326) is the number of pixels in the smallest images
# # TODO: we downsize all images? i guess
# batched_data = torch.zeros([n_samples// batch_size, batch_size, 3, y_size, x_size], dtype=torch.float32)
# batched_boxes = np.ndarray((n_samples // batch_size, batch_size, 4))
# batched_targets = 

# for batch_index in range(n_samples // batch_size):
#     for i, sample in enumerate(dataset[batch_index * batch_size:(batch_index + 1) * batch_size]):
#         batched_data[batch_index, i] = cv2.resize(cv2.imread(sample['filepath']), (x_size, y_size)).transpose(2, 0, 1)

#         for detection in sample['ground_truth']['detections']:
#             if detection['label'] == 'Cat':
#                 batched_boxes[batch_index, i] = detection['bounding_box']
#                 break


# for j, box_batch in enumerate(batched_boxes):
#     for i, box in enumerate(box_batch):
#         # Convert normalized coordinates to absolute pixel values
#         x_min, y_min, width, height = box
#         x_max = x_min + width
#         y_max = y_min + height

#         # Create target dictionary
#         target = {
#             'boxes': torch.tensor([[x_min * x_size, y_min * y_size, x_max * x_size, y_max * y_size]], dtype=torch.float32),
#             'labels': torch.tensor([1], dtype=torch.int64),  # Assuming 1 is the label for 'Cat'
#         }
#         targets.append(target)


In [5]:
unbatched_data = torch.zeros([n_samples, 3, y_size, x_size], dtype=torch.float32)
# unbatched_boxes = np.ndarray((n_samples, 4))
targets = []
for i, sample in enumerate(dataset[:n_samples]):
    unbatched_data[i] = torch.from_numpy(cv2.resize(cv2.imread(sample['filepath']), (x_size, y_size)).transpose(2, 0, 1))
    for detection in sample['ground_truth']['detections']:
            box = detection['bounding_box']

            x_min, y_min, width, height = box
            x_max = x_min + width
            y_max = y_min + height
            target = {
                'boxes': torch.tensor([[x_min * x_size, y_min * y_size, x_max * x_size, y_max * y_size]], dtype=torch.float32),
                'labels': torch.tensor([1 if detection['label'] == 'Cat' else 0], dtype=torch.int64),  # Assuming 1 is the label for 'Cat'
            }
            targets.append(target)

In [8]:
# using documentation from https://pytorch.org/vision/stable/models/generated/torchvision.models.detection.fasterrcnn_resnet50_fpn.html#torchvision.models.detection.fasterrcnn_resnet50_fpn

import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None, num_classes = 2)
# print(unbatched_data.shape, unbatched_boxes.shape)
output = model(unbatched_data, targets)
# model.eval()

In [10]:
model.eval()
print(output)

{'loss_classifier': tensor(19.0357, grad_fn=<NllLossBackward0>), 'loss_box_reg': tensor(0.8428, grad_fn=<DivBackward0>), 'loss_objectness': tensor(2.1823, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>), 'loss_rpn_box_reg': tensor(1.7140, grad_fn=<DivBackward0>)}


In [None]:
import fiftyone as fo
import numpy as np

test_set = fo.zoo.load_zoo_dataset(
              "open-images-v7",
              split="test",
              label_types=["detections"],
              classes=["Cat"],
            #   max_samples=10,
          )

test_samples = 10


test_data = torch.zeros([test_samples, 3, y_size, x_size], dtype=torch.float32)
test_targets = []
for i, sample in enumerate(dataset[:test_samples]):
    unbatched_data[i] = torch.from_numpy(cv2.resize(cv2.imread(sample['filepath']), (x_size, y_size)).transpose(2, 0, 1))

    boxes = []
    labels = []
    for detection in sample['ground_truth']['detections']:
        box = detection['bounding_box']

        x_min, y_min, width, height = box
        x_max = x_min + width
        y_max = y_min + height
        boxes.append([x_min * x_size, y_min * y_size, x_max * x_size, y_max * y_size])
        labels.append(1 if detection['label'] == 'Cat' else 0)  # Assuming 1 is the label for 'Cat'
    target = {
        'boxes': torch.tensor(boxes, dtype=torch.float32),
        'labels': torch.tensor(labels, dtype=torch.int64),
    }
    test_targets.append(target)

Downloading split 'test' to '/Users/ryan.wong/fiftyone/open-images-v7/test' if necessary
Necessary images already downloaded
Existing download of split 'test' is sufficient
Loading existing dataset 'open-images-v7-test'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [12]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import average_precision_score

predictions = model(test_data)

print(predictions)

acc_score = accuracy_score(test_targets, predictions)
print(f"Accuracy Score: {acc_score}")
avg_prec_score = average_precision_score(test_targets, predictions)
print(f"Average Precision Score: {avg_prec_score}")

[{'boxes': tensor([[2.0845e+02, 3.1438e+02, 2.2360e+02, 3.2596e+02],
        [2.0043e+02, 3.1436e+02, 2.1559e+02, 3.2596e+02],
        [7.4928e+01, 3.1435e+02, 9.0095e+01, 3.2596e+02],
        [1.2566e+02, 3.1435e+02, 1.4082e+02, 3.2596e+02],
        [1.7639e+02, 3.1435e+02, 1.9155e+02, 3.2596e+02],
        [1.0430e+02, 3.1435e+02, 1.1946e+02, 3.2596e+02],
        [9.6288e+01, 3.1435e+02, 1.1145e+02, 3.2596e+02],
        [1.1765e+02, 3.1435e+02, 1.3281e+02, 3.2596e+02],
        [1.3100e+02, 3.1435e+02, 1.4616e+02, 3.2596e+02],
        [1.3634e+02, 3.1435e+02, 1.5150e+02, 3.2596e+02],
        [1.4702e+02, 3.1435e+02, 1.6218e+02, 3.2596e+02],
        [1.8173e+02, 3.1435e+02, 1.9689e+02, 3.2596e+02],
        [1.0964e+02, 3.1435e+02, 1.2480e+02, 3.2596e+02],
        [1.5236e+02, 3.1435e+02, 1.6752e+02, 3.2596e+02],
        [8.2938e+01, 3.1435e+02, 9.8105e+01, 3.2596e+02],
        [9.0948e+01, 3.1435e+02, 1.0611e+02, 3.2596e+02],
        [1.7105e+02, 3.1435e+02, 1.8621e+02, 3.2596e+02],
   

ValueError: Found input variables with inconsistent numbers of samples: [13, 10]