<a href="https://colab.research.google.com/github/MrAwesomeJr/comp-451-cat-bounding-box/blob/main/FINAL_version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import fiftyone as fo
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim
import torchvision.transforms as transforms
import cv2

dataset = fo.zoo.load_zoo_dataset(
              "open-images-v7",
              split="train",
              label_types=["detections"],
              classes=["Cat"],
              max_samples=200
          )

Downloading split 'train' to '/root/fiftyone/open-images-v7/train' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'train' to '/root/fiftyone/open-images-v7/train' if necessary


Necessary images already downloaded


INFO:fiftyone.utils.openimages:Necessary images already downloaded


Existing download of split 'train' is sufficient


INFO:fiftyone.zoo.datasets:Existing download of split 'train' is sufficient


Loading 'open-images-v7' split 'train'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v7' split 'train'


 100% |█████████████████| 300/300 [1.0s elapsed, 0s remaining, 293.4 samples/s]         


INFO:eta.core.utils: 100% |█████████████████| 300/300 [1.0s elapsed, 0s remaining, 293.4 samples/s]         


Dataset 'open-images-v7-train-300' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v7-train-300' created


In [12]:
#We had to limit our sample size due to time constraints, as training was very long
n_samples = 200
x_size = 267   # (267 x 326) is the number of pixels in the smallest images
y_size = 326

In [13]:
batch_size = 5
batched_data = torch.zeros([n_samples// batch_size, batch_size, 3, y_size, x_size], dtype=torch.float32)
batched_targets = []

for batch_index in range(n_samples // batch_size):
    for i, sample in enumerate(dataset[batch_index * batch_size:(batch_index + 1) * batch_size]):
        batched_data[batch_index, i] = torch.from_numpy(cv2.resize(cv2.imread(sample['filepath']), (x_size, y_size)).transpose(2, 0, 1))
        batched_targets.append([])
        for detection in sample['ground_truth']['detections']:
                box = detection['bounding_box']

                x_min, y_min, width, height = box
                x_max = x_min + width
                y_max = y_min + height
                target = {
                    'boxes': torch.tensor([[x_min * x_size, y_min * y_size, x_max * x_size, y_max * y_size]], dtype=torch.float32),
                    'labels': torch.tensor([1 if detection['label'] == 'Cat' else 0], dtype=torch.int64),  # Assuming 1 is the label for 'Cat'
                }
                batched_targets[batch_index].append(target)

In [15]:
# using documentation from https://pytorch.org/vision/stable/models/generated/torchvision.models.detection.fasterrcnn_resnet50_fpn.html#torchvision.models.detection.fasterrcnn_resnet50_fpn
from sklearn.metrics import average_precision_score
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None, num_classes=2)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

#Stochastic gradient descent
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

num_batches = len(batched_data)  # Total number of batches
num_epochs = 1  # Number of training epochs

model.train()  # Set model to training mode

for epoch in range(num_epochs):
    epoch_loss = 0

    for batch_index in range(num_batches):

        images = batched_data[batch_index].to(device)
        targets = [{k: v.to(device) for k, v in target.items()} for target in batched_targets[batch_index]]

        # Forward pass
        loss_dict = model(list(images), targets)
        total_loss = sum(loss for loss in loss_dict.values())

        # Backward pass
        optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0) # previent gradient exploding
        optimizer.step()

        epoch_loss += total_loss.item()

        print(f"  Batch {batch_index + 1}/{num_batches} - Loss: {total_loss.item():.4f}")

    # Adjust learning rate
    lr_scheduler.step()

    # Save model checkpoint
    torch.save(model.state_dict(), f"fasterrcnn_epoch_{epoch + 1}.pth")



  Batch 1/40 - Loss: 59.5232
  Batch 2/40 - Loss: 41.6631
  Batch 3/40 - Loss: 16.1733
  Batch 4/40 - Loss: 8.8270
  Batch 5/40 - Loss: 7.1501
  Batch 6/40 - Loss: 3.7179
  Batch 7/40 - Loss: 3.0031
  Batch 8/40 - Loss: 3.1861
  Batch 9/40 - Loss: 2.9422
  Batch 10/40 - Loss: 1.8135
  Batch 11/40 - Loss: 1.1676
  Batch 12/40 - Loss: 0.7614
  Batch 13/40 - Loss: 1.3902
  Batch 14/40 - Loss: 1.4562
  Batch 15/40 - Loss: 0.8169
  Batch 16/40 - Loss: 0.7813
  Batch 17/40 - Loss: 1.3433
  Batch 18/40 - Loss: 0.8880
  Batch 19/40 - Loss: 1.0205
  Batch 20/40 - Loss: 0.5514
  Batch 21/40 - Loss: 0.8175
  Batch 22/40 - Loss: 1.4506
  Batch 23/40 - Loss: 0.8268
  Batch 24/40 - Loss: 0.9872
  Batch 25/40 - Loss: 0.6281
  Batch 26/40 - Loss: 0.9290
  Batch 27/40 - Loss: 0.4223
  Batch 28/40 - Loss: 0.5821
  Batch 29/40 - Loss: 0.7991
  Batch 30/40 - Loss: 0.6111
  Batch 31/40 - Loss: 0.6121
  Batch 32/40 - Loss: 0.8272
  Batch 33/40 - Loss: 0.7141
  Batch 34/40 - Loss: 0.7774
  Batch 35/40 - Loss

In [16]:
model.eval() #Setting model to eval mode
#print(output)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

In [17]:
import fiftyone as fo
import numpy as np

test_set = fo.zoo.load_zoo_dataset(
              "open-images-v7",
              split="test",
              label_types=["detections"],
              classes=["Cat"],
               max_samples=200,
          )

test_samples = 200

test_data = torch.zeros([test_samples, 3, y_size, x_size], dtype=torch.float32)
test_targets = []
for i, sample in enumerate(dataset[:test_samples]):
    test_data[i] = torch.from_numpy(cv2.resize(cv2.imread(sample['filepath']), (x_size, y_size)).transpose(2, 0, 1))

    boxes = []
    labels = []
    for detection in sample['ground_truth']['detections']:
        box = detection['bounding_box']

        x_min, y_min, width, height = box
        x_max = x_min + width
        y_max = y_min + height
        boxes.append([x_min * x_size, y_min * y_size, x_max * x_size, y_max * y_size])
        labels.append(1 if detection['label'] == 'Cat' else 0)
    target = {
        'boxes': torch.tensor(boxes, dtype=torch.float32),
        'labels': torch.tensor(labels, dtype=torch.int64),
    }
    test_targets.append(target)

Downloading split 'test' to '/root/fiftyone/open-images-v7/test' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'test' to '/root/fiftyone/open-images-v7/test' if necessary


Downloading 'https://storage.googleapis.com/openimages/2018_04/test/test-images-with-rotation.csv' to '/root/fiftyone/open-images-v7/test/metadata/image_ids.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/2018_04/test/test-images-with-rotation.csv' to '/root/fiftyone/open-images-v7/test/metadata/image_ids.csv'


Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to '/root/fiftyone/open-images-v7/test/metadata/classes.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/class-descriptions-boxable.csv' to '/root/fiftyone/open-images-v7/test/metadata/classes.csv'


Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to '/tmp/tmp6stm1wr0/metadata/hierarchy.json'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy.json' to '/tmp/tmp6stm1wr0/metadata/hierarchy.json'


Downloading 'https://storage.googleapis.com/openimages/v5/test-annotations-bbox.csv' to '/root/fiftyone/open-images-v7/test/labels/detections.csv'


INFO:fiftyone.utils.openimages:Downloading 'https://storage.googleapis.com/openimages/v5/test-annotations-bbox.csv' to '/root/fiftyone/open-images-v7/test/labels/detections.csv'


Downloading 200 images


INFO:fiftyone.utils.openimages:Downloading 200 images


 100% |███████████████████| 200/200 [23.7s elapsed, 0s remaining, 9.4 files/s]       


INFO:eta.core.utils: 100% |███████████████████| 200/200 [23.7s elapsed, 0s remaining, 9.4 files/s]       


Dataset info written to '/root/fiftyone/open-images-v7/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/open-images-v7/info.json'


Loading 'open-images-v7' split 'test'


INFO:fiftyone.zoo.datasets:Loading 'open-images-v7' split 'test'


 100% |█████████████████| 200/200 [764.9ms elapsed, 0s remaining, 264.0 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 200/200 [764.9ms elapsed, 0s remaining, 264.0 samples/s]      


Dataset 'open-images-v7-test-200' created


INFO:fiftyone.zoo.datasets:Dataset 'open-images-v7-test-200' created


In [18]:
batch_size = 20  #processing in batches of 20 images at a time
predictions = []

for i in range(0, len(test_data), batch_size):
    batch = test_data[i:i + batch_size]
    print(i)
    with torch.no_grad():
        predictions_batch = model(batch)

    predictions.extend(predictions_batch)

0
20
40
60
80
100
120
140
160
180


In [23]:
# from https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html

import matplotlib.pyplot as plt
%matplotlib inline
from torchvision.utils import draw_bounding_boxes
from mpl_toolkits.axes_grid1 import ImageGrid
from sklearn.metrics import f1_score, average_precision_score
import torch
import numpy as np
import math
y_true = []
y_pred = []
y_scores = []

num_plots = 200
fig = plt.figure(figsize=(num_plots, 4.*num_plots*y_size/x_size))
grid = ImageGrid(fig, 111, nrows_ncols=(num_plots, 1), axes_pad=0.1)

for i in range(num_plots):
  image_index = i

  y_true.extend(test_targets[image_index]['labels'].cpu().numpy())

  predicted_labels = predictions[image_index]['labels'].cpu().numpy()
  y_pred.extend(predicted_labels)
  predicted_scores = predictions[image_index]['scores'].detach().cpu().numpy()

  mean_pscore=np.mean(predicted_scores)
  if math.isnan(mean_pscore):
    mean_pscore=0

  y_scores.append(mean_pscore)

  label_strings = []
  for label in test_targets[image_index]['labels']:
      label_strings.append("Cat" if label == 1 else "Background")
  prediction_label_strings = []
  for j, label in enumerate(predictions[image_index]['labels']):
      prediction_label_strings.append(f"Cat: {predictions[image_index]['scores'][j]:0.2f}" if label == 1 else "Background")
  print(i,len(prediction_label_strings))
  output_image = draw_bounding_boxes(test_data[image_index].to(torch.uint8).flip(0), test_targets[image_index]['boxes'], label_strings, colors="red")
  output_image = draw_bounding_boxes(output_image, predictions[image_index]['boxes'], prediction_label_strings, colors="yellow")
  grid[i].imshow(output_image.permute(1, 2, 0))


ap = average_precision_score(y_true[:200], y_scores)
print(f"Average Precision (AP): {ap:.4f}")
f1 = f1_score(y_true[:200], y_pred[:200])
print(f"F1 score: {f1:.4f}")

0 18
1 24
2 8
3 44
4 8
5 10
6 100
7 15
8 23
9 13
10 31
11 5
12 100
13 11
14 9
15 100
16 45
17 25
18 15
19 19
20 10
21 24
22 19
23 95
24 83
25 28
26 12
27 27
28 5
29 18
30 100
31 13
32 30
33 39
34 15
35 23
36 23
37 9
38 28
39 26
40 8
41 26
42 32
43 15
44 25
45 28
46 13
47 12
48 21
49 8
50 44
51 100
52 9
53 13
54 11
55 8
56 100
57 17
58 79
59 17
60 6
61 12
62 10
63 26
64 100
65 17
66 100
67 30
68 23
69 14
70 23
71 17
72 100
73 14
74 16
75 8
76 12
77 67
78 8
79 11
80 23
81 11
82 21
83 11
84 46
85 32
86 100
87 13
88 8
89 28
90 27
91 19
92 48
93 7
94 10
95 18
96 12
97 22
98 11
99 35
100 99
101 11
102 39
103 7
104 9
105 21
106 15
107 12
108 14
109 21
110 38
111 100
112 12
113 13
114 11
115 7
116 13
117 17
118 5
119 9
120 32
121 19
122 19
123 23
124 21
125 43
126 9
127 12
128 5
129 23
130 7
131 23
132 9
133 23
134 8
135 5
136 11
137 7
138 23
139 8
140 24
141 100
142 16
143 16
144 11
145 25
146 9
147 8
148 10
149 48
150 22
151 12
152 100
153 26
154 11
155 100
156 98
157 17
158 100
159 31
160 5

ValueError: Image size of 20000x97677 pixels is too large. It must be less than 2^16 in each direction.

ValueError: Image size of 20000x97677 pixels is too large. It must be less than 2^16 in each direction.

<Figure size 20000x97677.9 with 400 Axes>