# FiftyOne and PyTorch Installation

In [None]:
!pip install fiftyone

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting fiftyone
  Downloading fiftyone-0.18.0-py3-none-any.whl (2.3 MB)
[K     |████████████████████████████████| 2.3 MB 28.1 MB/s 
Collecting fiftyone-db<0.5,>=0.4
  Downloading fiftyone_db-0.4.0-py3-none-manylinux1_x86_64.whl (37.8 MB)
[K     |████████████████████████████████| 37.8 MB 1.3 MB/s 
[?25hCollecting Jinja2>=3
  Downloading Jinja2-3.1.2-py3-none-any.whl (133 kB)
[K     |████████████████████████████████| 133 kB 72.9 MB/s 
Collecting mongoengine==0.24.2
  Downloading mongoengine-0.24.2-py3-none-any.whl (108 kB)
[K     |████████████████████████████████| 108 kB 83.3 MB/s 
Collecting ndjson
  Downloading ndjson-0.3.1-py2.py3-none-any.whl (5.3 kB)
Collecting retrying
  Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Collecting xmltodict
  Downloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Collecting motor>=2.3
  Downloading motor-3.1.1-py3-none-any.whl (56 kB)


In [None]:
!pip install torch torchvision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Imports

In [None]:
import torch
import torchvision

# Load Model

In [None]:
# Use the first CUDA device when one is present, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Pre-trained Faster R-CNN (ResNet-50 FPN backbone), moved to the selected
# device and switched to evaluation mode in one chained expression
model = (
    torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    .to(device)
    .eval()
)

print("Model ready")

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

Model ready


# Load Datasets

In [None]:
import fiftyone as fo
import fiftyone.zoo as foz

# COCO-2017 validation split from the FiftyOne dataset zoo, registered under
# the name "evaluate-detections-tutorial"
dataset = foz.load_zoo_dataset(
    "coco-2017", split="validation", dataset_name="evaluate-detections-tutorial"
)

# Mark the dataset persistent (presumably so it outlives this session in the
# FiftyOne database; see the FiftyOne dataset documentation)
dataset.persistent = True

Migrating database to v0.18.0


INFO:fiftyone.migrations.runner:Migrating database to v0.18.0


Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


Downloading annotations to '/root/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'


INFO:fiftyone.utils.coco:Downloading annotations to '/root/fiftyone/coco-2017/tmp-download/annotations_trainval2017.zip'


 100% |██████|    1.9Gb/1.9Gb [2.5s elapsed, 0s remaining, 748.4Mb/s]      


INFO:eta.core.utils: 100% |██████|    1.9Gb/1.9Gb [2.5s elapsed, 0s remaining, 748.4Mb/s]      


Extracting annotations to '/root/fiftyone/coco-2017/raw/instances_val2017.json'


INFO:fiftyone.utils.coco:Extracting annotations to '/root/fiftyone/coco-2017/raw/instances_val2017.json'


Downloading images to '/root/fiftyone/coco-2017/tmp-download/val2017.zip'


INFO:fiftyone.utils.coco:Downloading images to '/root/fiftyone/coco-2017/tmp-download/val2017.zip'


 100% |██████|    6.1Gb/6.1Gb [8.1s elapsed, 0s remaining, 765.1Mb/s]       


INFO:eta.core.utils: 100% |██████|    6.1Gb/6.1Gb [8.1s elapsed, 0s remaining, 765.1Mb/s]       


Extracting images to '/root/fiftyone/coco-2017/validation/data'


INFO:fiftyone.utils.coco:Extracting images to '/root/fiftyone/coco-2017/validation/data'


Writing annotations to '/root/fiftyone/coco-2017/validation/labels.json'


INFO:fiftyone.utils.coco:Writing annotations to '/root/fiftyone/coco-2017/validation/labels.json'


Dataset info written to '/root/fiftyone/coco-2017/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/coco-2017/info.json'


Loading 'coco-2017' split 'validation'


INFO:fiftyone.zoo.datasets:Loading 'coco-2017' split 'validation'


 100% |███████████████| 5000/5000 [28.8s elapsed, 0s remaining, 162.8 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 5000/5000 [28.8s elapsed, 0s remaining, 162.8 samples/s]      


Dataset 'evaluate-detections-tutorial' created


INFO:fiftyone.zoo.datasets:Dataset 'evaluate-detections-tutorial' created


In [None]:
# VOC-2012 validation split from the FiftyOne dataset zoo, registered under
# the name "voc-analysis"
voc_dataset = foz.load_zoo_dataset(
    "voc-2012", split="validation", dataset_name="voc-analysis"
)

Downloading split 'validation' to '/root/fiftyone/voc-2012/validation'


INFO:fiftyone.zoo.datasets:Downloading split 'validation' to '/root/fiftyone/voc-2012/validation'


Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar to /root/fiftyone/voc-2012/tmp-download/VOCtrainval_11-May-2012.tar


  0%|          | 0/1999639040 [00:00<?, ?it/s]

Extracting /root/fiftyone/voc-2012/tmp-download/VOCtrainval_11-May-2012.tar to /root/fiftyone/voc-2012/tmp-download
 100% |███████████████| 5823/5823 [1.1m elapsed, 0s remaining, 80.4 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 5823/5823 [1.1m elapsed, 0s remaining, 80.4 samples/s]      


Dataset info written to '/root/fiftyone/voc-2012/info.json'


INFO:fiftyone.zoo.datasets:Dataset info written to '/root/fiftyone/voc-2012/info.json'


Loading 'voc-2012' split 'validation'


INFO:fiftyone.zoo.datasets:Loading 'voc-2012' split 'validation'


 100% |███████████████| 5823/5823 [15.7s elapsed, 0s remaining, 301.1 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 5823/5823 [15.7s elapsed, 0s remaining, 301.1 samples/s]      


Dataset 'voc-analysis' created


INFO:fiftyone.zoo.datasets:Dataset 'voc-analysis' created


In [None]:
# Open the FiftyOne App on the VOC dataset. The returned `session` is reused
# in later cells to switch which view the App displays.
session = fo.launch_app(voc_dataset)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Prediction

In [None]:
from PIL import Image
from torchvision.transforms import functional as func

import fiftyone as fo

# The detector's integer labels are looked up in the class list of the COCO
# zoo dataset loaded earlier (the downloaded checkpoint is a COCO one), not in
# the VOC dataset that is being annotated.
classes = dataset.default_classes

# Inference only: torch.no_grad() stops autograd from recording a graph for
# every image, which keeps memory use flat over the whole dataset.
with fo.ProgressBar() as pb, torch.no_grad():
    for sample in pb(voc_dataset):
        # Load the image as a 3-channel tensor. convert("RGB") guards against
        # grayscale, RGBA or palette files; the context manager closes the
        # file handle as soon as the pixels have been read.
        with Image.open(sample.filepath) as pil_image:
            image = func.to_tensor(pil_image.convert("RGB")).to(device)
        _, h, w = image.shape

        # Perform inference on a batch of one image
        preds = model([image])[0]
        labels = preds["labels"].cpu().numpy()
        scores = preds["scores"].cpu().numpy()
        boxes = preds["boxes"].cpu().numpy()

        # Convert detections to FiftyOne format
        detections = []
        for label, score, box in zip(labels, scores, boxes):
            # Convert absolute [x1, y1, x2, y2] corners to
            # [top-left-x, top-left-y, width, height]
            # in relative coordinates in [0, 1] x [0, 1]
            x1, y1, x2, y2 = box
            rel_box = [x1 / w, y1 / h, (x2 - x1) / w, (y2 - y1) / h]

            detections.append(
                fo.Detection(
                    label=classes[label],
                    bounding_box=rel_box,
                    confidence=score
                )
            )

        # Save predictions to dataset
        sample["predictions_voc"] = fo.Detections(detections=detections)
        sample.save()

print("Finished adding predictions")

 100% |███████████████| 5823/5823 [13.4m elapsed, 0s remaining, 7.8 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 5823/5823 [13.4m elapsed, 0s remaining, 7.8 samples/s]      


Finished adding predictions


# Confidence Threshold

In [None]:
from fiftyone import ViewField as F

# Predictions must score strictly above this value to be kept
CONFIDENCE_THRESHOLD = 0.5

# Only contains detections with confidence > CONFIDENCE_THRESHOLD.
# only_matches=False is passed so that samples are not dropped from the view
# when none of their detections pass the filter.
high_conf_view = voc_dataset.filter_labels(
    "predictions_voc",
    F("confidence") > CONFIDENCE_THRESHOLD,
    only_matches=False,
)

In [None]:
# Point the running App session at the confidence-filtered view
session.view = high_conf_view

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Evaluation

In [None]:
# Score the `predictions_voc` detections of `high_conf_view` against the
# objects in the `ground_truth` field. The run is registered under the key
# "eval" and mAP computation is requested.
evaluation_options = {
    "gt_field": "ground_truth",
    "eval_key": "eval",
    "compute_mAP": True,
}
results = high_conf_view.evaluate_detections("predictions_voc", **evaluation_options)

Evaluating detections...


INFO:fiftyone.utils.eval.detection:Evaluating detections...


 100% |███████████████| 5823/5823 [1.9m elapsed, 0s remaining, 45.0 samples/s]      


INFO:eta.core.utils: 100% |███████████████| 5823/5823 [1.9m elapsed, 0s remaining, 45.0 samples/s]      


Performing IoU sweep...


INFO:fiftyone.utils.eval.coco:Performing IoU sweep...


 100% |███████████████| 5823/5823 [1.1m elapsed, 0s remaining, 86.5 samples/s]        


INFO:eta.core.utils: 100% |███████████████| 5823/5823 [1.1m elapsed, 0s remaining, 86.5 samples/s]        


In [None]:
# Mean IoU over all entries of `results.ious`.
# Entries that are None (presumably objects with no match; confirm against the
# FiftyOne DetectionResults documentation) add nothing to the sum but still
# count in the denominator, i.e. they are treated as an IoU of 0.
present_ious = [iou for iou in results.ious if iou is not None]
num_entries = len(results.ious)

# Guard against an empty evaluation instead of raising ZeroDivisionError
mean_iou = sum(present_ious) / num_entries if num_entries else 0.0
print(mean_iou)

0.25167512440480405


In [None]:
# Get the 10 most common ground-truth classes in the dataset that was actually
# evaluated (the VOC dataset). Counting on the COCO `dataset` instead selects
# classes that do not occur in the VOC ground truth and yields rows with zero
# support in the report.
counts = voc_dataset.count_values("ground_truth.detections.label")
classes_top10 = sorted(counts, key=counts.get, reverse=True)[:10]

# NOTE(review): predictions are labeled with the COCO class list while the
# ground truth uses VOC labels; the two presumably spell some classes
# differently (e.g. "dining table" vs "diningtable"). Confirm, and map the
# names if needed, before trusting per-class numbers.

# Print a classification report for the top-10 classes
results.print_report(classes=classes_top10)

               precision    recall  f1-score   support

       person       0.59      0.91      0.71      5110
          car       0.53      0.80      0.64      1173
        chair       0.41      0.68      0.51      1449
         book       0.00      0.00      0.00         0
       bottle       0.55      0.72      0.62       733
          cup       0.00      0.00      0.00         0
 dining table       0.00      0.00      0.00         0
traffic light       0.00      0.00      0.00         0
         bowl       0.00      0.00      0.00         0
      handbag       0.00      0.00      0.00         0

    micro avg       0.40      0.84      0.54      8465
    macro avg       0.21      0.31      0.25      8465
 weighted avg       0.55      0.84      0.66      8465



In [None]:
# Mean average precision of the evaluation run (the evaluation above was
# called with compute_mAP=True)
print(results.mAP())

0.3481005989436568


In [None]:
# Reload the view on which the "eval" evaluation was run and print its
# summary; the listed view stages show the confidence filter that was active
eval_view = voc_dataset.load_evaluation_view("eval")
print(eval_view)

Dataset:     voc-analysis
Media type:  image
Num samples: 5823
Sample fields:
    id:              fiftyone.core.fields.ObjectIdField
    filepath:        fiftyone.core.fields.StringField
    tags:            fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:        fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:    fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    predictions_voc: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    eval_tp:         fiftyone.core.fields.IntField
    eval_fp:         fiftyone.core.fields.IntField
    eval_fn:         fiftyone.core.fields.IntField
View stages:
    1. FilterLabels(field='predictions_voc', filter={'$gt': ['$$this.confidence', 0.5]}, only_matches=False, trajectories=False)


In [None]:
# Build a patches view from the results of the "eval" run and print its
# summary (each patch carries `type` and `iou` fields).
# NOTE(review): the evaluation was run on `high_conf_view`, but the patches
# are created from the unfiltered `voc_dataset` - confirm whether predictions
# below the confidence threshold end up in this view.
eval_patches = voc_dataset.to_evaluation_patches("eval")
print(eval_patches)

Dataset:     voc-analysis
Media type:  image
Num patches: 146741
Patch fields:
    id:              fiftyone.core.fields.ObjectIdField
    sample_id:       fiftyone.core.fields.ObjectIdField
    filepath:        fiftyone.core.fields.StringField
    tags:            fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:        fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:    fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    predictions_voc: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    crowd:           fiftyone.core.fields.BooleanField
    type:            fiftyone.core.fields.StringField
    iou:             fiftyone.core.fields.FloatField
View stages:
    1. ToEvaluationPatches(eval_key='eval', config=None)


In [None]:
# Point the running App session at the evaluation patches view
session.view = eval_patches