In [3]:
import fiftyone as fo
import fiftyone.zoo as foz
import os
import torch
import torch.utils.data
import torchvision
import torchvision.ops 
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from PIL import Image
from torchvision.transforms import functional as func
import cv2
import numpy as np
from detectron2.evaluation.coco_evaluation import instances_to_coco_json
from detectron2.structures import BoxMode
import pickle

In [None]:
# select device (whether GPU or CPU)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

inferencemode = 'COCO-Detection'
expendName = '.yaml'
load_model_name = 'retinanet_R_50_FPN_3x'

# load model
cfg = get_cfg()

cfg.merge_from_file(model_zoo.get_config_file(os.path.join(inferencemode, load_model_name + expendName)))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
#Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(os.path.join(inferencemode, load_model_name + expendName))

PREDICTION_PATH = './predictions'
# load
with open(os.path.join(PREDICTION_PATH, f'{load_model_name}_predictions.pickle'), 'rb') as f:
    model_predictions = pickle.load(f)
    
print("Model ready")

dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
    dataset_name="evaluate-detections-tutorial",
)
dataset.persistent = True

# Print some information about the dataset
print(dataset)

# Print a ground truth detection
sample = dataset.first()
print(sample.ground_truth.detections[0])

session = fo.launch_app()

In [None]:
from detectron2.data import MetadataCatalog
import pickle

# Choose a random subset of 100 samples to add predictions to
predictions_view = dataset.take(5000,seed=51) # limit size

## Get class list
#classes = dataset.default_classes

#Get class list
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
classes = metadata.get("thing_classes", None)

overlabthreshold = 0.15

# Add predictions to samples
with fo.ProgressBar() as pb:
    for sample in pb(predictions_view):

        # Load image
        image = cv2.imread(sample.filepath)
        h, w, c = image.shape

                # Perform inference
        filename = os.path.basename(sample.filepath)
        prediction = model_predictions[filename]

        boxes =  prediction[0]
        labels = prediction[1]
        scores = prediction[2]
        
        tensor_boxes =  torch.tensor(boxes)
        tensor_scores = torch.tensor(scores)
        results = torchvision.ops.nms(tensor_boxes, tensor_scores, overlabthreshold)
        
        # Convert detections to FiftyOne format
        detections = []

        for nIndex, (label, score, box) in enumerate(zip(labels, scores, boxes)):

            if (nIndex in results) is False:
                continue

            x, y, width, height = box

            x1 = x
            y1 = y
            x2 = x + width
            y2 = y + height
            
            rel_box = [x1 / w, y1 / h, (x2 - x1) / w, (y2 - y1) / h]

            detections.append(
                fo.Detection(
                    label=classes[label],
                    bounding_box=rel_box,
                    confidence=score
                )
            )
        sample[load_model_name] = fo.Detections(detections=detections)
        sample.save()


print("Finished adding predictions")
session.view = None
session.view = predictions_view
session.show()

In [6]:
fiftyone_classes = dataset.default_classes

#Get class list
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
COCO_classes = metadata.get("thing_classes", None)

In [7]:
print(fiftyone_classes)

['0', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', '12', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', '26', 'backpack', 'umbrella', '29', '30', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', '45', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', '66', 'dining table', '68', '69', 'toilet', '71', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', '83', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [8]:
print(COCO_classes)

['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']


In [9]:
from fiftyone import ViewField as F

# Only contains detections with confidence >= 0.75
high_conf_view = dataset.filter_labels(load_model_name, F("confidence") > 0.15)

In [10]:
print(high_conf_view)

Dataset:     evaluate-detections-tutorial
Media type:  image
Num samples: 4990
Tags:        ['validation']
Sample fields:
    id:                        fiftyone.core.fields.ObjectIdField
    filepath:                  fiftyone.core.fields.StringField
    tags:                      fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:                  fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:              fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    retinanet_R_50_FPN_1x:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    retinanet_R_50_FPN_3x:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    retinanet_R_101_FPN_3x:    fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    eval2_tp:                  fiftyone.core.fields.IntField
    eval2_fp:                  fiftyone.core.fields

In [11]:
sample = high_conf_view.first()
print(sample.retinanet_R_50_FPN_3x.detections[0])

<Detection: {
    'id': '62a4fc0378ba4622f7c0cd09',
    'attributes': BaseDict({}),
    'tags': BaseList([]),
    'label': 'tv',
    'bounding_box': BaseList([
        0.005723569169640541,
        0.38934000221216625,
        0.23573710918426513,
        0.2319750360479937,
    ]),
    'mask': None,
    'confidence': 0.8734915256500244,
    'index': None,
}>


In [None]:
session.view = high_conf_view

In [13]:
eval_key="retinenet_R_50_fpn_3x"

results = high_conf_view.evaluate_detections(
    load_model_name,
    gt_field="ground_truth",
    eval_key=eval_key,
    compute_mAP=True,
)

Evaluating detections...
 100% |███████████████| 4990/4990 [6.9m elapsed, 0s remaining, 10.3 samples/s]      
Performing IoU sweep...
 100% |███████████████| 4990/4990 [3.5m elapsed, 0s remaining, 23.9 samples/s]      


In [14]:
print(dataset.get_evaluation_info(eval_key))

{
    "key": "retinenet_R_50_fpn_3x",
    "version": "0.16.2",
    "timestamp": "2022-06-11T20:38:02.135000",
    "config": {
        "method": "coco",
        "cls": "fiftyone.utils.eval.coco.COCOEvaluationConfig",
        "pred_field": "retinanet_R_50_FPN_3x",
        "gt_field": "ground_truth",
        "iou": 0.5,
        "classwise": true,
        "iscrowd": "iscrowd",
        "use_masks": false,
        "use_boxes": false,
        "tolerance": null,
        "compute_mAP": true,
        "iou_threshs": [
            0.5,
            0.55,
            0.6,
            0.65,
            0.7,
            0.75,
            0.8,
            0.85,
            0.9,
            0.95
        ],
        "max_preds": 100,
        "error_level": 1
    }
}


In [15]:
# Get the 10 most common classes in the dataset
counts = dataset.count_values("ground_truth.detections.label")
classes_top = sorted(counts, key=counts.get, reverse=True)

# Print a classification report for the top-10 classes
results.print_report(classes=classes_top)

                precision    recall  f1-score   support

        person       0.48      0.92      0.63     17445
           car       0.26      0.85      0.40      2383
         chair       0.20      0.71      0.31      2110
          book       0.36      0.85      0.51      2832
        bottle       0.27      0.82      0.40      1374
           cup       0.22      0.78      0.35       952
  dining table       0.11      0.66      0.19       719
 traffic light       0.18      0.69      0.28       656
          bowl       0.19      0.78      0.30       653
       handbag       0.10      0.57      0.18       540
          bird       0.38      0.82      0.52       845
          boat       0.24      0.81      0.37       605
         truck       0.17      0.84      0.28       419
      umbrella       0.30      0.79      0.44       523
         bench       0.10      0.56      0.16       437
           cow       0.42      0.88      0.57       545
        banana       0.34      0.83      0.48  

In [16]:
print(results.mAP())

0.3767741160155552
