In [3]:
import fiftyone as fo
import fiftyone.zoo as foz
import os
import torch
import torch.utils.data
import torchvision
import torchvision.ops 
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from PIL import Image
from torchvision.transforms import functional as func
import cv2
import numpy as np
from detectron2.evaluation.coco_evaluation import instances_to_coco_json
from detectron2.structures import BoxMode

In [4]:
import pickle

# select device (whether GPU or CPU)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

PREDICTION_PATH ='./predictions'
inferencemode = 'COCO-Detection'
expendName = '.yaml'

load_model_names = ['retinanet_R_50_FPN_1x', 'retinanet_R_50_FPN_3x', 'retinanet_R_101_FPN_3x']

load_model_name = 'ensemble'

# load model
cfg = get_cfg()

cfg.merge_from_file(model_zoo.get_config_file(os.path.join(inferencemode, load_model_names[0] + expendName)))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
#Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(os.path.join(inferencemode, load_model_names[0] + expendName))

# load
with open(os.path.join(PREDICTION_PATH, f'{load_model_names[0]}_predictions.pickle'), 'rb') as f:
    model1_predictions = pickle.load(f)

# load
with open(os.path.join(PREDICTION_PATH, f'{load_model_names[1]}_predictions.pickle'), 'rb') as f:
    model2_predictions = pickle.load(f)

# load
with open(os.path.join(PREDICTION_PATH, f'{load_model_names[2]}_predictions.pickle'), 'rb') as f:
    model3_predictions = pickle.load(f)


print("Model ready")

dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
    dataset_name="evaluate-ensenble-detections",
)
dataset.persistent = True

# Print some information about the dataset
print(dataset)

# Print a ground truth detection
sample = dataset.first()
print(sample.ground_truth.detections[0])

session = fo.launch_app()

Loading config /home/dongheehan/anaconda3/envs/objectdetection/lib/python3.7/site-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.
Model ready
Downloading split 'validation' to '/home/dongheehan/fiftyone/coco-2017/validation' if necessary
Found annotations at '/home/dongheehan/fiftyone/coco-2017/raw/instances_val2017.json'
Images already downloaded
Existing download of split 'validation' is sufficient
Loading existing dataset 'evaluate-ensenble-detections'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use
Name:        evaluate-ensenble-detections
Media type:  image
Num samples: 5000
Persistent:  True
Tags:        ['validation']
Sample fields:
    id:                fiftyone.core.fields.ObjectIdField
    filepath:          fiftyone.core.fields.StringField
    tags:              fiftyone.core.fields.ListField(fiftyone.core.

In [5]:
from detectron2.data import MetadataCatalog
import pickle

# Choose a random subset of 100 samples to add predictions to
predictions_view = dataset.take(5000,seed=51) # limit size

#Get class list
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
classes = metadata.get("thing_classes", None)

overlabthreshold = 0.15
predictions= []

# load
with open(os.path.join('rankingperclasschart.pickle'), 'rb') as f:
    rankingperclasschart = pickle.load(f)

In [6]:
'retinanet_R_101_FPN_3x' == rankingperclasschart['person'][0][0]

True

In [7]:
def SingleInference(sample ,predictor, modelname, rankingperclasschart, detections):
     # Perform inference

        image = cv2.imread(sample.filepath)
        h, w, c = image.shape

        filename = os.path.basename(sample.filepath)
        prediction = predictor[filename]

        boxes =  prediction[0]
        labels = prediction[1]
        scores = prediction[2]
        tensor_boxes =  torch.tensor(boxes)
        tensor_scores = torch.tensor(scores)

        results = torchvision.ops.nms(tensor_boxes, tensor_scores, overlabthreshold)

        # Convert detections to FiftyOne format
        
        for nIndex, (label, score, box) in enumerate(zip(labels, scores, boxes)):
            # only use best F1 Score model on class
            if (nIndex in results) is False or (modelname != rankingperclasschart[classes[label]][0][0]):
                continue

            #print(f'class {classes[label]} : {modelname}')
            
            x, y, width, height = box

            x1 = x
            y1 = y
            x2 = x + width
            y2 = y + height
            
            rel_box = [x1 / w, y1 / h, (x2 - x1) / w, (y2 - y1) / h]

            detections.append(
                fo.Detection(
                    label=classes[label],
                    bounding_box=rel_box,
                    confidence=score
                )
            )

In [None]:
with fo.ProgressBar() as pb:
    for sample in pb(predictions_view):

        # Load image
        image = cv2.imread(sample.filepath)
        h, w, c = image.shape

        detections = []

        # Perform inference
        SingleInference(sample ,model1_predictions, load_model_names[0], rankingperclasschart, detections)
        #print('by model1 : ', len(detections))
        SingleInference(sample, model2_predictions, load_model_names[1], rankingperclasschart, detections)
        #print('by model2 : ', len(detections))
        SingleInference(sample, model3_predictions, load_model_names[2], rankingperclasschart, detections)        
        #print('by model3 : ', len(detections))

        # Save predictions to dataset
        sample[load_model_name] = fo.Detections(detections=detections)
        sample.save()


print("Finished adding predictions")
session.view = None
session.view = predictions_view
session.show()

In [9]:
fiftyone_classes = dataset.default_classes

#Get class list
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
COCO_classes = metadata.get("thing_classes", None)

In [10]:
from fiftyone import ViewField as F

# Only contains detections with confidence >= 0.75
high_conf_view = dataset.filter_labels(load_model_name, F("confidence") > 0.15)

In [11]:
print(high_conf_view)

Dataset:     evaluate-ensenble-detections
Media type:  image
Num samples: 4985
Tags:        ['validation']
Sample fields:
    id:                fiftyone.core.fields.ObjectIdField
    filepath:          fiftyone.core.fields.StringField
    tags:              fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:          fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    ground_truth:      fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    ensemble:          fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    eval4_tp:          fiftyone.core.fields.IntField
    eval4_fp:          fiftyone.core.fields.IntField
    eval4_fn:          fiftyone.core.fields.IntField
    eval_union_tp:     fiftyone.core.fields.IntField
    eval_union_fp:     fiftyone.core.fields.IntField
    eval_union_fn:     fiftyone.core.fields.IntField
    eval_f1ranking_tp: fiftyone.core.fields.IntField
   

In [12]:
sample = high_conf_view.first()
print(sample.ensemble.detections[0])

<Detection: {
    'id': '62a505c279f516bc805a24b1',
    'attributes': BaseDict({}),
    'tags': BaseList([]),
    'label': 'tv',
    'bounding_box': BaseList([
        0.009705719351768494,
        0.38865435962945644,
        0.23089201450347902,
        0.22951249673332966,
    ]),
    'mask': None,
    'confidence': 0.9027981162071228,
    'index': None,
}>


In [None]:
session.view = high_conf_view

In [14]:
eval_key="eval_retinanet_r_50_fpn_1x"

results = high_conf_view.evaluate_detections(
    load_model_name,
    gt_field="ground_truth",
    eval_key=eval_key,
    compute_mAP=True,
)

Evaluating detections...
 100% |███████████████| 4985/4985 [5.7m elapsed, 0s remaining, 14.3 samples/s]      
Performing IoU sweep...
 100% |███████████████| 4985/4985 [3.0m elapsed, 0s remaining, 26.5 samples/s]      


In [15]:
print(dataset.get_evaluation_info(eval_key))

{
    "key": "eval_retinanet_r_50_fpn_1x",
    "version": "0.16.2",
    "timestamp": "2022-06-11T21:17:10.859000",
    "config": {
        "method": "coco",
        "cls": "fiftyone.utils.eval.coco.COCOEvaluationConfig",
        "pred_field": "ensemble",
        "gt_field": "ground_truth",
        "iou": 0.5,
        "classwise": true,
        "iscrowd": "iscrowd",
        "use_masks": false,
        "use_boxes": false,
        "tolerance": null,
        "compute_mAP": true,
        "iou_threshs": [
            0.5,
            0.55,
            0.6,
            0.65,
            0.7,
            0.75,
            0.8,
            0.85,
            0.9,
            0.95
        ],
        "max_preds": 100,
        "error_level": 1
    }
}


In [16]:
# Get the 10 most common classes in the dataset
counts = dataset.count_values("ground_truth.detections.label")
classes_top = sorted(counts, key=counts.get, reverse=True)

# Print a classification report for the top-10 classes
results.print_report(classes=classes_top)

                precision    recall  f1-score   support

        person       0.51      0.92      0.66     17204
           car       0.30      0.84      0.45      2384
         chair       0.22      0.70      0.33      2035
          book       0.39      0.85      0.54      2749
        bottle       0.30      0.80      0.43      1413
           cup       0.27      0.77      0.40       940
  dining table       0.12      0.66      0.21       708
 traffic light       0.18      0.69      0.28       656
          bowl       0.21      0.77      0.33       656
       handbag       0.13      0.59      0.21       540
          bird       0.39      0.83      0.53       832
          boat       0.27      0.80      0.40       599
         truck       0.18      0.80      0.30       419
         bench       0.14      0.64      0.23       501
      umbrella       0.33      0.81      0.47       499
           cow       0.46      0.87      0.60       541
        banana       0.38      0.81      0.51  

In [17]:
print(results.mAP())

0.3931411489089599
