Performance Measurement using FiftyOne
===


In [1]:
import os
import json

import fiftyone as fo
from PIL import Image
import pandas as pd

import torch
import torchvision

  from .autonotebook import tqdm as notebook_tqdm


### Faster-RCNN results

In [2]:
res_dir = "results/faster_rcnn"

label2idx = {
    "person": 1, "car": 2, "bicycle": 3,
    "motorcycle": 4, "bus": 5, "truck": 6,
}

ds_name = "faster_rcnn_predictions"
if ds_name in fo.list_datasets():
    fo.delete_dataset(ds_name)

dataset = fo.Dataset(name=ds_name)

In [3]:


df = pd.read_csv("data/data.csv")
df_test = df[df["tag"] == "test"]

for _, row in df_test.iterrows():
    sample = fo.Sample(filepath=row["imagepath"])

    filename = os.path.basename(row["imagepath"])
    img = Image.open(row["imagepath"])
    img_width, img_height = img.size

    gt_file = row["labelpath"]
    with open(gt_file, "rt") as f:
        gt = json.load(f)

    # Add ground truth
    list_gt = []
    for obj in gt:
        label = obj["label"]
        box = [
            obj["x"] / img_width, obj["y"] / img_height,
            obj["width"] / img_width, obj["height"] / img_height
        ]
        detection = fo.Detection(
            label=label,
            bounding_box=box
        )
        list_gt.append(detection)
    sample["ground_truth"] = fo.Detections(detections=list_gt)

    # Add predictions
    pred_file = os.path.join(res_dir, filename.replace(".jpg", ".json"))
    pred = json.load(open(pred_file, "rt"))

    labels = pred["labels"]
    boxes_xyxy = pred["boxes_xyxy"]
    scores = pred["scores"]
    # convert box format from xyxy to xywh
    boxes_xywh = torchvision.ops.box_convert(
        torch.tensor(boxes_xyxy),
        in_fmt="xyxy",
        out_fmt="xywh"
    )

    list_detections = []
    for label, box, score in zip(labels, boxes_xywh, scores):
        box = [
            box[0] / img_width, box[1] / img_height,
            box[2] / img_width, box[3] / img_height
        ]
        label = list(label2idx.keys())[list(label2idx.values()).index(label)]
        detection = fo.Detection(label=label, bounding_box=box, confidence=score)
        list_detections.append(detection)

    sample["faster_rcnn"] = fo.Detections(detections=list_detections)
    dataset.add_sample(sample)

dataset.stats()

{'samples_count': 2968,
 'samples_bytes': 19199380,
 'samples_size': '18.3MB',
 'total_bytes': 19199380,
 'total_size': '18.3MB'}

In [4]:
from fiftyone import ViewField as F

# Only contains detections with confidence >= 0.75
high_conf_view = dataset.filter_labels(
    "faster_rcnn", F("confidence") > 0.75, only_matches=False
)

session = fo.launch_app(high_conf_view, auto=False)
session

Connected to FiftyOne on port 5151 at localhost.
If you are not connecting to a remote session, you may need to start a new session and specify a port
Session launched. Run `session.show()` to open the App in a cell output.


Dataset:          faster_rcnn_predictions
Media type:       image
Num samples:      2968
Selected samples: 0
Selected labels:  0
Session URL:      http://localhost:5151/
View stages:
    1. FilterLabels(field='faster_rcnn', filter={'$gt': ['$$this.confidence', 0.75]}, only_matches=False, trajectories=False)

In [5]:
results = high_conf_view.evaluate_detections(
    "faster_rcnn",
    gt_field="ground_truth",
    eval_key="eval",
    compute_mAP=True,
)

Evaluating detections...
 100% |███████████████| 2968/2968 [15.9s elapsed, 0s remaining, 199.7 samples/s]      
Performing IoU sweep...
 100% |███████████████| 2968/2968 [10.0s elapsed, 0s remaining, 324.3 samples/s]      


In [6]:
# Get the 10 most common classes in the dataset
if len(label2idx) > 10:
    counts = dataset.count_values("ground_truth.detections.label")
    classes_top10 = sorted(counts, key=counts.get, reverse=True)[:10]
else:
    classes_top10 = list(label2idx.keys())
    
# Print a classification report for the top-10 classes
results.print_report(classes=classes_top10)

              precision    recall  f1-score   support

      person       0.68      0.67      0.67     11004
         car       0.75      0.40      0.52      1932
     bicycle       0.73      0.19      0.31       316
  motorcycle       0.95      0.19      0.31       371
         bus       0.75      0.55      0.63       285
       truck       0.69      0.12      0.21       415

   micro avg       0.68      0.60      0.64     14323
   macro avg       0.76      0.35      0.44     14323
weighted avg       0.70      0.60      0.62     14323



### YOLOv8 results

In [7]:
res_dir = "results/yolov8"

# for yolo, labels start from 0
label2idx = {
    "person": 0, "car": 1, "bicycle": 2,
    "motorcycle": 3, "bus": 4, "truck": 5
}

ds_name = "yolo_predictions"
if ds_name in fo.list_datasets():
    fo.delete_dataset(ds_name)

dataset = fo.Dataset(name=ds_name)

In [8]:
df = pd.read_csv("data/data.csv")
df_test = df[df["tag"] == "test"]

for _, row in df_test.iterrows():
    sample = fo.Sample(filepath=row["imagepath"])

    filename = os.path.basename(row["imagepath"])
    img = Image.open(row["imagepath"])
    img_width, img_height = img.size

    gt_file = row["labelpath"]
    with open(gt_file, "rt") as f:
        gt = json.load(f)

    # Add ground truth
    list_gt = []
    for obj in gt:
        label = obj["label"]
        box = [
            obj["x"] / img_width, obj["y"] / img_height,
            obj["width"] / img_width, obj["height"] / img_height
        ]
        detection = fo.Detection(
            label=label,
            bounding_box=box
        )
        list_gt.append(detection)
    sample["ground_truth"] = fo.Detections(detections=list_gt)

    # Add predictions
    pred_file = os.path.join(res_dir, filename.replace(".jpg", ".json"))
    pred = json.load(open(pred_file, "rt"))

    labels = pred["labels"]
    boxes_xywhn = pred["boxes_xywhn"]
    scores = pred["scores"]

    list_detections = []
    for label, box, score in zip(labels, boxes_xywhn, scores):
        box = [box[0], box[1], box[2], box[3]]
        label = list(label2idx.keys())[list(label2idx.values()).index(label)]
        detection = fo.Detection(label=label, bounding_box=box, confidence=score)
        list_detections.append(detection)

    sample["yolov8"] = fo.Detections(detections=list_detections)
    dataset.add_sample(sample)


dataset.stats()

{'samples_count': 2968,
 'samples_bytes': 4922858,
 'samples_size': '4.7MB',
 'total_bytes': 4922858,
 'total_size': '4.7MB'}

In [9]:
from fiftyone import ViewField as F

# Only contains detections with confidence >= 0.75
high_conf_view = dataset.filter_labels(
    "yolov8", F("confidence") > 0.75, only_matches=False
)

session = fo.launch_app(high_conf_view, auto=False)
session

Session launched. Run `session.show()` to open the App in a cell output.


Dataset:          yolo_predictions
Media type:       image
Num samples:      2968
Selected samples: 0
Selected labels:  0
Session URL:      http://localhost:5151/
View stages:
    1. FilterLabels(field='yolov8', filter={'$gt': ['$$this.confidence', 0.75]}, only_matches=False, trajectories=False)

In [10]:
results = dataset.evaluate_detections(
    "yolov8",
    gt_field="ground_truth",
    eval_key="eval",
    compute_mAP=True,
)

Evaluating detections...
 100% |███████████████| 2968/2968 [9.9s elapsed, 0s remaining, 336.2 samples/s]       
Performing IoU sweep...
 100% |███████████████| 2968/2968 [9.0s elapsed, 0s remaining, 361.5 samples/s]       


In [11]:
# Get the 10 most common classes in the dataset
if len(label2idx) > 10:
    counts = dataset.count_values("ground_truth.detections.label")
    classes_top10 = sorted(counts, key=counts.get, reverse=True)[:10]
else:
    classes_top10 = list(label2idx.keys())

# Print a classification report for the top-10 classes
results.print_report(classes=classes_top10)

              precision    recall  f1-score   support

      person       0.41      0.34      0.37     11004
         car       0.39      0.24      0.29      1932
     bicycle       0.24      0.10      0.14       316
  motorcycle       0.29      0.13      0.18       371
         bus       0.33      0.31      0.32       285
       truck       0.29      0.04      0.07       415

   micro avg       0.40      0.30      0.34     14323
   macro avg       0.32      0.19      0.23     14323
weighted avg       0.39      0.30      0.34     14323

