In [1]:
import os, io, glob
from functools import partial

import ffmpeg
import cv2
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay
from tqdm.notebook import tqdm

import fiftyone as fo
import fiftyone.brain as fob
from fiftyone import ViewField as F
from fiftyone.utils import yolo
import fiftyone.utils.annotations as foua

import torch

# Download Dataset and Weights

In [2]:
import lego_utils

# Remote assets: the rendered LEGO dataset plus pretrained weights.
# Only the YOLO weights and the dataset are fetched below; the Mask R-CNN
# URL is defined here but not downloaded by this cell.
dataset_url = "https://s3.wasabisys.com/blender-rudy-set-background/dataset_1.tgz"
yolo_weights_url = "https://github.com/LilDataMonster/Lego-CNN/releases/download/v0.0.3-yolo/exp12.tgz"
mrcnn_weights_url = "https://github.com/LilDataMonster/Lego-CNN/releases/download/v0.0.3/mask_rcnn_lego_0200.h5"

# Local layout: every downloaded asset lives under the "logs" directory.
base_asset_path = os.path.join("logs")
weights_path = os.path.join(base_asset_path, "weights")
dataset_path = os.path.join(base_asset_path, "dataset")

# Fetch the weights first, then the dataset. With force_download=False the
# recorded run reports that already-present archives are skipped.
for asset_url, target_dir in (
    (yolo_weights_url, weights_path),
    (dataset_url, dataset_path),
):
    lego_utils.download_url(asset_url, target_dir, force_download=False)

'logs/weights/exp12.tgz' already exists. Skipping... Use 'force_download=True' to overwrite.
'logs/dataset/dataset_1.tgz' already exists. Skipping... Use 'force_download=True' to overwrite.


# Delete all datasets from database

In [3]:
# Safety switch: set to True to remove every dataset registered in the
# FiftyOne database before (re)loading. Left off by default.
delete_database = False

if delete_database:
    for dataset_name in fo.list_datasets():
        fo.load_dataset(dataset_name).delete()
        print(f"Removed from database: {dataset_name}")

## Load Dataset

In [4]:
# Datasets loaded by this cell, in sorted (dataset, split) order.
datasets = []

# Query the database once instead of on every loop iteration.
existing_dataset_names = set(fo.list_datasets())

# Load every COCO dataset found under dataset_path. Directory listings are
# sorted so the load order -- and therefore which dataset is left bound to
# `dataset` for the cells below -- is the same on every run.
for ds_subdir in sorted(os.listdir(dataset_path)):
    ds_fullpath = os.path.join(dataset_path, ds_subdir)
    if ds_subdir.startswith(".") or not os.path.isdir(ds_fullpath):
        continue

    # process by dataset splits
    for ds_split in sorted(os.listdir(ds_fullpath)):
        ds_split_fullpath = os.path.join(ds_fullpath, ds_split)

        # Stray files and hidden entries next to the split folders are not
        # datasets; skip them instead of handing them to from_dir().
        if ds_split.startswith(".") or not os.path.isdir(ds_split_fullpath):
            continue

        name = f"{ds_subdir} - {ds_split}"

        print(f"Loading dataset {name}...")
        if name in existing_dataset_names:
            # dataset already saved and persisted, load it from the prior save
            dataset = fo.load_dataset(name)
        else:
            data_path = ds_split_fullpath
            labels_path = os.path.join(data_path, "coco_annotations.json")
            dataset_type = fo.types.COCODetectionDataset

            # load in dataset
            dataset = fo.Dataset.from_dir(
                dataset_type=dataset_type,
                data_path=data_path,
                labels_path=labels_path,
                name=name
            )

            # save dataset to database for persistence
            dataset.persistent = True
        print(f"Dataset: {name} loaded")
        datasets.append(dataset)

Loading dataset dataset_1 - test...
Dataset: dataset_1 - test loaded
Loading dataset dataset_1 - train...
Dataset: dataset_1 - train loaded


In [7]:
# Start a FiftyOne App session for browsing the data in the GUI.
# With auto=False the recorded run only reports that the session launched
# and asks for `session.show()` to open the App in a cell output.
# (remote=True was tried here previously and is left disabled.)
session = fo.launch_app(
    port=5151,
    auto=False,
)

Session launched. Run `session.show()` to open the App in a cell output.


Uncaught exception, closing connection.
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/tornado/iostream.py", line 700, in _handle_events
    self._handle_write()
  File "/usr/local/lib/python3.8/dist-packages/tornado/iostream.py", line 974, in _handle_write
    self._write_buffer.advance(num_bytes)
  File "/usr/local/lib/python3.8/dist-packages/tornado/iostream.py", line 183, in advance
    assert 0 < size <= self._size
AssertionError
Exception in callback None()
handle: <Handle cancelled>
Traceback (most recent call last):
  File "/usr/lib/python3.8/asyncio/events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "/usr/local/lib/python3.8/dist-packages/tornado/platform/asyncio.py", line 189, in _handle_events
    handler_func(fileobj, events)
  File "/usr/local/lib/python3.8/dist-packages/tornado/iostream.py", line 700, in _handle_events
    self._handle_write()
  File "/usr/local/lib/python3.8/dist-packages/tornado/iost

ERROR:tornado.general:Uncaught exception, closing connection.
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/tornado/iostream.py", line 700, in _handle_events
    self._handle_write()
  File "/usr/local/lib/python3.8/dist-packages/tornado/iostream.py", line 974, in _handle_write
    self._write_buffer.advance(num_bytes)
  File "/usr/local/lib/python3.8/dist-packages/tornado/iostream.py", line 183, in advance
    assert 0 < size <= self._size
AssertionError
Exception in thread Thread-7:
Traceback (most recent call last):
  File "/usr/lib/python3.8/threading.py", line 932, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.8/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.8/dist-packages/fiftyone/core/client.py", line 125, in run_client
    io_loop.run_sync(connect)
  File "/usr/local/lib/python3.8/dist-packages/tornado/ioloop.py", line 530, in run_sync
    return future_cell[0].resul

## Load YOLO Model

In [24]:
# Load the custom-trained YOLOv5 checkpoint (exp12/weights/best.pt under
# weights_path) through torch.hub.
# NOTE: the recorded run of this cell failed with a name-resolution
# URLError, so this call needed network access in that environment.
yolov5_model = torch.hub.load(
    'ultralytics/yolov5',
    'custom',
    path=os.path.join(weights_path, "exp12", "weights", "best.pt"),
)

URLError: <urlopen error [Errno -3] Temporary failure in name resolution>

In [5]:
def create_detections(row):
    """Convert one YOLOv5 result row into a ``fo.Detection``.

    Args:
        row: a mapping-like row (e.g. one row of ``results.pandas().xywhn[0]``)
            providing the keys ``name``, ``xcenter``, ``ycenter``, ``width``,
            ``height`` and ``confidence``.

    Returns:
        A ``fo.Detection`` whose ``bounding_box`` is
        ``[left, top, width, height]``: the centre coordinates are shifted by
        half the box size to obtain the top-left corner.
    """
    half_width = row['width'] / 2
    half_height = row['height'] / 2
    return fo.Detection(
        label=row['name'],
        bounding_box=[row['xcenter'] - half_width, row['ycenter'] - half_height,
                      row['width'], row['height']],
        confidence=row['confidence'])

In [6]:
# Choose a random (seeded, hence repeatable) subset of 2500 samples to add
# predictions to
predictions_view = dataset.take(2500, seed=51)

for sample in tqdm(predictions_view):
    # Open the image in a context manager so its file handle is released
    # after inference instead of accumulating across thousands of samples.
    with Image.open(sample.filepath) as image:
        results = yolov5_model(image)

    # One image per call, so take the first (only) results table; the
    # xcenter/ycenter/width/height columns are consumed by create_detections.
    results_df = results.pandas().xywhn[0]

    # Build the list explicitly: an image with no detections simply yields [].
    detections = [create_detections(row) for _, row in results_df.iterrows()]
    sample['YOLOv5'] = fo.Detections(detections=detections)
    sample.save()

print("Finished adding predictions")

  0%|          | 0/2500 [00:00<?, ?it/s]

Finished adding predictions


In [7]:
# Point the running App session at the subset that carries the 'YOLOv5'
# predictions added above.
session.view = predictions_view

In [None]:
# Class labels (LEGO part IDs) included in the reports and plots.
classes = ['2431', '3003', '3005', '3010', '3020', '3021', '3022',
           '3023', '3024', '3069', '3070', '3176', '3622', '3700',
           '3710', '3958', '4150', '4274', '6141', '11211', '11476',
           '11477', '15068', '15573', '22885', '24201', '24246',
           '25269', '29119', '29120', '33909', '35480', '36840',
           '47458', '47905', '85984', '87079', '87087', '87580',
           '93273', '98138', '99206']

# Sample fields holding model predictions to evaluate.
pred_fields = [
    'YOLOv5',
]

# logging: every artifact of this cell is written under logs/<dataset name>/
logs_dir = os.path.join('logs', dataset.name)
os.makedirs(logs_dir, exist_ok=True)

# One single-row metrics frame per prediction field; concatenated once after
# the loop (DataFrame.append is no longer available in pandas 2.x).
classification_metrics_frames = []
for pred_field in pred_fields:
    print(f'Processing {pred_field}')

    # Only keep detections whose confidence exceeds 0.60
    high_conf_view = predictions_view.filter_labels(pred_field, F('confidence') > 0.60)

    results = high_conf_view.evaluate_detections(
        pred_field=pred_field,
        gt_field='ground_truth',
        eval_key=f'eval_{pred_field}',
        compute_mAP=True,
        classwise=False
    )

    # per-class report
    classification_report_df = pd.DataFrame(results.report(classes)).transpose()
    classification_report_df.to_csv(os.path.join(logs_dir, f'{pred_field}_classification_report.csv'))

    # aggregate metrics plus mAP for this prediction field
    classification_metrics_df = pd.DataFrame([results.metrics()], index=[pred_field])
    classification_metrics_df['mAP'] = results.mAP()
    classification_metrics_frames.append(classification_metrics_df)

    # save pr curve alongside the other artifacts in logs_dir
    pr_plot = results.plot_pr_curves(classes=classes, backend='matplotlib', figsize=(16,8))
    pr_plot.savefig(os.path.join(logs_dir, f'{pred_field}_pr_curve.png'), bbox_inches='tight')

    # save as matplotlib confusion matrix; the extra row/column is the
    # "missing" label reported by the evaluation results
    cm = results.confusion_matrix(classes=classes + [results.missing])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes + ['(NONE)'])

    fig, ax = plt.subplots(figsize=(16,16))
    disp.plot(ax=ax)
    ax.tick_params(axis='x', labelrotation=90)
    fig.savefig(os.path.join(logs_dir, f'{pred_field}_confusion_matrix.png'), bbox_inches='tight')

    # interactive confusion matrix, exported as JSON and standalone HTML
    plot = results.plot_confusion_matrix(classes=classes)
    plot._figure.write_json(os.path.join(logs_dir, f'{pred_field}_confusion_matrix.json'))
    plot._figure.write_html(os.path.join(logs_dir, f'{pred_field}_confusion_matrix.html'))

# Combine the per-field rows; stays None (and nothing is written) when
# pred_fields is empty.
classification_metrics_summary_df = (
    pd.concat(classification_metrics_frames) if classification_metrics_frames else None
)
if classification_metrics_summary_df is not None:
    classification_metrics_summary_df.to_csv(os.path.join(logs_dir, 'classification_metrics.csv'))

Processing YOLOv5
Evaluating detections...
 100% |███████████████| 2500/2500 [3.3m elapsed, 0s remaining, 13.2 samples/s]      
Performing IoU sweep...
 100% |███████████████| 2500/2500 [3.2m elapsed, 0s remaining, 12.3 samples/s]      


## Organize View by Mistakenness Score

In [1]:
# Compute and add a "mistakenness" score for the 'ground_truth' labels, using
# the 'YOLOv5' predictions as the reference.
# NOTE(review): the original note gave the score range as [-1, 1]
# (no mistakes, high mistakes); confirm the range for the installed
# FiftyOne Brain version.
fob.compute_mistakenness(
    predictions_view, 'YOLOv5', label_field='ground_truth',
)

# Only the samples in predictions_view were scored, so rank that view (not the
# whole dataset) from highest to lowest mistakenness.
mistake_view = predictions_view.sort_by('mistakenness', reverse=True)
session.view = mistake_view

NameError: name 'fob' is not defined

## Embedding Visualizations

In [None]:
# Generate a low-dimensional embedding visualization for the `ground_truth`
# object patches of the evaluated subset.
method = 'umap'  # alternatives: 'tsne', 'pca'

# Fix the seed (same value used when sampling predictions_view) so the
# stochastic embedding is reproducible across runs.
vis_results = fob.compute_visualization(
    predictions_view,
    patches_field="ground_truth",
    method=method,
    seed=51,
)

In [None]:
 # Generate scatterplot
# Marker size follows the box area: entries 2 and 3 of `bounding_box` are
# multiplied together for every ground-truth detection.
bbox_area = F('bounding_box')[2] * F('bounding_box')[3]
patch_labels = F('ground_truth.detections.label')
patch_sizes = F('ground_truth.detections[]').apply(bbox_area)

# Colour points by label, size them by box area
plot = vis_results.visualize(labels=patch_labels, sizes=patch_sizes)
plot.show(height=800)

# Attach the plot to the App session
session.plots.attach(plot)