In [1]:
import os, io, glob
from functools import partial

import ffmpeg
import cv2
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay
from tqdm.notebook import tqdm

import fiftyone as fo
import fiftyone.brain as fob
from fiftyone import ViewField as F
from fiftyone.utils import yolo
import fiftyone.utils.annotations as foua

import torch

Migrating database to v0.16.5


# Download Dataset and Weights

In [2]:
import lego_utils

dataset_url = "https://s3.wasabisys.com/blender-rudy-set-background/dataset_1.tgz"
yolo_weights_url = "https://github.com/LilDataMonster/Lego-CNN/releases/download/v0.0.3-yolo/exp12.tgz"
mrcnn_weights_url = "https://github.com/LilDataMonster/Lego-CNN/releases/download/v0.0.3/mask_rcnn_lego_0200.h5"

base_asset_path = os.path.join("logs")
weights_path = os.path.join(base_asset_path, "weights")
dataset_path = os.path.join(base_asset_path, "dataset")

lego_utils.download_url(yolo_weights_url, weights_path, force_download=False)
lego_utils.download_url(dataset_url, dataset_path, force_download=False)

'logs/weights/exp12.tgz' already exists. Skipping... Use 'force_download=True' to overwrite.
'logs/dataset/dataset_1.tgz' already exists. Skipping... Use 'force_download=True' to overwrite.


# Delete all datasets from database

In [3]:
delete_database = False

if delete_database:
    for ds in fo.list_datasets():
        dataset = fo.load_dataset(ds)
        dataset.delete()
        print(f"Removed from database: {ds}")

## Load Dataset

In [4]:
# loaded datasets
datasets = []

# load all coco datasets within the dataset_path
for ds_subdir in os.listdir(dataset_path):
    ds_fullpath = os.path.join(dataset_path, ds_subdir)
    if os.path.isdir(ds_fullpath) and not ds_subdir.startswith("."):
        
        # process by dataset splits
        for ds_split in os.listdir(ds_fullpath):
            ds_split_fullpath = os.path.join(ds_fullpath, ds_split)
            
            name = f"{ds_subdir} - {ds_split}"
            
            print(f"Loading dataset {name}...")
            if name not in fo.list_datasets():
                data_path = ds_split_fullpath
                labels_path = os.path.join(data_path, "coco_annotations.json")
                dataset_type = fo.types.COCODetectionDataset
                
                # load in dataset
                dataset = fo.Dataset.from_dir(
                    dataset_type=dataset_type,
                    data_path=data_path,
                    labels_path=labels_path,
                    name=name
                )
                
                # save dataset to database for persistence
                dataset.persistent = True
            else:
                # dataset already save and persisted, load it from prior save
                dataset = fo.load_dataset(name)
            print(f"Dataset: {name} loaded")
            datasets.append(dataset)

Loading dataset dataset_1 - test...
 100% |███████████████| 2500/2500 [44.2s elapsed, 0s remaining, 53.8 samples/s]      
Dataset: dataset_1 - test loaded
Loading dataset dataset_1 - train...
 100% |███████████████| 2501/2501 [43.7s elapsed, 0s remaining, 60.1 samples/s]      
Dataset: dataset_1 - train loaded


In [5]:
# visualize dataset in GUI
session = fo.launch_app(port=5151, auto=False)#, remote=True)

Session launched. Run `session.show()` to open the App in a cell output.


## Load YOLO Model

In [6]:
# download 
yolov5_model = torch.hub.load('ultralytics/yolov5', 'custom', path=os.path.join(weights_path, "exp12", "weights", "best.pt"))

Downloading: "https://github.com/ultralytics/yolov5/archive/master.zip" to /root/.cache/torch/hub/master.zip


YOLOv5 🚀 2022-6-26 Python-3.8.10 torch-1.11.0+cu113 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 11019MiB)



YOLOv5 🚀 2022-6-26 Python-3.8.10 torch-1.11.0+cu113 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 11019MiB)



Fusing layers... 


Fusing layers... 


Model summary: 290 layers, 21018615 parameters, 0 gradients


Model summary: 290 layers, 21018615 parameters, 0 gradients


Adding AutoShape... 


Adding AutoShape... 


In [7]:
create_detections = lambda row: fo.Detection(
                        label=row['name'],
                        bounding_box=[row['xcenter'] - row['width']/2, row['ycenter'] - row['height']/2,
                                      row['width'], row['height']],
                        confidence=row['confidence'])

In [8]:
# Choose a random subset of 100 samples to add predictions to
predictions_view = dataset.take(2500, seed=51)

for sample in tqdm(predictions_view):
    # Load image
    image = Image.open(sample.filepath)
    results = yolov5_model(image)
    results_df = results.pandas().xywhn[0]
    
    detections = results_df.apply(create_detections, axis=1)
    sample['YOLOv5'] = fo.Detections(detections=detections.values.tolist())
    sample.save()

print("Finished adding predictions")

  0%|          | 0/2500 [00:00<?, ?it/s]

Finished adding predictions


In [9]:
session.view = predictions_view

In [12]:
classes = ['2431', '3003', '3005', '3010', '3020', '3021', '3022',
           '3023', '3024', '3069', '3070', '3176', '3622', '3700',
           '3710', '3958', '4150', '4274', '6141', '11211', '11476',
           '11477', '15068', '15573', '22885', '24201', '24246',
           '25269', '29119', '29120', '33909', '35480', '36840',
           '47458', '47905', '85984', '87079', '87087', '87580',
           '93273', '98138', '99206']

pred_fields = [
    'YOLOv5',
]

# logging
logs_dir = os.path.join('logs', dataset.name)
os.makedirs(logs_dir, exist_ok=True)

classification_metrics_summary_df = None
for pred_field in pred_fields:
    print(f'Processing {pred_field}')

    # Only contains detections with confidence
    high_conf_view = predictions_view.filter_labels(pred_field, F('confidence') > 0.60)

    results = high_conf_view.evaluate_detections(
        pred_field=pred_field,
        gt_field='ground_truth',
        eval_key=f'eval_{pred_field}',
        compute_mAP=True,
        classwise=False
    )

    classification_report_df = pd.DataFrame(results.report(classes)).transpose()
    classification_report_df.to_csv(os.path.join(logs_dir, f'{pred_field}_classification_report.csv'))

    classification_metrics_df = pd.DataFrame([results.metrics()], index=[pred_field])
    classification_metrics_df['mAP'] = results.mAP()
    classification_metrics_summary_df = classification_metrics_df if classification_metrics_summary_df is None else classification_metrics_summary_df.append(classification_metrics_df)

    # save pr curve
    pr_plot = results.plot_pr_curves(classes=classes, backend='matplotlib', figsize=(16,8))
    pr_plot.savefig(os.path.join(logs_dir, f'{pred_field}_pr_curve.png'), bbox_inches='tight')
    # # plot pr curve
    # plot = results.plot_pr_curves(classes=classes)
    # plot._figure.write_json(os.path.join(logs_dir, f'{pred_field}_pr_curve.json'))
    # plot._figure.write_html(os.path.join(logs_dir, f'{pred_field}_pr_curve.html'))
    # # plot.show(height=720, width=720)

    # save as matplotlib confusion matrix
    # plot = results.plot_pr_curves(classes=classes, backend='matplotlib', figsize=(16,8))
    # plot.savefig('pr_plot.png', bbox_inches='tight')
    cm = results.confusion_matrix(classes=classes + [results.missing])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes + ['(NONE)'])

    fig, ax = plt.subplots(figsize=(16,16))
    disp.plot(ax=ax)
    ax.tick_params(axis='x', labelrotation=90)
    fig.savefig(os.path.join(logs_dir, f'{pred_field}_confusion_matrix.png'), bbox_inches='tight')
    # plt.show()

    plot = results.plot_confusion_matrix(classes=classes)
    plot._figure.write_json(os.path.join(logs_dir, f'{pred_field}_confusion_matrix.json'))
    plot._figure.write_html(os.path.join(logs_dir, f'{pred_field}_confusion_matrix.html'))
    # plot.show(height=720, width=720)

classification_metrics_summary_df.to_csv(os.path.join(logs_dir, 'classification_metrics.csv'))

Processing YOLOv5
Evaluating detections...


Evaluating detections...




 100% |███████████████| 2500/2500 [3.3m elapsed, 0s remaining, 13.5 samples/s]      


 100% |███████████████| 2500/2500 [3.3m elapsed, 0s remaining, 13.5 samples/s]      


Performing IoU sweep...


Performing IoU sweep...




 100% |███████████████| 2500/2500 [3.1m elapsed, 0s remaining, 13.4 samples/s]      


 100% |███████████████| 2500/2500 [3.1m elapsed, 0s remaining, 13.4 samples/s]      


Ignoring unsupported argument `thresholds` for the 'matplotlib' backend


Ignoring unsupported argument `thresholds` for the 'matplotlib' backend


## Organize View by Mistakenness Score

In [13]:
# compute and add "mistakenness" score ranging between [-1, 1] (no mistakes, high mistakes)
fob.compute_mistakenness(
    predictions_view, 'YOLOv5', label_field='ground_truth', 
)

# set view to be based on mistakenness score
mistake_view = dataset.sort_by('mistakenness', reverse=True)
session.view = mistake_view

Evaluating detections...


Evaluating detections...




 100% |███████████████| 2500/2500 [1.9m elapsed, 0s remaining, 23.5 samples/s]      


 100% |███████████████| 2500/2500 [1.9m elapsed, 0s remaining, 23.5 samples/s]      


Computing mistakenness...


Computing mistakenness...




 100% |███████████████| 2500/2500 [1.6m elapsed, 0s remaining, 28.5 samples/s]      


 100% |███████████████| 2500/2500 [1.6m elapsed, 0s remaining, 28.5 samples/s]      


Mistakenness computation complete


Mistakenness computation complete


RuntimeError: Failed to post event `state_update` to http://localhost:5151/event

## Embedding Visualizations

In [15]:
# Generate visualization for `ground_truth` objects
method = 'umap' #'umap' 'tsne' 'pca'
vis_results = fob.compute_visualization(predictions_view, patches_field="ground_truth", method=method)

Computing patch embeddings...


Computing patch embeddings...
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 239, in _feed
    obj = _ForkingPickler.dumps(obj)
  File "/usr/lib/python3.8/multiprocessing/reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/reductions.py", line 347, in reduce_storage
    fd, size = storage._share_fd_()
RuntimeError: unable to write to file </torch_1588_1083608515_0>: No space left on device (28)
Traceback (most recent call last):
  File "/usr/lib/python3.8/multiprocessing/queues.py", line 239, in _feed
    obj = _ForkingPickler.dumps(obj)
  File "/usr/lib/python3.8/multiprocessing/reduction.py", line 51, in dumps
    cls(buf, protocol).dump(obj)
  File "/usr/local/lib/python3.8/dist-packages/torch/multiprocessing/reductions.py", line 347, in reduce_storage
    fd, size = storage._share_fd_()
RuntimeError: unable to write to file </torch_1524_45833984_0>: No 



  -0% ||--------------|   -1/2500 [4.4s elapsed, ? remaining, ? samples/s] 


  -0% ||--------------|   -1/2500 [4.4s elapsed, ? remaining, ? samples/s] 


RuntimeError: DataLoader worker (pid(s) 1556) exited unexpectedly

In [None]:
 # Generate scatterplot
bbox_area = F('bounding_box')[2] * F('bounding_box')[3]
plot = vis_results.visualize(
    labels=F('ground_truth.detections.label'),
    sizes=F('ground_truth.detections[]').apply(bbox_area),
)
plot.show(height=800)
session.plots.attach(plot)