# Adding Object Detection Predictions to a Voxel51 Dataset
This notebook will add predictions from an object detection model to the samples in a Voxel51 Dataset.

Adapted from: https://voxel51.com/docs/fiftyone/recipes/model_inference.html

In [None]:
# Path of the exported (SavedModel) object detection model
model_path = '/tf/model-export/mobilenet_plane_detect/saved_model'

# Name of the Voxel51 (FiftyOne) dataset to load
dataset_name = "test_dataset"

# Name of the sample field the predictions are stored in
field_name = "predict_model"

# Location of the label map file that maps class ids to class names
labelmap_file = '/tf/dataset-export/mobilenet_plane_detect/label_map.pbtxt'

# Minimum score a prediction needs in order to be added. This keeps out bad
# predictions, but it may need to be lowered while the model is still weak.
min_score = 0.5

# Dimensions of images (not referenced by the other cells shown in this notebook)
img_width = 320
img_height = 320

In [1]:
# Load the existing dataset whose name is set by dataset_name in the configuration cell
import fiftyone as fo
import os

dataset = fo.load_dataset(dataset_name)

In [2]:
import io
import os
import scipy.misc
import numpy as np
import six
import time

from six import BytesIO

import matplotlib
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
from object_detection.utils import label_map_util
import tensorflow as tf
from object_detection.utils import visualization_utils as viz_utils

%matplotlib inline




### Load saved model
Loading a saved object detection model is a little unusual. I found some information on how to do it here:
https://github.com/tensorflow/models/blob/master/research/object_detection/colab_tutorials/inference_from_saved_model_tf2_colab.ipynb

In [3]:
# Load the exported model and report how long loading took.
# perf_counter() is a monotonic clock meant for measuring durations, so the
# result is not affected by system clock adjustments the way time.time() is.
start_time = time.perf_counter()
tf.keras.backend.clear_session()  # reset Keras global state before loading
detect_fn = tf.saved_model.load(model_path)
end_time = time.perf_counter()
elapsed_time = end_time - start_time
print('Elapsed time: ' + str(elapsed_time) + 's')


Elapsed time: 17.72215247154236s


In [4]:
# small function that preprocesses the images so that the model can read them in

def load_image_into_numpy_array(path):
  """Load an image file into an RGB uint8 numpy array.

  The file is read through tf.io.gfile, decoded with PIL and converted to
  RGB, so grayscale, palette and RGBA images also come back with exactly
  three channels instead of failing on the reshape.

  Args:
    path: path of the image file to read.

  Returns:
    uint8 numpy array with shape (height, width, 3).
  """
  # Use a context manager so the file handle is closed as soon as it is read.
  with tf.io.gfile.GFile(path, 'rb') as image_file:
    img_data = image_file.read()
  image = Image.open(BytesIO(img_data)).convert('RGB')
  # np.array() copies the pixel buffer directly (no per-pixel Python objects)
  # and returns a writable array, as the previous implementation did.
  return np.array(image, dtype=np.uint8)



### Load the LabelMap file

In [14]:
# Parse the label map file and build the category_index lookup that
# findClassName() reads from.
label_map = label_map_util.load_labelmap(labelmap_file)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=100,
)
category_index = label_map_util.create_category_index(categories)


def findClassName(class_id):
    """Return the "name" stored in category_index for the given class id.

    Raises:
        KeyError: if class_id has no entry in category_index.
    """
    category = category_index[class_id]
    return category["name"]

ERJ-170


### Add predictions
Iterate through all the samples, run each one through the model, and add the resulting predictions to the sample.

In [7]:
view = dataset.shuffle() # Adjust the view as needed

for sample in view:
    # perf_counter() is a monotonic clock intended for measuring durations
    start_time = time.perf_counter()
    image_np = load_image_into_numpy_array(sample.filepath)
    input_tensor = np.expand_dims(image_np, 0)  # add the leading batch axis
    detections = detect_fn(input_tensor)

    # Convert the results for the single image in the batch to numpy once,
    # instead of indexing into the model outputs for every value. This also
    # means plain Python numbers (not tensors) are stored on the sample.
    scores = np.asarray(detections['detection_scores'][0])
    class_ids = np.asarray(detections['detection_classes'][0])
    boxes = np.asarray(detections['detection_boxes'][0])

    exportDetections = []
    for score, class_id, box in zip(scores, class_ids, boxes):
        if score > min_score:
            label = findClassName(int(class_id))
            confidence = float(score)
            print("\t- {}: {}".format(label, confidence))

            # TF Obj Detect bounding boxes are: [ymin, xmin, ymax, xmax]
            #
            # For Voxel 51 - Bounding box coordinates should be relative values
            # in [0, 1] in the following format:
            # [top-left-x, top-left-y, width, height]
            y1, x1, y2, x2 = (float(coord) for coord in box)
            bbox = [x1, y1, x2 - x1, y2 - y1]

            exportDetections.append(
                fo.Detection(label=label, bounding_box=bbox, confidence=confidence)
            )
    end_time = time.perf_counter()
    print("Found {} Planes, took {} seconds".format(len(exportDetections), end_time - start_time))

    # Store detections in a field name of your choice
    sample[field_name] = fo.Detections(detections=exportDetections)
    sample.save()


Dataset:        plane-dataset
Media type:     image
Num samples:    1
Tags:           ['good_box', 'plane']
Sample fields:
    filepath:      fiftyone.core.fields.StringField
    tags:          fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:      fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    icao24:        fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    plane:         fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    plane_spot:    fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    labelbox_id:   fiftyone.core.fields.StringField
    plane-box:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    plane_box:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    planebox:      fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    model:      

# Examine the results
Here is some example code on how you could test how well the predictions match ground truth data.

In [31]:
# Field holding the ground-truth classification that predictions are compared to
ground_truth_field = "norm_model"
# Number of leading detections that count towards a "top 3" match
top_k = 3

# Only score samples that have both a prediction and a ground-truth label;
# field_name is the prediction field set in the configuration cell.
predict_model_view = dataset.exists(field_name).exists(ground_truth_field)

num_samples = 0
total = 0
top3_total = 0
for sample in predict_model_view:
    num_samples += 1
    truth_label = sample[ground_truth_field].label
    # A sample can carry an empty detection list when nothing scored above
    # the threshold; treat that as a miss instead of failing on [0].
    guesses = sample[field_name].detections or []

    if guesses:
        top_detect = guesses[0]
        bb_area = top_detect["bounding_box"][2] * top_detect["bounding_box"][3]
        found = top_detect["label"] == truth_label
    else:
        bb_area = None
        found = False

    # Look at exactly the first top_k detections (the previous `i > 3` test
    # let a fourth guess through).
    top3_found = any(guess["label"] == truth_label for guess in guesses[:top_k])

    if found:
        total += 1
    if top3_found:
        top3_total += 1

    print("{}, {}, {}, {}".format(bb_area, truth_label, found, top3_found))

# Matches / misses for the top guess, then for the top 3 guesses. Misses are
# derived from the number of samples actually scored, not a fixed 100.
print("{}\n{}\n\n{}\n{}".format(total, num_samples - total, top3_total, num_samples - top3_total))

0.003363115119441318, A321, False, False
0.004131421679733904, A321, False, True
0.00306788170649952, 757-200, False, False
0.003407978022078595, 757-200, False, False
0.004803034913855697, A330, False, True
0.002368545882305284, ERJ-170, True, True
0.0023542858515170906, ERJ-170, True, True
0.002527645226944486, ERJ-170, False, True
0.0012630497521257666, Learjet 45/60, False, True
0.001340467587462868, CRJ700, False, False
0.0031371196698231074, 737-900, False, False
0.002852709476428572, 737-900, False, False
0.0024098699581678318, 737-900, False, False
0.0013388308064037346, Cessna Jet, True, True
0.002016396768755868, A320, False, True
0.0026610918499443414, 757-200, False, False
0.0029156908491483335, 757-200, False, False
0.002778857595231443, 757-200, False, False
0.0032397564838007042, 757-200, False, True
0.00216837580980922, A321, False, True
0.002636617286100318, A321, False, False
0.004325663871298957, 787-800, False, True
0.0020077339682273987, A320, True, True
0.00236754

## View the Results
Use the UI to examine the predictions. You can select poorly performing samples and tag them for relabeling.

In [32]:
# Start an App session for the whole dataset. With auto=False the App is not
# displayed by this call; run session.show() to open it in a cell output.
session = fo.launch_app(dataset, auto=False)

Session launched. Run `session.show()` to open the App in a cell output.


In [None]:
# Show only the samples that have a value in the prediction field. The field
# name comes from field_name in the configuration cell so this view stays in
# step with the field the predictions were written to.
# Optionally narrow further, e.g. to samples without a "relabel" value:
#   .match({"relabel": {"$exists": False, "$eq": None}})
view = dataset.exists(field_name)
session = fo.launch_app(view, auto=False)
print(view)
# Alternative: reuse an already running session instead of launching one:
#   session.view = view

### Select Samples
Select poorly performing samples in the UI, then run the code below to tag the selected samples for relabeling.

In [59]:
# Create a view containing only the samples currently selected in the App
selected_view = dataset.select(session.selected)
print(selected_view)
for sample in selected_view:
    # Skip samples that already carry the tag so that re-running this cell
    # does not append duplicate "relabel" entries.
    if "relabel" not in sample.tags:
        sample.tags.append("relabel")
        sample.save()

Dataset:        plane-dataset
Media type:     image
Num samples:    1
Tags:           ['plane']
Sample fields:
    filepath:    fiftyone.core.fields.StringField
    tags:        fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:    fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    icao24:      fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    plane:       fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    plane_spot:  fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    labelbox_id: fiftyone.core.fields.StringField
    plane-box:   fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    plane_box:   fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    planebox:    fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
View stages:
    1. Select(sample_ids=['60147b2d

ValueError: Failed to load sample from the database. This is likely due to an invalid stage in the DatasetView