# Export from Labelbox to Voxel51
After you have finished labeling data in Labelbox, this notebook lets you import the labels back into a Voxel51 Dataset.
In the Labelbox web UI, export the project and download the JSON file. 

In [9]:
# Path to the JSON file exported from the Labelbox project; download the
# exported JSON and update this path to point at it
LABELBOX_EXPORT_JSON = "/tf/media/export-2021-05-20T13_18_09.947Z.json"
# The name of the V51 Dataset to import the labels into
DATASET_NAME = "jsm-test-dataset"
# V51 Sample field where the corresponding Labelbox ID was saved when the
# sample was uploaded to Labelbox
LABELBOX_ID_FIELD = "labelbox_id"

In [28]:
"""
Utilities for working with annotations in
`Labelbox format <https://labelbox.com/docs/exporting-data/export-format-detail>`_.

| Copyright 2017-2021, Voxel51, Inc.
| `voxel51.com <https://voxel51.com/>`_
|
"""
from copy import copy
import logging
import os
from uuid import uuid4
import warnings

import numpy as np

import eta.core.image as etai
import eta.core.serial as etas
import eta.core.utils as etau
import eta.core.web as etaw

import fiftyone.core.collections as foc
import fiftyone.core.fields as fof
import fiftyone.core.labels as fol
import fiftyone.core.media as fomm
import fiftyone.core.metadata as fom
import fiftyone.core.sample as fos
import fiftyone.core.utils as fou


logger = logging.getLogger(__name__)


#
# @todo
#   Must add support for populating `schemaId` when exporting labels in
#   order for model-assisted labeling to work properly
#
#   cf https://labelbox.com/docs/automation/model-assisted-labeling
#


def custom_import_from_labelbox(
    dataset,
    json_path,
    label_prefix=None,
    download_dir=None,
    labelbox_id_field="labelbox_id",
):
    """Imports the labels from a Labelbox JSON export into the FiftyOne
    dataset.

    The ``labelbox_id_field`` of the FiftyOne samples are used to associate the
    corresponding Labelbox labels.

    If a ``download_dir`` is provided, any Labelbox IDs with no matching
    FiftyOne sample are added to the FiftyOne dataset, their media is
    downloaded into ``download_dir``, and their Labelbox ID is stored in
    ``labelbox_id_field`` so that later imports can match them.

    The provided ``json_path`` should contain a JSON file in the following
    format::

        [
            {
                "DataRow ID": <labelbox-id>,
                "Labeled Data": <url-or-None>,
                "Label": {...}
            }
        ]

    When importing image labels, the ``Label`` field should contain a dict of
    `Labelbox image labels <https://labelbox.com/docs/exporting-data/export-format-detail#images>`_::

        {
            "objects": [...],
            "classifications": [...]
        }

    When importing video labels, the ``Label`` field should contain a dict as
    follows::

        {
            "frames": <url-or-filepath>
        }

    where the ``frames`` field can either contain a URL, in which case the
    file is downloaded from the web, or the path to NDJSON file on disk of
    `Labelbox video labels <https://labelbox.com/docs/exporting-data/export-format-detail#video>`_::

        {"frameNumber": 1, "objects": [...], "classifications": [...]}
        {"frameNumber": 2, "objects": [...], "classifications": [...]}
        ...

    Args:
        dataset: a :class:`fiftyone.core.dataset.Dataset`
        json_path: the path to the Labelbox JSON export to load
        label_prefix (None): a prefix to prepend to the sample label field(s)
            that are created, separated by an underscore
        download_dir (None): a directory into which to download the media for
            any Labelbox IDs with no corresponding sample with the matching
            ``labelbox_id_field`` value. This can be omitted if all IDs are
            already present or you do not wish to download media and add new
            samples
        labelbox_id_field ("labelbox_id"): the sample field to lookup/store the
            IDs of the Labelbox DataRows
    """
    if download_dir:
        filename_maker = fou.UniqueFilenameMaker(output_dir=download_dir)

    if labelbox_id_field not in dataset.get_field_schema():
        dataset.add_sample_field(labelbox_id_field, fof.StringField)

    # Map Labelbox IDs -> sample IDs. Samples whose ID field is unset are left
    # out so that `None` never becomes a lookup key
    id_map = {}
    for sample in dataset.select_fields(labelbox_id_field):
        existing_id = sample[labelbox_id_field]
        if existing_id is not None:
            id_map[existing_id] = sample.id

    if label_prefix:
        label_key = lambda k: label_prefix + "_" + k
    else:
        label_key = lambda k: k

    is_video = dataset.media_type == fomm.VIDEO

    # Load labels
    d_list = etas.read_json(json_path)

    # ref: https://github.com/Labelbox/labelbox/blob/7c79b76310fa867dd38077e83a0852a259564da1/exporters/coco-exporter/coco_exporter.py#L33
    with fou.ProgressBar() as pb:
        for d in pb(d_list):
            labelbox_id = d["DataRow ID"]

            if labelbox_id in id_map:
                # Get existing sample
                sample = dataset[id_map[labelbox_id]]
            elif download_dir:
                # Download image and create new sample
                # @todo optimize by downloading images in a background thread
                # pool?
                image_url = d["Labeled Data"]
                filepath = filename_maker.get_output_path(image_url)
                etaw.download_file(image_url, path=filepath, quiet=True)
                sample = fos.Sample(filepath=filepath)

                # Record the Labelbox ID so that this sample is matched, not
                # re-downloaded, if the same ID is encountered again
                sample[labelbox_id_field] = labelbox_id
                dataset.add_sample(sample)
                id_map[labelbox_id] = sample.id
            else:
                logger.info(
                    "Skipping labels for unknown Labelbox ID '%s'; provide a "
                    "`download_dir` if you wish to download media and create "
                    "samples for new media",
                    labelbox_id,
                )
                continue

            # Pixel coordinates in the export are normalized using the media
            # dimensions, so metadata must be available
            if sample.metadata is None:
                if is_video:
                    sample.metadata = fom.VideoMetadata.build_for(
                        sample.filepath
                    )
                else:
                    sample.metadata = fom.ImageMetadata.build_for(
                        sample.filepath
                    )

            if is_video:
                frame_size = (
                    sample.metadata.frame_width,
                    sample.metadata.frame_height,
                )
                frames = _parse_video_labels(d["Label"], frame_size)
                sample.frames.merge(
                    {
                        frame_number: {
                            label_key(fname): flabel
                            for fname, flabel in frame_dict.items()
                        }
                        for frame_number, frame_dict in frames.items()
                    }
                )
            else:
                frame_size = (sample.metadata.width, sample.metadata.height)
                labels_dict = _parse_image_labels(d["Label"], frame_size)
                sample.update_fields(
                    {label_key(k): v for k, v in labels_dict.items()}
                )

            sample.save()

def _convert_labelbox_frames_export_to_import(inpath, outpath):
    """Regroups the NDJSON records in ``inpath`` by frame number and writes
    one record per frame to ``outpath``.

    Each input record has its ``frameNumber``, ``dataRow`` and ``uuid`` keys
    removed and is then filed under the ``objects`` or ``classifications``
    list of its frame. Output records are written in ascending frame order.

    Args:
        inpath: the path to the NDJSON file to read
        outpath: the path of the NDJSON file to write
    """
    frames_by_number = {}

    for record in etas.read_ndjson(inpath):
        frame_number = record.pop("frameNumber")
        record.pop("dataRow")
        record.pop("uuid")

        frame_d = frames_by_number.get(frame_number)
        if frame_d is None:
            frame_d = {
                "frameNumber": frame_number,
                "objects": [],
                "classifications": [],
            }
            frames_by_number[frame_number] = frame_d

        _ingest_label(record, frame_d)

    ordered = [frames_by_number[key] for key in sorted(frames_by_number)]
    etas.write_ndjson(ordered, outpath)


def _ingest_label(din, d_label):
    if any(k in din for k in ("bbox", "polygon", "line", "point", "mask")):
        # Object
        if "mask" in din:
            din["instanceURI"] = din.pop("mask")["instanceURI"]

        d_label["objects"].append(din)
    else:
        # Classification
        d_label["classifications"].append(din)


def _get_labels(sample_or_frame, label_fields):
    labels_dict = {}
    for field, key in label_fields.items():
        value = sample_or_frame[field]
        if value is not None:
            labels_dict[key] = value

    return labels_dict


def _get_frame_labels(sample, frame_label_fields):
    """Collects the requested labels for every frame of ``sample``.

    Args:
        sample: an object whose ``frames.items()`` yields
            ``(frame_number, frame)`` pairs
        frame_label_fields: a dict mapping frame field names to output keys

    Returns:
        a dict mapping frame numbers to the label dicts built by
        ``_get_labels()`` for each frame
    """
    return {
        frame_number: _get_labels(frame, frame_label_fields)
        for frame_number, frame in sample.frames.items()
    }


def _to_labelbox_image_labels(labels_dict, frame_size, data_row_id):
    """Converts a dict of FiftyOne labels into a flat list of Labelbox
    annotation dicts.

    Classifications, detections, polylines, keypoints and segmentations are
    dispatched to their dedicated converters. Any other non-``None`` label
    triggers a warning and is skipped.

    Args:
        labels_dict: a dict mapping field names to labels
        frame_size: the ``(width, height)`` of the image/frame
        data_row_id: the Labelbox DataRow ID to attach to each annotation

    Returns:
        a list of annotation dicts
    """
    annotations = []

    for name, label in labels_dict.items():
        if isinstance(label, (fol.Classification, fol.Classifications)):
            annotations.append(
                _to_global_classification(name, label, data_row_id)
            )
            continue

        if isinstance(label, (fol.Detection, fol.Detections)):
            annotations.extend(_to_detections(label, frame_size, data_row_id))
            continue

        if isinstance(label, (fol.Polyline, fol.Polylines)):
            annotations.extend(_to_polylines(label, frame_size, data_row_id))
            continue

        if isinstance(label, (fol.Keypoint, fol.Keypoints)):
            annotations.extend(_to_points(label, frame_size, data_row_id))
            continue

        if isinstance(label, fol.Segmentation):
            annotations.extend(_to_mask(name, label, data_row_id))
            continue

        if label is not None:
            warnings.warn(
                "Ignoring unsupported label type '%s'" % label.__class__
            )

    return annotations


def _to_labelbox_video_labels(frames, frame_size, data_row_id):
    """Converts per-frame label dicts into a flat list of Labelbox
    annotation dicts, each tagged with its ``frameNumber``.

    Args:
        frames: a dict mapping frame numbers to dicts of labels
        frame_size: the ``(width, height)`` of the video frames
        data_row_id: the Labelbox DataRow ID to attach to each annotation

    Returns:
        a list of annotation dicts
    """
    video_annos = []
    for frame_number, frame_labels in frames.items():
        per_frame = _to_labelbox_image_labels(
            frame_labels, frame_size, data_row_id
        )
        for frame_anno in per_frame:
            frame_anno["frameNumber"] = frame_number

        video_annos.extend(per_frame)

    return video_annos


# https://labelbox.com/docs/exporting-data/export-format-detail#classification
def _to_global_classification(name, label, data_row_id):
    """Builds the annotation dict for a sample-level classification.

    Args:
        name: the name used for the annotation's ``title`` and ``value``
        label: the classification label(s) providing the answer
        data_row_id: the Labelbox DataRow ID to attach

    Returns:
        the base annotation dict merged with the classification answer
    """
    base = _make_base_anno(name, data_row_id=data_row_id)
    answer = _make_classification_answer(label)
    return {**base, **answer}


# https://labelbox.com/docs/exporting-data/export-format-detail#nested_classification
def _to_nested_classifications(attributes):
    """Converts object attributes into nested classification dicts.

    Only categorical and list attributes are converted; any other attribute
    type triggers a warning and is skipped.

    Args:
        attributes: a dict mapping attribute names to attributes

    Returns:
        a list of classification dicts
    """
    nested = []
    for attr_name, attr in attributes.items():
        if isinstance(attr, (fol.CategoricalAttribute, fol.ListAttribute)):
            entry = _make_base_anno(attr_name)
            entry.update(_make_classification_answer(attr))
            nested.append(entry)
        else:
            warnings.warn(
                "Ignoring unsupported attribute type '%s'" % attr.__class__
            )

    return nested


# https://labelbox.com/docs/automation/model-assisted-labeling#mask_annotations
def _to_mask(name, label, data_row_id):
    """Converts a segmentation label into one Labelbox mask annotation per
    unique color found in its mask.

    Args:
        name: the name used for each annotation's ``title`` and ``value``
        label: a segmentation label whose ``mask`` is an RGB uint8 image and
            whose ``instance_uri`` attribute is populated
        data_row_id: the Labelbox DataRow ID to attach to each annotation

    Returns:
        a list of annotation dicts, each with its own ``uuid``

    Raises:
        ValueError: if the mask is not a uint8 array with three channels in
            its last dimension, or if ``instance_uri`` is missing
    """
    mask = np.asarray(label.mask)

    # The colors are extracted by viewing the mask as rows of (R, G, B), so
    # the last dimension must hold exactly three channels
    if mask.ndim < 3 or mask.shape[-1] != 3 or mask.dtype != np.uint8:
        raise ValueError(
            "Segmentation masks must be stored as RGB color uint8 images"
        )

    # Only a missing/unset attribute counts as "not populated"; any other
    # error raised while reading the label is allowed to propagate
    instance_uri = getattr(label, "instance_uri", None)
    if instance_uri is None:
        raise ValueError(
            "You must populate the `instance_uri` field of segmentation masks"
        )

    # Get unique colors
    colors = np.unique(np.reshape(mask, (-1, 3)), axis=0).tolist()

    annos = []
    for color in colors:
        # Build a fresh base annotation per color so that every annotation
        # gets a distinct uuid rather than sharing a copied one
        anno = _make_base_anno(name, data_row_id=data_row_id)
        anno["mask"] = _make_mask(instance_uri, color)
        annos.append(anno)

    return annos


# https://labelbox.com/docs/exporting-data/export-format-detail#bounding_boxes
def _to_detections(label, frame_size, data_row_id):
    """Converts one detection, or a collection of detections, into Labelbox
    bounding box annotation dicts.

    Args:
        label: a single detection or a detections container
        frame_size: the ``(width, height)`` of the image/frame
        data_row_id: the Labelbox DataRow ID to attach to each annotation

    Returns:
        a list of annotation dicts, one per detection
    """
    is_collection = isinstance(label, fol.Detections)
    detections = label.detections if is_collection else [label]

    annos = []
    for det in detections:
        anno = _make_base_anno(det.label, data_row_id=data_row_id)
        anno["bbox"] = _make_bbox(det.bounding_box, frame_size)

        attrs = det.attributes
        if attrs:
            anno["classifications"] = _to_nested_classifications(attrs)

        annos.append(anno)

    return annos


# https://labelbox.com/docs/exporting-data/export-format-detail#polygons
# https://labelbox.com/docs/exporting-data/export-format-detail#polylines
def _to_polylines(label, frame_size, data_row_id):
    """Converts one polyline, or a collection of polylines, into Labelbox
    polygon/line annotation dicts.

    Filled polylines are written under the ``polygon`` key and unfilled ones
    under the ``line`` key. One annotation is emitted per shape in each
    polyline's ``points``.

    Args:
        label: a single polyline or a polylines container
        frame_size: the ``(width, height)`` of the image/frame
        data_row_id: the Labelbox DataRow ID to attach to each annotation

    Returns:
        a list of annotation dicts
    """
    is_collection = isinstance(label, fol.Polylines)
    shapes = label.polylines if is_collection else [label]

    annos = []
    for shape in shapes:
        geometry_key = "line"
        if shape.filled:
            geometry_key = "polygon"

        # The nested classifications are computed once per polyline and
        # shared by every annotation derived from it
        nested = None
        if shape.attributes:
            nested = _to_nested_classifications(shape.attributes)

        for vertices in shape.points:
            anno = _make_base_anno(shape.label, data_row_id=data_row_id)
            anno[geometry_key] = [
                _make_point(vertex, frame_size) for vertex in vertices
            ]
            if nested is not None:
                anno["classifications"] = nested

            annos.append(anno)

    return annos


# https://labelbox.com/docs/exporting-data/export-format-detail#points
def _to_points(label, frame_size, data_row_id):
    """Converts one keypoint, or a collection of keypoints, into Labelbox
    point annotation dicts.

    One annotation is emitted per point of each keypoint.

    Args:
        label: a single keypoint or a keypoints container
        frame_size: the ``(width, height)`` of the image/frame
        data_row_id: the Labelbox DataRow ID to attach to each annotation

    Returns:
        a list of annotation dicts
    """
    if isinstance(label, fol.Keypoints):
        keypoints = label.keypoints
    else:
        # A single keypoint is wrapped in a list so that both cases share the
        # loop below
        keypoints = [label]

    annos = []
    for keypoint in keypoints:
        if keypoint.attributes:
            classifications = _to_nested_classifications(keypoint.attributes)
        else:
            classifications = None

        for point in keypoint.points:
            anno = _make_base_anno(keypoint.label, data_row_id=data_row_id)
            anno["point"] = _make_point(point, frame_size)
            if classifications is not None:
                anno["classifications"] = classifications

            annos.append(anno)

    return annos


def _make_base_anno(value, data_row_id=None):
    anno = {
        "uuid": str(uuid4()),
        "schemaId": None,
        "title": value,
        "value": value,
    }

    if data_row_id:
        anno["dataRow"] = {"id": data_row_id}

    return anno


def _make_video_anno(labels_path, data_row_id=None):
    anno = {
        "uuid": str(uuid4()),
        "frames": labels_path,
    }

    if data_row_id:
        anno["dataRow"] = {"id": data_row_id}

    return anno


def _make_classification_answer(label):
    """Builds the answer portion of a classification annotation.

    Single-valued labels/attributes produce ``{"answer": ...}`` (treated as
    free text) and multi-valued ones produce ``{"answers": [...]}`` (treated
    as a checklist).

    Args:
        label: a classification, classifications container, categorical
            attribute or list attribute

    Returns:
        the answer dict

    Raises:
        ValueError: if ``label`` is none of the supported types
    """
    if isinstance(label, fol.Classification):
        # Assume free text
        return {"answer": label.label}

    if isinstance(label, fol.Classifications):
        # Assume checklist
        return {
            "answers": [{"value": c.label} for c in label.classifications]
        }

    if isinstance(label, fol.CategoricalAttribute):
        # Assume free text
        return {"answer": label.value}

    if isinstance(label, fol.ListAttribute):
        # Assume checklist
        return {"answers": [{"value": item} for item in label.value]}

    raise ValueError("Cannot convert %s to a classification" % label.__class__)


def _make_bbox(bounding_box, frame_size):
    x, y, w, h = bounding_box
    width, height = frame_size
    return {
        "left": round(x * width, 1),
        "top": round(y * height, 1),
        "width": round(w * width, 1),
        "height": round(h * height, 1),
    }


def _make_point(point, frame_size):
    x, y = point
    width, height = frame_size
    return {"x": round(x * width, 1), "y": round(y * height, 1)}


def _make_mask(instance_uri, color):
    return {
        "instanceURI": instance_uri,
        "colorRGB": list(color),
    }


# https://labelbox.com/docs/exporting-data/export-format-detail#video
def _parse_video_labels(video_label_d, frame_size):
    """Parses the frame labels referenced by a video label dict.

    Args:
        video_label_d: a dict whose ``frames`` key holds the URL or file path
            of the NDJSON frame labels
        frame_size: the ``(width, height)`` of the video frames

    Returns:
        a dict mapping frame numbers to dicts of parsed labels; if a frame
        number appears more than once, the last record wins
    """
    frame_records = _download_or_load_ndjson(video_label_d["frames"])

    parsed_frames = {}
    for record in frame_records:
        parsed = _parse_image_labels(record, frame_size)
        parsed_frames[record["frameNumber"]] = parsed

    return parsed_frames


# https://labelbox.com/docs/exporting-data/export-format-detail#images
def _parse_image_labels(label_d, frame_size):
    """Parses the classifications and objects of an image/frame label dict.

    Args:
        label_d: a dict with optional ``classifications`` and ``objects``
            lists
        frame_size: the ``(width, height)`` of the image/frame

    Returns:
        a dict mapping field names to parsed labels; object fields overwrite
        classification fields of the same name
    """
    # Parse classifications
    parsed = dict(_parse_classifications(label_d.get("classifications", [])))

    # Parse objects
    # @todo what if `objects.keys()` conflicts with `classifications.keys()`?
    parsed.update(_parse_objects(label_d.get("objects", []), frame_size))

    return parsed


def _parse_classifications(cd_list):
    """Parses a list of classification dicts into classification labels.

    The ``answer`` key may hold a list (dropdown), a dict (radio question)
    or a plain value (free text); the ``answers`` key holds a checklist.
    When both keys are present, ``answers`` takes precedence.

    Args:
        cd_list: a list of classification dicts, each with a ``value`` key
            that names the field

    Returns:
        a dict mapping field names to parsed labels
    """

    def _as_multi(options):
        return fol.Classifications(
            classifications=[
                fol.Classification(label=option["value"])
                for option in options
            ]
        )

    parsed = {}

    for cd in cd_list:
        field = cd["value"]

        if "answer" in cd:
            answer = cd["answer"]
            if isinstance(answer, list):
                # Dropdown
                parsed[field] = _as_multi(answer)
            elif isinstance(answer, dict):
                # Radio question
                parsed[field] = fol.Classification(label=answer["value"])
            else:
                # Free text
                parsed[field] = fol.Classification(label=answer)

        if "answers" in cd:
            # Checklist
            parsed[field] = _as_multi(cd["answers"])

    return parsed


def _parse_attributes(cd_list):
    """Parses a list of nested classification dicts into attributes.

    The ``answer`` key may hold a list (dropdown), a dict (radio question)
    or a plain value (free text); the ``answers`` key holds a checklist.
    When both keys are present, ``answers`` takes precedence.

    Args:
        cd_list: a list of classification dicts, each with a ``value`` key
            that names the attribute

    Returns:
        a dict mapping attribute names to parsed attributes
    """

    def _as_list_attr(options):
        return fol.ListAttribute(value=[option["value"] for option in options])

    parsed = {}

    for cd in cd_list:
        attr_name = cd["value"]

        if "answer" in cd:
            answer = cd["answer"]
            if isinstance(answer, list):
                # Dropdown
                parsed[attr_name] = _as_list_attr(answer)
            elif isinstance(answer, dict):
                # Radio question
                parsed[attr_name] = fol.CategoricalAttribute(
                    value=answer["value"]
                )
            else:
                # Free text
                parsed[attr_name] = fol.CategoricalAttribute(value=answer)

        if "answers" in cd:
            # Checklist
            parsed[attr_name] = _as_list_attr(cd["answers"])

    return parsed


def _parse_objects(od_list, frame_size):
    """Parses a list of object dicts into detections, polylines, keypoints
    and (at most one) segmentation mask.

    The geometry key present in each dict decides how it is parsed: ``bbox``
    yields a detection, ``polygon`` a closed and filled polyline, ``line`` an
    open and unfilled polyline, ``point`` a keypoint, and ``instanceURI`` a
    segmentation mask. Dicts with none of these keys trigger a warning and
    are skipped.

    Only the first mask URI encountered is loaded; objects referencing a
    different URI trigger a warning and are skipped.

    Args:
        od_list: a list of object dicts, each with a ``value`` key that is
            used as the label and an optional ``classifications`` list
        frame_size: the ``(width, height)`` of the image/frame

    Returns:
        a dict that may contain the keys ``detections``, ``polylines``,
        ``keypoints`` and ``segmentation``; keys with no content are omitted
    """
    detections = []
    polylines = []
    keypoints = []
    mask = None
    mask_instance_uri = None
    for od in od_list:
        label = od["value"]
        attributes = _parse_attributes(od.get("classifications", []))

        if "bbox" in od:
            # Detection
            bounding_box = _parse_bbox(od["bbox"], frame_size)
            detections.append(
                fol.Detection(
                    label=label,
                    bounding_box=bounding_box,
                    attributes=attributes,
                )
            )
        elif "polygon" in od:
            # Polygon: a closed, filled shape
            points = _parse_points(od["polygon"], frame_size)
            polylines.append(
                fol.Polyline(
                    label=label,
                    points=[points],
                    closed=True,
                    filled=True,
                    attributes=attributes,
                )
            )
        elif "line" in od:
            # Line: an open path, so the last vertex must not be joined back
            # to the first one
            points = _parse_points(od["line"], frame_size)
            polylines.append(
                fol.Polyline(
                    label=label,
                    points=[points],
                    closed=False,
                    filled=False,
                    attributes=attributes,
                )
            )
        elif "point" in od:
            # Keypoint
            point = _parse_point(od["point"], frame_size)
            keypoints.append(
                fol.Keypoint(
                    label=label, points=[point], attributes=attributes,
                )
            )
        elif "instanceURI" in od:
            # Segmentation mask
            if mask is None:
                mask_instance_uri = od["instanceURI"]
                mask = _parse_mask(mask_instance_uri)
            elif od["instanceURI"] != mask_instance_uri:
                msg = (
                    "Only one segmentation mask per image/frame is allowed; "
                    "skipping additional mask(s)"
                )
                warnings.warn(msg)
        else:
            msg = "Ignoring unsupported label"
            warnings.warn(msg)

    labels = {}

    if detections:
        labels["detections"] = fol.Detections(detections=detections)

    if polylines:
        labels["polylines"] = fol.Polylines(polylines=polylines)

    if keypoints:
        labels["keypoints"] = fol.Keypoints(keypoints=keypoints)

    if mask is not None:
        labels["segmentation"] = mask

    return labels


def _parse_bbox(bd, frame_size):
    width, height = frame_size
    x = bd["left"] / width
    y = bd["top"] / height
    w = bd["width"] / width
    h = bd["height"] / height
    return [x, y, w, h]


def _parse_points(pd_list, frame_size):
    """Converts a list of pixel point dicts into relative ``(x, y)`` tuples.

    Args:
        pd_list: a list of point dicts
        frame_size: the ``(width, height)`` of the image/frame

    Returns:
        a list of relative ``(x, y)`` tuples, in input order
    """
    relative_points = []
    for point_d in pd_list:
        relative_points.append(_parse_point(point_d, frame_size))

    return relative_points


def _parse_point(pd, frame_size):
    width, height = frame_size
    return (pd["x"] / width, pd["y"] / height)


def _parse_mask(instance_uri):
    """Downloads the mask at ``instance_uri`` and decodes it as an image.

    Args:
        instance_uri: the URI of the mask to download

    Returns:
        the decoded image
    """
    return etai.decode(etaw.download_file(instance_uri, quiet=True))


def _download_or_load_ndjson(url_or_filepath):
    """Loads NDJSON records from a URL or from a file on disk.

    Values starting with ``http`` are downloaded; anything else is read as a
    local file path.

    Args:
        url_or_filepath: the URL or file path of the NDJSON data

    Returns:
        the loaded NDJSON records
    """
    is_remote = url_or_filepath.startswith("http")
    if not is_remote:
        return etas.read_ndjson(url_or_filepath)

    payload = etaw.download_file(url_or_filepath, quiet=True)
    return etas.load_ndjson(payload)


## Labelbox Export

In [1]:
# Imports and configuration 
import fiftyone as fo

In [2]:
# Uncomment to print the names of the available datasets
#print(fo.list_datasets())

['jsm-test-dataset', 'plane-dataset']


In [4]:
# Do the groundwork for importing: load the dataset that will receive the labels
import fiftyone.utils.labelbox as foul
from uuid import uuid4

# Loads an existing dataset by name; expect an error here if no dataset named
# DATASET_NAME exists yet
dataset = fo.load_dataset(DATASET_NAME)

In [5]:
# TODO: Potentially delete
# Adding a LABELBOX_ID_FIELD if it doesn't already exist
#dataset.add_sample_field(LABELBOX_ID_FIELD, fo.StringField) 

Name:        jsm-test-dataset
Media type:  image
Num samples: 5151
Persistent:  True
Tags:        []
Sample fields:
    filepath:     fiftyone.core.fields.StringField
    tags:         fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.Metadata)
    external_id:  fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    bearing:      fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    elevation:    fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    distance:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    icao24:       fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    model:        fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Classification)
    manufacturer: fiftyone.core.fields.EmbeddedDocumentField(fiftyon

In [29]:
# Import the Labelbox labels into the Voxel51 Dataset, matching each exported
# row to a sample via the Labelbox ID stored in LABELBOX_ID_FIELD
custom_import_from_labelbox(
    dataset,
    LABELBOX_EXPORT_JSON,
    labelbox_id_field=LABELBOX_ID_FIELD,
)

Skipping labels for unknown Labelbox ID 'ckou3botb2y0d0yw47ftmggdr'; provide a `download_dir` if you wish to download media and create samples for new media
 100% |███████████████████| 56/56 [2.8s elapsed, 0s remaining, 19.7 samples/s]      


### Examine the results

In [None]:
# Launch the FiftyOne App on the dataset to inspect the imported labels
session = fo.launch_app(dataset, auto=False)

## Post Processing
You may want to do some additional data munging. I added a tag based on whether a plane was labeled or skipped in Labelbox.

In [None]:
# Add a label & tag that captures if the image was skipped, indicating there was no plane, or accepted, indicating there was a plane
from fiftyone import ViewField as F

label_field = "plane_ground_truth"


def _label_and_tag(view, label, tag):
    """Sets ``label_field`` to a classification with the given label and adds
    ``tag`` to every sample in ``view``.

    The tag is only appended when the sample does not already carry it, so
    re-running this cell does not pile up duplicate tags.

    Args:
        view: the samples to update
        label: the classification label to store in ``label_field``
        tag: the tag to add to each sample
    """
    for sample in view:
        sample[label_field] = fo.Classification(label=label)
        if tag not in sample.tags:
            sample.tags.append(tag)

        sample.save()


# Samples with a populated `model` field were accepted in Labelbox
model_view = dataset.exists("model")
_label_and_tag(model_view, "plane", "plane")

# Samples without a `model` value were skipped. `$eq: None` matches both
# documents that store an explicit null and documents where the key is missing;
# additionally requiring `$exists: False` would drop the explicit nulls
skipped_view = dataset.match({"model": {"$eq": None}})
_label_and_tag(skipped_view, "noplane", "noPlane")