# Label Containers
* There are two high level containers for labels
    1. LabelList
    2. LabelGenerator
* Tools built for converting between formats, ETL, and model training all operate on these containers
* Make sure to read the annotation type basics notebook first. Explanations are not repeated here

In [1]:
try:
    import labelbox
except: 
    !git clone https://github.com/Labelbox/labelbox-python.git
    !cd labelbox-python && git checkout ms/annotation-examples && pip install .[data]

In [2]:
from labelbox import Client
from labelbox.data.annotation_types import LabelList, LabelGenerator
from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option
from labelbox import LabelingFrontend
from labelbox.data.annotation_types import (
    Label, 
    RasterData, 
    Mask, 
    Point, 
    Polygon, 
    ClassificationAnswer, 
    Radio, 
    Checklist, 
    ObjectAnnotation, 
    ClassificationAnnotation
)
import requests
import numpy as np
from getpass import getpass
import os

In [3]:
# If you don't want to give google access to drive you can skip this cell
# and manually set `API_KEY` below.
COLAB = "google.colab" in str(get_ipython())
if COLAB:
    !pip install colab-env -qU
    from colab_env import envvar_handler
    envvar_handler.envload()

API_KEY = os.environ.get("LABELBOX_API_KEY")
if not os.environ.get("LABELBOX_API_KEY"):
    API_KEY = getpass("Please enter your labelbox api key")
    if COLAB:
        envvar_handler.add_env("LABELBOX_API_KEY", API_KEY)

In [4]:
# Only update this if you have an on-prem deployment
ENDPOINT = "https://api.labelbox.com/graphql"

# Shared client used by the helper functions below (uploads, project and dataset creation).
client = Client(api_key=API_KEY, endpoint=ENDPOINT)

#### Helper Functions
* See annotation_type_basics for details on this

In [5]:
def signing_function(obj_bytes: bytes) -> str:
    """Upload ``obj_bytes`` through the notebook's ``client`` and return the url.

    Do not use this signer outside of this example: you will not be able to
    resign these images at a later date.
    """
    return client.upload_data(content=obj_bytes, sign=True)

In [21]:
def get_polygon():
    """Return the example ``Polygon`` built from a fixed list of (x, y) vertices."""
    outline = (
        (60, 161), (67, 177), (76, 180), (77, 222), (82, 246), (78, 291), (72, 300), (87, 300),
        (94, 244), (103, 243), (100, 269), (90, 290), (95, 296), (104, 292), (108, 272),
        (111, 300), (121, 300), (117, 244), (128, 236), (133, 298), (142, 297), (137, 250),
        (146, 208), (138, 185), (120, 180), (105, 189), (112, 162), (93, 156), (72, 160),
    )
    vertices = []
    for x_coord, y_coord in outline:
        vertices.append(Point(x=x_coord, y=y_coord))
    return Polygon(points=vertices)


def get_labels():
    """Build the single example ``Label`` used throughout this notebook.

    Downloads one image and annotates it with a polygon ("deer"), two
    segmentation masks ("deer_eyes" and "deer_nose") and an image-level
    checklist ("image_description"). "deer_nose" also carries a nested radio
    classification ("nose_description").

    Returns:
        A list containing exactly one ``Label``.

    Raises:
        requests.HTTPError: If the image download returns an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    im_h, im_w = 300, 200
    image_url = "https://picsum.photos/id/1003/200/300"
    nose_color, eye_color = (0, 255, 0), (255, 0, 0)
    nose_mask = Point(x=96, y=194).raster(im_h, im_w, thickness=3)
    eye_masks = [
        Point(x=84, y=182).raster(im_h, im_w, thickness=3),
        Point(x=99, y=181).raster(im_h, im_w, thickness=3),
    ]
    # Merge the three point rasters into one array with an element-wise maximum.
    mask_arr = np.max([*eye_masks, nose_mask], axis=0)
    # Both mask annotations below reference this one RasterData object and
    # differ only in the color handed to Mask.
    mask = RasterData(arr=mask_arr)

    # Fail loudly on a bad download instead of passing an error body on as
    # image bytes, and never hang forever on a stalled connection.
    response = requests.get(image_url, timeout=30)
    response.raise_for_status()

    return [Label(
        data=RasterData(im_bytes=response.content),
        annotations=[
            ObjectAnnotation(value=get_polygon(), name="deer"),
            ObjectAnnotation(name="deer_eyes", value=Mask(mask=mask, color=eye_color)),
            ObjectAnnotation(
                name="deer_nose",
                value=Mask(mask=mask, color=nose_color),
                classifications=[
                    ClassificationAnnotation(
                        name="nose_description",
                        value=Radio(answer=ClassificationAnswer(name="wet")),
                    )
                ],
            ),
            ClassificationAnnotation(
                name="image_description",
                value=Checklist(answer=[ClassificationAnswer(name="bright")]),
            ),
        ],
    )]


In [22]:
def _answers_as_list(value):
    """Return ``value.answer`` as a list, wrapping a single answer in one."""
    answer = value.answer
    if isinstance(answer, (list, tuple)):
        return list(answer)
    return [answer]


def show_schema_ids(label):
    """Print the name and schema id of everything attached to ``label``.

    For each annotation this prints the annotation itself, then any nested
    classifications with their answers, and - for top level classification
    annotations - the answers of the classification.

    Answers are normalised to a list first, so both single-answer values
    (e.g. ``Radio``) and multi-answer values (e.g. ``Checklist``) are handled
    at either level. Each answer is expected to expose ``name`` and
    ``schema_id``.
    """
    for annotation in label.annotations:
        print(f"Object : {annotation.name} - {annotation.schema_id}")
        for classification in getattr(annotation, 'classifications', []):
            print(f"--- Subclass : {classification.name} - {classification.schema_id}")
            for option in _answers_as_list(classification.value):
                print(f"--- --- Options: {option.name} - {option.schema_id}")

        if isinstance(annotation, ClassificationAnnotation):
            for option in _answers_as_list(annotation.value):
                print(f"--- Options: {option.name} - {option.schema_id}")

In [23]:

def setup_project():
    """Create a project and a dataset configured with the example ontology.

    Returns:
        A tuple ``(ontology, dataset, project)`` where ``ontology`` is the
        ``OntologyBuilder`` read back from the newly set up project.
    """
    # These names have to match our object names exactly!!
    nose_description = Classification(
        class_type=Classification.Type.RADIO,
        instructions="nose_description",
        options=[Option(value="wet")],
    )
    image_description = Classification(
        Classification.Type.CHECKLIST,
        instructions="image_description",
        options=[Option(value=name) for name in ("bright", "not_blurry", "dark")],
    )
    tools = [
        Tool(tool=Tool.Type.POLYGON, name="deer"),
        Tool(tool=Tool.Type.SEGMENTATION, name="deer_nose",
             classifications=[nose_description]),
        Tool(tool=Tool.Type.SEGMENTATION, name="deer_eyes"),
    ]
    ontology_builder = OntologyBuilder(tools=tools, classifications=[image_description])

    frontends = client.get_labeling_frontends(where=LabelingFrontend.name == "Editor")
    editor = next(frontends)
    project = client.create_project(name="test_annotation_types")
    project.setup(editor, ontology_builder.asdict())
    dataset = client.create_dataset(name='my_ds')
    project.datasets.connect(dataset)

    return OntologyBuilder.from_project(project), dataset, project


In [24]:
def print_mask_urls(label):
    """Print the mask url of every annotation in ``label`` whose value is a ``Mask``."""
    mask_values = (
        annotation.value
        for annotation in label.annotations
        if isinstance(annotation.value, Mask)
    )
    for mask_value in mask_values:
        print(mask_value.mask.url)

In [25]:
def show_references(label):
    """Print every remote reference currently attached to ``label``.

    Prints, in order: the schema ids, the mask urls, the image url and the
    data row uid.
    """
    print('\n---  schema ids ---\n')
    show_schema_ids(label)
    print("\n--- mask urls ---\n")
    print_mask_urls(label)
    print('\n--- image url ---\n')
    print(label.data.url)
    print('\n--- data row reference ---\n')
    # Read the uid from the argument rather than from a notebook-level global,
    # so the function reports on the label it was actually given.
    print(label.data.uid)

# LabelList
* This object is essentially a list of Labels with a set of helpful utilities
* This object is simple and fast at the expense of memory
    * Larger datasets shouldn't use label list ( or at least will require more ram ).
* Why use label list over just a list of labels?
    * Multithreaded utilities (faster)
    * Compatible with converter functions (functions useful for translating between formats, etl, and training )

In [26]:
# Wrap the example labels in a LabelList
labels = get_labels()
label_list = LabelList(labels)

## Iterate

In [27]:
# Iterable, behaves like a list: each item is a Label
for label in label_list:
    print(type(label))
# Get length
print(len(label_list))
# Access a single Label by index
print(type(label_list[0]))

<class 'labelbox.data.annotation_types.label.Label'>
1
<class 'labelbox.data.annotation_types.label.Label'>


### Upload segmentation masks

In [28]:
### Add urls to all segmentation masks:
# (in parallel)
# Before: the masks have no url yet
for label in label_list:
    print_mask_urls(label)
    
label_list.add_url_to_masks(signing_function)

# After: every mask now has a url
for label in label_list:
    print_mask_urls(label)
# Again note that these all share the same segmentation mask
# ( This is determined by the fact that they share the same reference )
# This mask is only uploaded once

0it [00:00, ?it/s]

None
None


1it [00:00,  2.59it/s]

https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F689e4c26-d397-b0ee-912f-01ed98597092-1?Expires=1627331194133&KeyName=labelbox-assets-key-3&Signature=ouNZBuB2Di38hIAEc2oqBXaCjJ4
https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F689e4c26-d397-b0ee-912f-01ed98597092-1?Expires=1627331194133&KeyName=labelbox-assets-key-3&Signature=ouNZBuB2Di38hIAEc2oqBXaCjJ4





### Create signed urls for data

In [29]:
### Create signed urls for the image data of every label:
# (in parallel)
# The data url is printed before and after so the change is visible
print(label_list[0].data.url)
label_list.add_url_to_data(signing_function)
print(label_list[0].data.url)

0it [00:00, ?it/s]

None


1it [00:00,  2.56it/s]

https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F99b31026-2cfc-1594-8d61-98330c953a52-1?Expires=1627331194575&KeyName=labelbox-assets-key-3&Signature=b_imojOA7K6yLU9qvFsoQkXiEgI





### Add to labelbox dataset

In [30]:
# For the next two sections we need an ontology and dataset
# (the project and dataset created here are deleted again in the cleanup cell)
ontology, dataset, project = setup_project()

In [31]:
# The data row uid is printed before and after adding the labels to the dataset
print(label_list[0].data.uid)
# Note that this function will assign a uuid as the external id if it isn't provided.
label_list.add_to_dataset(dataset, signing_function)
print(label_list[0].data.uid)

1it [00:00, 1137.90it/s]

None





ckrjnjdqaualy0ypkbxnc8eh1


### Add schema ids

In [32]:
# Before: no schema ids are set
for label in label_list:
    show_schema_ids(label)
# Fill in the schema ids from the ontology (the annotation names have to match the ontology names).
label_list.assign_schema_ids(ontology)
print('-'* 50)
# After: every annotation, classification and option has a schema id
for label in label_list:
    show_schema_ids(label)

Object : deer - None
Object : deer_eyes - None
Object : deer_nose - None
--- Subclass : nose_description - None
--- --- Options: wet - None
Object : image_description - None
--- Options: bright - None
--------------------------------------------------
Object : deer - ckrjnjc8sltga0y8u08nd4ta9
Object : deer_eyes - ckrjnjc8sltge0y8ufgrm11sl
Object : deer_nose - ckrjnjc8sltgc0y8u5wbi9bkf
--- Subclass : nose_description - ckrjnjc9oltgm0y8ueemo7cbj
--- --- Options: wet - ckrjnjcaaltgo0y8u97ze8y99
Object : image_description - ckrjnjc8rltg80y8u19aghqcy
--- Options: bright - ckrjnjc9kltgg0y8uce3u132y


In [33]:
# cleanup: remove the dataset and project created by setup_project()
dataset.delete()
project.delete()

# LabelGenerator
* This object generates labels and provides a set of helpful utilities
* This object is complex and slower than LabelList in order to be highly memory efficient
    * Larger datasets should use label generators
* Why use label generator over just a generator that yields labels?
    * This object supports parallel io operations to buffer results in the background.
    * Compatible with converter functions (functions useful for translating between formats, etl, and training )
* The first qsize elements run serially from when the chained functions are added.
    * After that iterating will get much faster.

In [34]:
# Start from fresh labels and a fresh project / dataset for the generator examples
labels = get_labels()
label_generator = LabelGenerator(labels)
ontology, dataset, project = setup_project()

In [35]:
# So we can't show the before and afters because the generator is not repeatable

try:
    label = next(label_generator)
    print("Ran once")
    # get_labels() returned a single label, so this second call raises StopIteration
    label = next(label_generator)
    print("Ran twice")
except StopIteration:
    pass

Ran once


In [36]:
# Does not support indexing ( it is a generator.. )
try:
    label_generator[0]
    print("Can index")
except TypeError:
    # Indexing the generator raises TypeError
    print("Unable to index")

    

Unable to index


### Functions to modify results
* We can set functions to run on the result of the generator
* Since these are run in background threads it is a lot faster than applying them on each label individually
* The functions are lazily evaluated

In [37]:
# Recreate because we already went through all of the items when we showed that it isn't repeatable
original_label = labels[0]

# Nothing has been uploaded or assigned yet, so every reference prints as None
show_references(original_label)
# Chain the functions to run on each label; they are lazily evaluated
label_generator = LabelGenerator(labels) \
        .add_url_to_masks(signing_function) \
        .add_to_dataset(dataset, signing_function) \
        .assign_schema_ids(ontology)




---  schema ids ---

Object : deer - None
Object : deer_eyes - None
Object : deer_nose - None
--- Subclass : nose_description - None
--- --- Options: wet - None
Object : image_description - None
--- Options: bright - None

--- mask urls ---

None
None

--- image url ---

None

--- data row reference ---

None


In [38]:
# Still all None: no label has been pulled from the generator yet
show_references(original_label)


---  schema ids ---

Object : deer - None
Object : deer_eyes - None
Object : deer_nose - None
--- Subclass : nose_description - None
--- --- Options: wet - None
Object : image_description - None
--- Options: bright - None

--- mask urls ---

None
None

--- image url ---

None

--- data row reference ---

None


In [39]:
# Pulling a label from the generator runs the chained functions
label = next(label_generator)
# original_label now shows the new references as well (see the output below)
show_references(original_label)


---  schema ids ---

Object : deer - ckrjnjkgo0acl0y8d3jb03y44
Object : deer_eyes - ckrjnjkgp0acp0y8d4qrh77hr
Object : deer_nose - ckrjnjkgo0acn0y8datv48hq4
--- Subclass : nose_description - ckrjnjkhf0acx0y8d5zrf0myf
--- --- Options: wet - ckrjnjkhz0acz0y8dbmlp9moo
Object : image_description - ckrjnjkgn0acj0y8de44k7ibe
--- Options: bright - ckrjnjkha0acr0y8d173f2paz

--- mask urls ---

https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F9fad073e-3073-0ac3-9c13-176806f4d580-1?Expires=1627331208015&KeyName=labelbox-assets-key-3&Signature=zyBB8gaW76Np_-NdTnhkAjAhae0
https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F9fad073e-3073-0ac3-9c13-176806f4d580-1?Expires=1627331208015&KeyName=labelbox-assets-key-3&Signature=zyBB8gaW76Np_-NdTnhkAjAhae0

--- image url ---

https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2Ffdd6a888-90b8-2c23-e5fd-1c9294bfb8aa-1?Expires=1627331209063&KeyName=labelbox-assets-key-3&Signature=YjAhWZgw9nScHopBbmsohJLGvB4

--- data row reference ---

ckr

* Note that the first qsize elements run serially from when the chained functions are added.
* After that iterating will get much faster.

In [40]:
# LabelGenerators can be converted to a LabelList
# as_list() returns a LabelList (see the repr printed below)
LabelGenerator(labels).as_list()

<labelbox.data.annotation_types.collection.LabelList at 0x1076ae400>

In [41]:
# cleanup: remove the dataset and project created for the LabelGenerator section
dataset.delete()
project.delete()