# Label Containers
* There are two high level containers for labels
    1. LabelCollection
    2. LabelGenerator
* Tools built to convert between formats, help with ETL, and train models all operate on these containers
* Make sure to read the annotation type basics first (see annotation_type_basics). Explanations are not repeated here

In [2]:
# Shared resources:
# See annotation_type_basics to understand this section

from labelbox import Client
from labelbox.data.annotation_types import LabelCollection, LabelGenerator

from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option
from labelbox import LabelingFrontend
from labelbox.data.annotation_types import Label, RasterData, Mask, Point, Polygon, ClassificationAnswer, Radio, Checklist, ObjectAnnotation, ClassificationAnnotation
import numpy as np
import requests


# Image that get_labels() downloads and uses as the data of the example label.
image_url = "https://picsum.photos/id/1003/200/300"

# Shared client used by signing_function() and setup_project().
# NOTE(review): constructed with no arguments, so credentials presumably come
# from the environment - confirm before running.
client = Client()

def signing_function(obj_bytes: bytes) -> str:
    """Upload ``obj_bytes`` through the shared client and return the resulting url.

    Do not use this signer for real data: you will not be able to re-sign
    these images at a later date.
    """
    return client.upload_data(content=obj_bytes, sign=True)


# Outlines of the two eyes; each outline is a list of [x, y] vertices.
xy_eye_polys = [
    [[82, 180], [83, 184], [88, 184], [86, 180]],
    [[97, 182], [99, 184], [102, 183], [101, 180], [98, 180]]
]
# Outline of the nose as [x, y] vertices.
nose_poly = [[95, 192],
 [93, 197],
 [96, 198],
 [100, 197],
 [100, 194],
 [100, 192],
 [96, 192]
]       

# Outline used for the "deer" polygon annotation, as [x, y] vertices.
xy_poly = [[60.215, 160.706], [67.135, 176.513], [76.36, 180.136], [76.69, 222.287], [81.632, 245.668], [77.678, 291.442],
 [72.077, 300], [86.904, 300], [94.482, 243.692], [103.378, 243.363], [100.413, 269.378], [90.199, 289.795],
 [95.141, 296.381], [103.708, 292.43], [107.662, 271.683], [110.957, 300], [121.171, 299.675], [117.217, 243.692], [127.761, 236.118],
 [132.703, 298.028], [142.258, 297.369], [136.657, 249.949], [145.553, 207.797], [137.975, 185.075],
 [120.182, 180.465], [105.026, 189.356], [111.616, 161.694], [92.835, 155.767], [72.077, 160.048]]

# Prob provide a color mapping or something..
h, w = 300, 200
eye_color = 255
nose_color = 128

# Rasterize the nose outline in its own color.
nose = Polygon(points=[Point(x=x, y=y) for x, y in nose_poly])
nose_mask = nose.raster(height=h, width=w, color=nose_color)

# Rasterize each eye outline, then merge them into a single eye mask.
eyes = [
    Polygon(points=[Point(x=x, y=y) for x, y in xy_eye_poly])
    for xy_eye_poly in xy_eye_polys
]
eye_masks = np.stack(
    [eye.raster(height=h, width=w, color=eye_color) for eye in eyes]
).max(axis=0)

# Picks the brighter color if there is overlap.
# If you don't want overlap then just simply create separate masks
np_seg_mask = np.stack([nose_mask, eye_masks]).max(axis=0)

def get_labels():
    """Build the example label used throughout this notebook.

    The label's data is the image downloaded from ``image_url``. It carries
    four annotations: a "deer" polygon, a "deer_eyes" mask, and two
    "deer_nose" masks that each have a "description" radio answer of "wet".
    Every mask annotation references the same RasterData object built from
    ``np_seg_mask``; they differ only by color.

    Returns:
        A list containing the single Label.
    """
    shared_mask = RasterData(arr=np_seg_mask)
    image = RasterData(im_bytes=requests.get(image_url).content)

    def build_nose_annotation():
        # Called once per nose annotation so each one is a separate object.
        wet = ClassificationAnswer(name="wet")
        description = ClassificationAnnotation(
            name="description",
            value=Radio(answer=wet),
        )
        return ObjectAnnotation(
            name="deer_nose",
            value=Mask(mask=shared_mask, color=nose_color),
            classifications=[description],
        )

    deer_outline = Polygon(points=[Point(x=px, y=py) for px, py in xy_poly])
    deer_annotation = ObjectAnnotation(value=deer_outline, name="deer")
    eyes_annotation = ObjectAnnotation(
        name="deer_eyes",
        value=Mask(mask=shared_mask, color=eye_color),
    )

    label = Label(
        data=image,
        annotations=[
            deer_annotation,
            eyes_annotation,
            build_nose_annotation(),
            build_nose_annotation(),
        ],
    )
    return [label]


def show_schema_ids(label):
    """Print the name and schema id of everything annotated on ``label``.

    For each annotation this prints the annotation itself, then any nested
    classifications with their selected answer(s). If the annotation is a
    top-level ClassificationAnnotation, its answer(s) are printed as well.

    Args:
        label: Object exposing ``annotations``; each annotation exposes
            ``name`` and ``schema_id``.
    """

    def answers_of(value):
        # A classification value may hold one answer or a list of answers;
        # normalise to a list so both shapes print the same way.
        answer = value.answer
        if isinstance(answer, (list, tuple)):
            return answer
        return [answer]

    for annotation in label.annotations:
        print(f"Object : {annotation.name} - {annotation.schema_id}")
        # Not every annotation type is guaranteed to expose `classifications`,
        # so fall back to an empty list instead of raising AttributeError.
        nested = getattr(annotation, "classifications", None) or []
        for classification in nested:
            print(f"--- Subclass : {classification.name} - {classification.schema_id}")
            for answer in answers_of(classification.value):
                print(f"--- --- Options: {answer.name} - {answer.schema_id}")

        if isinstance(annotation, ClassificationAnnotation):
            for option in answers_of(annotation.value):
                print(f"--- Options: {option.name} - {option.schema_id}")

def setup_project():
    """Create a project and a dataset configured for the example labels.

    The project is set up with the "Editor" labeling frontend and an ontology
    holding a "deer" polygon tool, "deer_nose" and "deer_eyes" segmentation
    tools, and an "image_description" checklist. The new dataset is connected
    to the project.

    Returns:
        (ontology, dataset, project): the OntologyBuilder read back from the
        project, the created dataset and the created project.
    """
    # These names have to match our object names exactly!!
    nose_description = Classification(
        class_type=Classification.Type.RADIO,
        instructions="description",
        options=[Option(value="wet")],
    )
    image_description = Classification(
        Classification.Type.CHECKLIST,
        instructions="image_description",
        options=[
            Option(value="bright"),
            Option(value="not_blurry"),
            Option(value="dark"),
        ],
    )
    tools = [
        Tool(tool=Tool.Type.POLYGON, name="deer"),
        Tool(
            tool=Tool.Type.SEGMENTATION,
            name="deer_nose",
            classifications=[nose_description],
        ),
        Tool(tool=Tool.Type.SEGMENTATION, name="deer_eyes"),
    ]
    ontology_builder = OntologyBuilder(
        tools=tools,
        classifications=[image_description],
    )

    frontends = client.get_labeling_frontends(
        where=LabelingFrontend.name == "Editor")
    editor = next(frontends)
    project = client.create_project(name="test_annotation_types")
    project.setup(editor, ontology_builder.asdict())

    dataset = client.create_dataset(name='my_ds')
    project.datasets.connect(dataset)

    return OntologyBuilder.from_project(project), dataset, project

def print_mask_urls(label):
    """Print the mask url of every annotation on ``label`` whose value is a Mask."""
    mask_values = (
        candidate.value
        for candidate in label.annotations
        if isinstance(candidate.value, Mask)
    )
    for mask_value in mask_values:
        print(mask_value.mask.url)

# LabelCollection
* This object is essentially a list of Labels with a set of helpful utilities
* This object is simple and fast at the expense of memory
    * Larger datasets shouldn't use label collections ( or at least will require more ram ).
* Why use label collection over just a list of labels?
    * Multithreaded utilities (faster)
    * Compatible with converter functions (functions useful for translating between formats, etl, and training )

In [3]:
# Build the example labels and wrap the list in a LabelCollection.
labels = get_labels()
label_collection = LabelCollection(labels)

## Iterate

In [4]:
# Iterable, acts like a list.
# get_labels() returned a single label, so each step below reports one item.
for label in label_collection:
    print(type(label))
# Get length
print(len(label_collection))
# By index
print(type(label_collection[0]))

<class 'labelbox.data.annotation_types.label.Label'>
1
<class 'labelbox.data.annotation_types.label.Label'>


### Upload segmentation masks

In [5]:
### Add urls to all segmentation masks:
# (in parallel)
# Before: show the mask urls prior to signing.
for label in label_collection:
    print_mask_urls(label)
    
label_collection.add_url_to_masks(signing_function)

# After: show the mask urls again once signing_function has been applied.
for label in label_collection:
    print_mask_urls(label)
# Again note that these all share the same segmentation mask
# ( This is determined by the fact that they share the same reference )
# This mask is only uploaded once

0it [00:00, ?it/s]

None
None
None


1it [00:00,  2.77it/s]

https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F90a848f5-2038-3d8f-e912-0ac34949a3a3-1?Expires=1627055774270&KeyName=labelbox-assets-key-3&Signature=cCgkDBIRsjWSYPe7JdRIBjgUyPs
https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F90a848f5-2038-3d8f-e912-0ac34949a3a3-1?Expires=1627055774270&KeyName=labelbox-assets-key-3&Signature=cCgkDBIRsjWSYPe7JdRIBjgUyPs
https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F90a848f5-2038-3d8f-e912-0ac34949a3a3-1?Expires=1627055774270&KeyName=labelbox-assets-key-3&Signature=cCgkDBIRsjWSYPe7JdRIBjgUyPs





### Create signed urls for data

In [6]:
### Add a signed url to the data (image) of every label:
# (in parallel)
# The url is printed before and after so the change is visible.
print(label_collection[0].data.url)
label_collection.add_url_to_data(signing_function)
print(label_collection[0].data.url)

0it [00:00, ?it/s]

None


1it [00:00,  2.78it/s]

https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3da0abde-b2f4-b453-e65f-589aad3c989f-1?Expires=1627055774638&KeyName=labelbox-assets-key-3&Signature=WkEwp5xanY5KBn9DdD5xlmm3S5k





### Add to labelbox dataset

In [7]:
# For the next two sections we need an ontology and dataset.
# setup_project() creates a project and a dataset; both are deleted in the
# cleanup cell at the end of this section.
ontology, dataset, project = setup_project()

In [8]:
# The data uid is printed before and after adding the labels to the dataset.
print(label_collection[0].data.uid)
# Note that this function will assign a uuid as the external id if it isn't provided.
label_collection.add_to_dataset(dataset, signing_function)
print(label_collection[0].data.uid)

1it [00:00, 4293.04it/s]

None





ckrf3k6a85bxz0ytiew58btq7


### Add schema ids

In [9]:
# Before: show the schema ids prior to assignment.
for label in label_collection:
    show_schema_ids(label)
# Fill in schema ids from the ontology created by setup_project().
# The annotation names have to match the ontology names exactly.
label_collection.assign_schema_ids(ontology)
print('-'* 50)
# After: show the schema ids once they have been assigned.
for label in label_collection:
    show_schema_ids(label)

Object : deer - None
Object : deer_eyes - None
Object : deer_nose - None
--- Subclass : description - None
--- --- Options: wet - None
Object : deer_nose - None
--- Subclass : description - None
--- --- Options: wet - None
--------------------------------------------------
Object : deer - ckrf3k4pt7qbm0y9j40kf8v54
Object : deer_eyes - ckrf3k4pu7qbq0y9jc91r6s16
Object : deer_nose - ckrf3k4pt7qbo0y9j4eswgg80
--- Subclass : description - ckrf3k4r27qc30y9j4wdph03x
--- --- Options: wet - ckrf3k4rn7qc50y9j4xz68f5m
Object : deer_nose - ckrf3k4pt7qbo0y9j4eswgg80
--- Subclass : description - ckrf3k4r27qc30y9j4wdph03x
--- --- Options: wet - ckrf3k4rn7qc50y9j4xz68f5m


In [10]:
# cleanup: remove the dataset and project created by setup_project()
dataset.delete()
project.delete()

# LabelGenerator
* This object generates labels and provides a set of helpful utilities
* This object is more complex and slower than a LabelCollection, in exchange for being highly memory efficient
    * Larger datasets should use label generators
* Why use label generator over just a generator that yields labels?
    * This object supports parallel io operations to buffer in the background.
    * Compatible with converter functions (functions useful for translating between formats, etl, and training )

In [None]:
# Rebuild the example labels, wrap them in a LabelGenerator, and create a
# fresh project and dataset for this section.
labels = get_labels()
label_generator = LabelGenerator(labels)
ontology, dataset, project = setup_project()

In [None]:
# So we can't show the before and afters because the generator is not repeatable.
# The generator was built from a single label, so the first next() succeeds
# and the second one raises StopIteration.

try:

    label = next(label_generator)
    print("Ran once")
    label = next(label_generator)
    print("Ran twice")
except StopIteration:
    # Running out of labels is the point of this cell, so report it instead
    # of swallowing the exception silently.
    print("Generator exhausted")

In [None]:
# Does not support indexing ( it is a generator.. )
# A TypeError from the subscript is caught and reported as "Unable to index".
try:
    label_generator[0]
    print("Can index")
except TypeError:
    print("Unable to index")

    

In [None]:
def show_references(label):
    """Print every external reference currently attached to ``label``.

    Prints, in order: the schema ids (via show_schema_ids), the mask urls
    (via print_mask_urls), the url of the label's data, and the uid of the
    label's data.

    Args:
        label: The label to report on.
    """
    print('\n---  schema ids ---\n')
    show_schema_ids(label)
    print("\n--- mask urls ---\n")
    print_mask_urls(label)
    print('\n--- image url ---\n')
    print(label.data.url)
    print('\n--- data row reference ---\n')
    # Report on the argument itself. The previous version read the notebook
    # global `original_label`, which ignored the label that was passed in and
    # failed if that global had not been defined yet.
    print(label.data.uid)

In [None]:
# Chain processing steps onto the generator so they are applied to its results.
# A new generator is needed because the earlier one was already consumed when
# we showed that it isn't repeatable.
original_label = labels[0]

show_references(original_label)
label_generator = (
    LabelGenerator(labels)
    .add_url_to_masks(signing_function)
    .add_to_dataset(dataset, signing_function)
    .assign_schema_ids(ontology)
)



In [None]:
# This is lazily evaluated.
# So even after defining the functions the ids aren't set

In [None]:
# Nothing has been pulled from the generator yet, so this shows the
# references before any of the chained steps have run.
show_references(original_label)

In [None]:
# Pull one label through the generator, then show the references again.
# NOTE(review): this prints `original_label` rather than the returned `label`,
# so the chained steps presumably update the label in place - confirm.
label = next(label_generator)
show_references(original_label)

* Note that the first qsize elements run serially from when the chained functions are added.
* After that iterating will get much faster.

In [None]:
# Generators can be converted to Collections.
# The result is not assigned to a name here.
LabelGenerator(labels).as_collection()


In [None]:
# cleanup: remove the dataset and project created for the LabelGenerator section
dataset.delete()
project.delete()