Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions labelbox/data/annotation_types/base_annotation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import abc
from uuid import UUID
from uuid import UUID, uuid4
from typing import Any, Dict, Optional
from labelbox import pydantic_compat

Expand All @@ -15,4 +15,4 @@ class BaseAnnotation(FeatureSchema, abc.ABC):
def __init__(self, **data):
super().__init__(**data)
extra_uuid = data.get("extra", {}).get("uuid")
self._uuid = data.get("_uuid") or extra_uuid or None
self._uuid = data.get("_uuid") or extra_uuid or uuid4()
116 changes: 72 additions & 44 deletions labelbox/data/serialization/ndjson/converter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
import copy
import logging
import uuid
from typing import Any, Dict, Generator, Iterable
from collections import defaultdict, deque
from typing import Any, Deque, Dict, Generator, Iterable, List, Set, Union

from labelbox.data.annotation_types.annotation import ObjectAnnotation
from labelbox.data.annotation_types.classification.classification import (
ClassificationAnnotation,)
from labelbox.data.annotation_types.metrics.confusion_matrix import (
ConfusionMatrixMetric,)
from labelbox.data.annotation_types.metrics.scalar import ScalarMetric
from labelbox.data.annotation_types.video import VideoMaskAnnotation

from ...annotation_types.collection import LabelCollection, LabelGenerator
from ...annotation_types.relationship import RelationshipAnnotation
Expand Down Expand Up @@ -42,51 +52,69 @@ def serialize(
Returns:
A generator for accessing the ndjson representation of the data
"""
used_annotation_uuids = set()
for label in labels:
annotation_uuid_to_generated_uuid_lookup = {}
# UUIDs are private properties used to enhance UX when defining relationships.
# They are created for all annotations, but only utilized for relationships.
# To avoid overwriting, UUIDs must be unique across labels.
# Non-relationship annotation UUIDs are dropped (server-side generation will occur).
# For relationship annotations, new UUIDs are generated and stored in a lookup table.
for annotation in label.annotations:
if isinstance(annotation, RelationshipAnnotation):
source_uuid = annotation.value.source._uuid
target_uuid = annotation.value.target._uuid
used_uuids: Set[uuid.UUID] = set()

if (len(
used_annotation_uuids.intersection(
{source_uuid, target_uuid})) > 0):
new_source_uuid = uuid.uuid4()
new_target_uuid = uuid.uuid4()

annotation_uuid_to_generated_uuid_lookup[
source_uuid] = new_source_uuid
annotation_uuid_to_generated_uuid_lookup[
target_uuid] = new_target_uuid
annotation.value.source._uuid = new_source_uuid
annotation.value.target._uuid = new_target_uuid
else:
annotation_uuid_to_generated_uuid_lookup[
source_uuid] = source_uuid
annotation_uuid_to_generated_uuid_lookup[
target_uuid] = target_uuid
used_annotation_uuids.add(annotation._uuid)
relationship_uuids: Dict[uuid.UUID,
Deque[uuid.UUID]] = defaultdict(deque)

# UUIDs are private properties used to enhance UX when defining relationships.
# They are created for all annotations, but only utilized for relationships.
# To avoid overwriting, UUIDs must be unique across labels.
# Non-relationship annotation UUIDs are regenerated when they are reused.
# For relationship annotations, the first pass generates fresh UUIDs for each relationship's source and target and records them under the original UUIDs.
# The second pass assigns those recorded UUIDs to the non-relationship annotations that the relationships reference.
for label in labels:
uuid_safe_annotations: List[Union[
ClassificationAnnotation,
ObjectAnnotation,
VideoMaskAnnotation,
ScalarMetric,
ConfusionMatrixMetric,
RelationshipAnnotation,
]] = []
# First pass to get all RelationshipAnnotations
# and update the UUIDs of the source and target annotations
for relationship_annotation in (
annotation for annotation in label.annotations
if isinstance(annotation, RelationshipAnnotation)):
if relationship_annotation in uuid_safe_annotations:
relationship_annotation = copy.deepcopy(
relationship_annotation)
new_source_uuid = uuid.uuid4()
new_target_uuid = uuid.uuid4()
relationship_uuids[relationship_annotation.value.source.
_uuid].append(new_source_uuid)
relationship_uuids[relationship_annotation.value.target.
_uuid].append(new_target_uuid)
relationship_annotation.value.source._uuid = new_source_uuid
relationship_annotation.value.target._uuid = new_target_uuid
if relationship_annotation._uuid in used_uuids:
relationship_annotation._uuid = uuid.uuid4()
used_uuids.add(relationship_annotation._uuid)
uuid_safe_annotations.append(relationship_annotation)
# Second pass to update UUIDs for annotations referenced by RelationshipAnnotations
for annotation in label.annotations:
if (not isinstance(annotation, RelationshipAnnotation) and
hasattr(annotation, "_uuid")):
annotation._uuid = annotation_uuid_to_generated_uuid_lookup.get(
annotation._uuid, annotation._uuid)
if not isinstance(annotation, RelationshipAnnotation):
if hasattr(annotation, "_uuid"):
if annotation in uuid_safe_annotations:
annotation = copy.deepcopy(annotation)
next_uuids = relationship_uuids[annotation._uuid]
if len(next_uuids) > 0:
annotation._uuid = next_uuids.popleft()

for example in NDLabel.from_common(labels):
annotation_uuid = getattr(example, "uuid", None)
if annotation._uuid in used_uuids:
annotation._uuid = uuid.uuid4()
used_uuids.add(annotation._uuid)
uuid_safe_annotations.append(annotation)
label.annotations = uuid_safe_annotations
for example in NDLabel.from_common([label]):
annotation_uuid = getattr(example, "uuid", None)

res = example.dict(
by_alias=True,
exclude={"uuid"} if annotation_uuid == "None" else None)
for k, v in list(res.items()):
if k in IGNORE_IF_NONE and v is None:
del res[k]
yield res
res = example.dict(
by_alias=True,
exclude={"uuid"} if annotation_uuid == "None" else None,
)
for k, v in list(res.items()):
if k in IGNORE_IF_NONE and v is None:
del res[k]
yield res
5 changes: 4 additions & 1 deletion tests/data/serialization/ndjson/test_checklist.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_serialization_min():
}
serialized = NDJsonConverter.serialize([label])
res = next(serialized)

res.pop("uuid")
assert res == expected

deserialized = NDJsonConverter.deserialize([res])
Expand Down Expand Up @@ -112,6 +112,7 @@ def test_serialization_with_classification():
serialized = NDJsonConverter.serialize([label])
res = next(serialized)

res.pop("uuid")
assert res == expected

deserialized = NDJsonConverter.deserialize([res])
Expand Down Expand Up @@ -195,6 +196,7 @@ def test_serialization_with_classification_double_nested():
serialized = NDJsonConverter.serialize([label])
res = next(serialized)

res.pop("uuid")
assert res == expected

deserialized = NDJsonConverter.deserialize([res])
Expand Down Expand Up @@ -274,6 +276,7 @@ def test_serialization_with_classification_double_nested_2():

serialized = NDJsonConverter.serialize([label])
res = next(serialized)
res.pop("uuid")
assert res == expected

deserialized = NDJsonConverter.deserialize([res])
Expand Down
1 change: 1 addition & 0 deletions tests/data/serialization/ndjson/test_conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
[free_text_label, free_text_ndjson]])
def test_message_based_radio_classification(label, ndjson):
serialized_label = list(NDJsonConverter().serialize(label))
serialized_label[0].pop('uuid')
assert serialized_label == ndjson

deserialized_label = list(NDJsonConverter().deserialize(ndjson))
Expand Down
1 change: 1 addition & 0 deletions tests/data/serialization/ndjson/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def test_pdf_with_name_only():

def test_pdf_bbox_serialize():
serialized = list(NDJsonConverter.serialize(bbox_labels))
serialized[0].pop('uuid')
assert serialized == bbox_ndjson


Expand Down
1 change: 1 addition & 0 deletions tests/data/serialization/ndjson/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def test_mask_from_arr():
],
data=ImageData(uid="0" * 25))
res = next(NDJsonConverter.serialize([label]))
res.pop("uuid")
assert res == {
"classifications": [],
"schemaId": "1" * 25,
Expand Down
2 changes: 2 additions & 0 deletions tests/data/serialization/ndjson/test_radio.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def test_serialization_with_radio_min():
serialized = NDJsonConverter.serialize([label])
res = next(serialized)

res.pop("uuid")
assert res == expected

deserialized = NDJsonConverter.deserialize([res])
Expand Down Expand Up @@ -85,6 +86,7 @@ def test_serialization_with_radio_classification():

serialized = NDJsonConverter.serialize([label])
res = next(serialized)
res.pop("uuid")
assert res == expected

deserialized = NDJsonConverter.deserialize([res])
Expand Down
25 changes: 20 additions & 5 deletions tests/data/serialization/ndjson/test_relationship.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,37 @@
import json
import pytest
from uuid import uuid4

import pytest

from labelbox.data.serialization.ndjson.converter import NDJsonConverter


def test_relationship():
with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file:
with open("tests/data/assets/ndjson/relationship_import.json", "r") as file:
data = json.load(file)

res = list(NDJsonConverter.deserialize(data))
res = list(NDJsonConverter.serialize(res))
assert len(res) == len(data)

res_relationship_annotation = [
annot for annot in res if "relationship" in annot
][0]
res_source_and_target = [
annot for annot in res if "relationship" not in annot
]
assert res_relationship_annotation

assert res == data
assert res_relationship_annotation["relationship"]["source"] in [
annot["uuid"] for annot in res_source_and_target
]
assert res_relationship_annotation["relationship"]["target"] in [
annot["uuid"] for annot in res_source_and_target
]


def test_relationship_nonexistent_object():
with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file:
with open("tests/data/assets/ndjson/relationship_import.json", "r") as file:
data = json.load(file)

relationship_annotation = data[2]
Expand All @@ -30,7 +45,7 @@ def test_relationship_nonexistent_object():


def test_relationship_duplicate_uuids():
with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file:
with open("tests/data/assets/ndjson/relationship_import.json", "r") as file:
data = json.load(file)

source, target = data[0], data[1]
Expand Down
2 changes: 2 additions & 0 deletions tests/data/serialization/ndjson/test_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ def test_video_classification_global_subclassifications():

serialized = NDJsonConverter.serialize([label])
res = [x for x in serialized]
for annotations in res:
annotations.pop("uuid")
assert res == [expected_first_annotation, expected_second_annotation]

deserialized = NDJsonConverter.deserialize(res)
Expand Down