Labelbox · mnoszczak · Feb 20, 2024 · Feb 19, 2024 · Feb 19, 2024 · Feb 19, 2024
@@ -1,5 +1,5 @@
 import abc
-from uuid import UUID
+from uuid import UUID, uuid4
 from typing import Any, Dict, Optional
 from labelbox import pydantic_compat
 
@@ -15,4 +15,4 @@ class BaseAnnotation(FeatureSchema, abc.ABC):
     def __init__(self, **data):
         super().__init__(**data)
         extra_uuid = data.get("extra", {}).get("uuid")
-        self._uuid = data.get("_uuid") or extra_uuid or None
+        self._uuid = data.get("_uuid") or extra_uuid or uuid4()
@@ -1,6 +1,16 @@
+import copy
 import logging
 import uuid
-from typing import Any, Dict, Generator, Iterable
+from collections import defaultdict, deque
+from typing import Any, Deque, Dict, Generator, Iterable, List, Set, Union
+
+from labelbox.data.annotation_types.annotation import ObjectAnnotation
+from labelbox.data.annotation_types.classification.classification import (
+    ClassificationAnnotation,)
+from labelbox.data.annotation_types.metrics.confusion_matrix import (
+    ConfusionMatrixMetric,)
+from labelbox.data.annotation_types.metrics.scalar import ScalarMetric
+from labelbox.data.annotation_types.video import VideoMaskAnnotation
 
 from ...annotation_types.collection import LabelCollection, LabelGenerator
 from ...annotation_types.relationship import RelationshipAnnotation
@@ -42,51 +52,69 @@ def serialize(
         Returns:
             A generator for accessing the ndjson representation of the data
         """
-        used_annotation_uuids = set()
-        for label in labels:
-            annotation_uuid_to_generated_uuid_lookup = {}
-            # UUIDs are private properties used to enhance UX when defining relationships.
-            # They are created for all annotations, but only utilized for relationships.
-            # To avoid overwriting, UUIDs must be unique across labels.
-            # Non-relationship annotation UUIDs are dropped (server-side generation will occur).
-            # For relationship annotations, new UUIDs are generated and stored in a lookup table.
-            for annotation in label.annotations:
-                if isinstance(annotation, RelationshipAnnotation):
-                    source_uuid = annotation.value.source._uuid
-                    target_uuid = annotation.value.target._uuid
+        used_uuids: Set[uuid.UUID] = set()
 
-                    if (len(
-                            used_annotation_uuids.intersection(
-                                {source_uuid, target_uuid})) > 0):
-                        new_source_uuid = uuid.uuid4()
-                        new_target_uuid = uuid.uuid4()
-
-                        annotation_uuid_to_generated_uuid_lookup[
-                            source_uuid] = new_source_uuid
-                        annotation_uuid_to_generated_uuid_lookup[
-                            target_uuid] = new_target_uuid
-                        annotation.value.source._uuid = new_source_uuid
-                        annotation.value.target._uuid = new_target_uuid
-                    else:
-                        annotation_uuid_to_generated_uuid_lookup[
-                            source_uuid] = source_uuid
-                        annotation_uuid_to_generated_uuid_lookup[
-                            target_uuid] = target_uuid
-                    used_annotation_uuids.add(annotation._uuid)
+        relationship_uuids: Dict[uuid.UUID,
+                                 Deque[uuid.UUID]] = defaultdict(deque)
 
+        # UUIDs are private properties used to enhance UX when defining relationships.
+        # They are created for all annotations, but only utilized for relationships.
+        # To avoid overwriting, UUIDs must be unique across labels.
+        # Non-relationship annotation UUIDs are regenerated when they are reused.
+        # For relationship annotations, during the first pass, we update the UUIDs of the source and target annotations.
+        # During the second pass, we update the UUIDs of the annotations referenced by the relationship annotations.
+        for label in labels:
+            uuid_safe_annotations: List[Union[
+                ClassificationAnnotation,
+                ObjectAnnotation,
+                VideoMaskAnnotation,
+                ScalarMetric,
+                ConfusionMatrixMetric,
+                RelationshipAnnotation,
+            ]] = []
+            # First pass to get all RelationshipAnnotations
+            # and update the UUIDs of the source and target annotations
+            for relationship_annotation in (
+                    annotation for annotation in label.annotations
+                    if isinstance(annotation, RelationshipAnnotation)):
+                if relationship_annotation in uuid_safe_annotations:
+                    relationship_annotation = copy.deepcopy(
+                        relationship_annotation)
+                new_source_uuid = uuid.uuid4()
+                new_target_uuid = uuid.uuid4()
+                relationship_uuids[relationship_annotation.value.source.
+                                   _uuid].append(new_source_uuid)
+                relationship_uuids[relationship_annotation.value.target.
+                                   _uuid].append(new_target_uuid)
+                relationship_annotation.value.source._uuid = new_source_uuid
+                relationship_annotation.value.target._uuid = new_target_uuid
+                if relationship_annotation._uuid in used_uuids:
+                    relationship_annotation._uuid = uuid.uuid4()
+                used_uuids.add(relationship_annotation._uuid)
+                uuid_safe_annotations.append(relationship_annotation)
+            # Second pass to update UUIDs for annotations referenced by RelationshipAnnotations
             for annotation in label.annotations:
-                if (not isinstance(annotation, RelationshipAnnotation) and
-                        hasattr(annotation, "_uuid")):
-                    annotation._uuid = annotation_uuid_to_generated_uuid_lookup.get(
-                        annotation._uuid, annotation._uuid)
+                if not isinstance(annotation, RelationshipAnnotation):
+                    if hasattr(annotation, "_uuid"):
+                        if annotation in uuid_safe_annotations:
+                            annotation = copy.deepcopy(annotation)
+                        next_uuids = relationship_uuids[annotation._uuid]
+                        if len(next_uuids) > 0:
+                            annotation._uuid = next_uuids.popleft()
 
-        for example in NDLabel.from_common(labels):
-            annotation_uuid = getattr(example, "uuid", None)
+                        if annotation._uuid in used_uuids:
+                            annotation._uuid = uuid.uuid4()
+                        used_uuids.add(annotation._uuid)
+                    uuid_safe_annotations.append(annotation)
+            label.annotations = uuid_safe_annotations
+            for example in NDLabel.from_common([label]):
+                annotation_uuid = getattr(example, "uuid", None)
 
-            res = example.dict(
-                by_alias=True,
-                exclude={"uuid"} if annotation_uuid == "None" else None)
-            for k, v in list(res.items()):
-                if k in IGNORE_IF_NONE and v is None:
-                    del res[k]
-            yield res
+                res = example.dict(
+                    by_alias=True,
+                    exclude={"uuid"} if annotation_uuid == "None" else None,
+                )
+                for k, v in list(res.items()):
+                    if k in IGNORE_IF_NONE and v is None:
+                        del res[k]
+                yield res
@@ -32,7 +32,7 @@ def test_serialization_min():
     }
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
-
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])
@@ -112,6 +112,7 @@ def test_serialization_with_classification():
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
 
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])
@@ -195,6 +196,7 @@ def test_serialization_with_classification_double_nested():
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
 
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])
@@ -274,6 +276,7 @@ def test_serialization_with_classification_double_nested_2():
 
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])

@@ -83,6 +83,7 @@
      [free_text_label, free_text_ndjson]])
 def test_message_based_radio_classification(label, ndjson):
     serialized_label = list(NDJsonConverter().serialize(label))
+    serialized_label[0].pop('uuid')
     assert serialized_label == ndjson
 
     deserialized_label = list(NDJsonConverter().deserialize(ndjson))

@@ -69,6 +69,7 @@ def test_pdf_with_name_only():
 
 def test_pdf_bbox_serialize():
     serialized = list(NDJsonConverter.serialize(bbox_labels))
+    serialized[0].pop('uuid')
     assert serialized == bbox_ndjson
 
 

@@ -90,6 +90,7 @@ def test_mask_from_arr():
     ],
                   data=ImageData(uid="0" * 25))
     res = next(NDJsonConverter.serialize([label]))
+    res.pop("uuid")
     assert res == {
         "classifications": [],
         "schemaId": "1" * 25,

@@ -34,6 +34,7 @@ def test_serialization_with_radio_min():
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
 
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])
@@ -85,6 +86,7 @@ def test_serialization_with_radio_classification():
 
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])

@@ -1,22 +1,37 @@
 import json
-import pytest
 from uuid import uuid4
 
+import pytest
+
 from labelbox.data.serialization.ndjson.converter import NDJsonConverter
 
 
 def test_relationship():
-    with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file:
+    with open("tests/data/assets/ndjson/relationship_import.json", "r") as file:
         data = json.load(file)
 
     res = list(NDJsonConverter.deserialize(data))
     res = list(NDJsonConverter.serialize(res))
+    assert len(res) == len(data)
+
+    res_relationship_annotation = [
+        annot for annot in res if "relationship" in annot
+    ][0]
+    res_source_and_target = [
+        annot for annot in res if "relationship" not in annot
+    ]
+    assert res_relationship_annotation
 
-    assert res == data
+    assert res_relationship_annotation["relationship"]["source"] in [
+        annot["uuid"] for annot in res_source_and_target
+    ]
+    assert res_relationship_annotation["relationship"]["target"] in [
+        annot["uuid"] for annot in res_source_and_target
+    ]
 
 
 def test_relationship_nonexistent_object():
-    with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file:
+    with open("tests/data/assets/ndjson/relationship_import.json", "r") as file:
         data = json.load(file)
 
     relationship_annotation = data[2]
@@ -30,7 +45,7 @@ def test_relationship_nonexistent_object():
 
 
 def test_relationship_duplicate_uuids():
-    with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file:
+    with open("tests/data/assets/ndjson/relationship_import.json", "r") as file:
         data = json.load(file)
 
     source, target = data[0], data[1]

@@ -87,6 +87,8 @@ def test_video_classification_global_subclassifications():
 
     serialized = NDJsonConverter.serialize([label])
     res = [x for x in serialized]
+    for annotations in res:
+        annotations.pop("uuid")
     assert res == [expected_first_annotation, expected_second_annotation]
 
     deserialized = NDJsonConverter.deserialize(res)