From e4e99693d875523c94c326091ac0b7923a8c63ce Mon Sep 17 00:00:00 2001
From: Michal Noszczak <mnoszczak@labelbox.com>
Date: Mon, 19 Feb 2024 08:45:39 +0000
Subject: [PATCH 1/4] Refactor converter implementation

---
 .../data/annotation_types/base_annotation.py  |   4 +-
 .../data/serialization/ndjson/converter.py    | 109 +++++++++++-------
 2 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/labelbox/data/annotation_types/base_annotation.py b/labelbox/data/annotation_types/base_annotation.py
index 9949bd7f8..4a16b8b17 100644
--- a/labelbox/data/annotation_types/base_annotation.py
+++ b/labelbox/data/annotation_types/base_annotation.py
@@ -1,5 +1,5 @@
 import abc
-from uuid import UUID
+from uuid import UUID, uuid4
 from typing import Any, Dict, Optional
 from labelbox import pydantic_compat
 
@@ -15,4 +15,4 @@ class BaseAnnotation(FeatureSchema, abc.ABC):
     def __init__(self, **data):
         super().__init__(**data)
         extra_uuid = data.get("extra", {}).get("uuid")
-        self._uuid = data.get("_uuid") or extra_uuid or None
+        self._uuid = data.get("_uuid") or extra_uuid or uuid4()
diff --git a/labelbox/data/serialization/ndjson/converter.py b/labelbox/data/serialization/ndjson/converter.py
index 4647533ab..a7c54b109 100644
--- a/labelbox/data/serialization/ndjson/converter.py
+++ b/labelbox/data/serialization/ndjson/converter.py
@@ -1,6 +1,16 @@
+import copy
 import logging
 import uuid
-from typing import Any, Dict, Generator, Iterable
+from collections import defaultdict, deque
+from typing import Any, Deque, Dict, Generator, Iterable, List, Set, Union
+
+from labelbox.data.annotation_types.annotation import ObjectAnnotation
+from labelbox.data.annotation_types.classification.classification import (
+    ClassificationAnnotation,)
+from labelbox.data.annotation_types.metrics.confusion_matrix import (
+    ConfusionMatrixMetric,)
+from labelbox.data.annotation_types.metrics.scalar import ScalarMetric
+from labelbox.data.annotation_types.video import VideoMaskAnnotation
 
 from ...annotation_types.collection import LabelCollection, LabelGenerator
 from ...annotation_types.relationship import RelationshipAnnotation
@@ -42,51 +52,68 @@ def serialize(
         Returns:
             A generator for accessing the ndjson representation of the data
         """
-        used_annotation_uuids = set()
+        used_uuids: Set[uuid.UUID] = set()
+
+        relationship_uuids: Dict[uuid.UUID,
+                                 Deque[uuid.UUID]] = defaultdict(deque)
+
+        # UUIDs are private properties used to enhance UX when defining relationships.
+        # They are created for all annotations, but only utilized for relationships.
+        # To avoid overwriting, UUIDs must be unique across labels.
+        # Non-relationship annotation UUIDs are regenerated when they are reused.
+        # For relationship annotations, during first pass, we update the UUIDs of the source and target annotations.
+        # During the second pass, we update the UUIDs of the annotations referenced by the relationship annotations.
         for label in labels:
-            annotation_uuid_to_generated_uuid_lookup = {}
-            # UUIDs are private properties used to enhance UX when defining relationships.
-            # They are created for all annotations, but only utilized for relationships.
-            # To avoid overwriting, UUIDs must be unique across labels.
-            # Non-relationship annotation UUIDs are dropped (server-side generation will occur).
-            # For relationship annotations, new UUIDs are generated and stored in a lookup table.
+            uuid_safe_annotations: List[Union[
+                ClassificationAnnotation,
+                ObjectAnnotation,
+                VideoMaskAnnotation,
+                ScalarMetric,
+                ConfusionMatrixMetric,
+                RelationshipAnnotation,
+            ]] = []
+            # First pass to get all RelatiohnshipAnnotaitons
+            # and update the UUIDs of the source and target annotations
             for annotation in label.annotations:
                 if isinstance(annotation, RelationshipAnnotation):
-                    source_uuid = annotation.value.source._uuid
-                    target_uuid = annotation.value.target._uuid
-
-                    if (len(
-                            used_annotation_uuids.intersection(
-                                {source_uuid, target_uuid})) > 0):
-                        new_source_uuid = uuid.uuid4()
-                        new_target_uuid = uuid.uuid4()
-
-                        annotation_uuid_to_generated_uuid_lookup[
-                            source_uuid] = new_source_uuid
-                        annotation_uuid_to_generated_uuid_lookup[
-                            target_uuid] = new_target_uuid
-                        annotation.value.source._uuid = new_source_uuid
-                        annotation.value.target._uuid = new_target_uuid
-                    else:
-                        annotation_uuid_to_generated_uuid_lookup[
-                            source_uuid] = source_uuid
-                        annotation_uuid_to_generated_uuid_lookup[
-                            target_uuid] = target_uuid
-                    used_annotation_uuids.add(annotation._uuid)
-
+                    annotation = copy.deepcopy(annotation)
+                    new_source_uuid = uuid.uuid4()
+                    new_target_uuid = uuid.uuid4()
+                    relationship_uuids[annotation.value.source._uuid].append(
+                        new_source_uuid)
+                    relationship_uuids[annotation.value.target._uuid].append(
+                        new_target_uuid)
+                    annotation.value.source._uuid = new_source_uuid
+                    annotation.value.target._uuid = new_target_uuid
+                    if annotation._uuid in used_uuids:
+                        annotation._uuid = uuid.uuid4()
+                    used_uuids.add(annotation._uuid)
+                    uuid_safe_annotations.append(annotation)
+            # Second pass to update UUIDs for annotations referenced by RelationshipAnnotations
             for annotation in label.annotations:
                 if (not isinstance(annotation, RelationshipAnnotation) and
                         hasattr(annotation, "_uuid")):
-                    annotation._uuid = annotation_uuid_to_generated_uuid_lookup.get(
-                        annotation._uuid, annotation._uuid)
+                    annotation = copy.deepcopy(annotation)
+                    next_uuids = relationship_uuids[annotation._uuid]
+                    if len(next_uuids) > 0:
+                        annotation._uuid = next_uuids.popleft()
 
-        for example in NDLabel.from_common(labels):
-            annotation_uuid = getattr(example, "uuid", None)
+                    if annotation._uuid in used_uuids:
+                        annotation._uuid = uuid.uuid4()
+                    used_uuids.add(annotation._uuid)
+                    uuid_safe_annotations.append(annotation)
+                else:
+                    if not isinstance(annotation, RelationshipAnnotation):
+                        uuid_safe_annotations.append(annotation)
+            label.annotations = uuid_safe_annotations
+            for example in NDLabel.from_common([label]):
+                annotation_uuid = getattr(example, "uuid", None)
 
-            res = example.dict(
-                by_alias=True,
-                exclude={"uuid"} if annotation_uuid == "None" else None)
-            for k, v in list(res.items()):
-                if k in IGNORE_IF_NONE and v is None:
-                    del res[k]
-            yield res
+                res = example.dict(
+                    by_alias=True,
+                    exclude={"uuid"} if annotation_uuid == "None" else None,
+                )
+                for k, v in list(res.items()):
+                    if k in IGNORE_IF_NONE and v is None:
+                        del res[k]
+                yield res

From e67070dead3a56e37d01b394f6dfa0b5ed41624f Mon Sep 17 00:00:00 2001
From: Michal Noszczak <mnoszczak@labelbox.com>
Date: Mon, 19 Feb 2024 08:45:57 +0000
Subject: [PATCH 2/4] Restore previous test cases

---
 tests/data/serialization/ndjson/test_checklist.py    | 5 ++++-
 tests/data/serialization/ndjson/test_conversation.py | 1 +
 tests/data/serialization/ndjson/test_document.py     | 1 +
 tests/data/serialization/ndjson/test_image.py        | 1 +
 tests/data/serialization/ndjson/test_radio.py        | 2 ++
 tests/data/serialization/ndjson/test_video.py        | 2 ++
 6 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/tests/data/serialization/ndjson/test_checklist.py b/tests/data/serialization/ndjson/test_checklist.py
index 2ff41b407..612325f34 100644
--- a/tests/data/serialization/ndjson/test_checklist.py
+++ b/tests/data/serialization/ndjson/test_checklist.py
@@ -32,7 +32,7 @@ def test_serialization_min():
     }
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
-
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])
@@ -112,6 +112,7 @@ def test_serialization_with_classification():
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
 
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])
@@ -195,6 +196,7 @@ def test_serialization_with_classification_double_nested():
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
 
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])
@@ -274,6 +276,7 @@ def test_serialization_with_classification_double_nested_2():
 
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])
diff --git a/tests/data/serialization/ndjson/test_conversation.py b/tests/data/serialization/ndjson/test_conversation.py
index 38df3e1ab..acf21cc21 100644
--- a/tests/data/serialization/ndjson/test_conversation.py
+++ b/tests/data/serialization/ndjson/test_conversation.py
@@ -83,6 +83,7 @@
      [free_text_label, free_text_ndjson]])
 def test_message_based_radio_classification(label, ndjson):
     serialized_label = list(NDJsonConverter().serialize(label))
+    serialized_label[0].pop('uuid')
     assert serialized_label == ndjson
 
     deserialized_label = list(NDJsonConverter().deserialize(ndjson))
diff --git a/tests/data/serialization/ndjson/test_document.py b/tests/data/serialization/ndjson/test_document.py
index 601262a2c..a6aa03908 100644
--- a/tests/data/serialization/ndjson/test_document.py
+++ b/tests/data/serialization/ndjson/test_document.py
@@ -69,6 +69,7 @@ def test_pdf_with_name_only():
 
 def test_pdf_bbox_serialize():
     serialized = list(NDJsonConverter.serialize(bbox_labels))
+    serialized[0].pop('uuid')
     assert serialized == bbox_ndjson
 
 
diff --git a/tests/data/serialization/ndjson/test_image.py b/tests/data/serialization/ndjson/test_image.py
index f8d3812f0..e36ce6f50 100644
--- a/tests/data/serialization/ndjson/test_image.py
+++ b/tests/data/serialization/ndjson/test_image.py
@@ -90,6 +90,7 @@ def test_mask_from_arr():
     ],
                   data=ImageData(uid="0" * 25))
     res = next(NDJsonConverter.serialize([label]))
+    res.pop("uuid")
     assert res == {
         "classifications": [],
         "schemaId": "1" * 25,
diff --git a/tests/data/serialization/ndjson/test_radio.py b/tests/data/serialization/ndjson/test_radio.py
index 83776c1ec..583eb1fa0 100644
--- a/tests/data/serialization/ndjson/test_radio.py
+++ b/tests/data/serialization/ndjson/test_radio.py
@@ -34,6 +34,7 @@ def test_serialization_with_radio_min():
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
 
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])
@@ -85,6 +86,7 @@ def test_serialization_with_radio_classification():
 
     serialized = NDJsonConverter.serialize([label])
     res = next(serialized)
+    res.pop("uuid")
     assert res == expected
 
     deserialized = NDJsonConverter.deserialize([res])
diff --git a/tests/data/serialization/ndjson/test_video.py b/tests/data/serialization/ndjson/test_video.py
index 12067f2e5..5946d0bab 100644
--- a/tests/data/serialization/ndjson/test_video.py
+++ b/tests/data/serialization/ndjson/test_video.py
@@ -87,6 +87,8 @@ def test_video_classification_global_subclassifications():
 
     serialized = NDJsonConverter.serialize([label])
     res = [x for x in serialized]
+    for annotations in res:
+        annotations.pop("uuid")
     assert res == [expected_first_annotation, expected_second_annotation]
 
     deserialized = NDJsonConverter.deserialize(res)

From e751a3e16b0125e958ca74a14a643d07090101dd Mon Sep 17 00:00:00 2001
From: Michal Noszczak <mnoszczak@labelbox.com>
Date: Mon, 19 Feb 2024 08:46:17 +0000
Subject: [PATCH 3/4] Improve test case for relationships

---
 .../serialization/ndjson/test_relationship.py | 25 +++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/tests/data/serialization/ndjson/test_relationship.py b/tests/data/serialization/ndjson/test_relationship.py
index 5285d0195..0f6f640df 100644
--- a/tests/data/serialization/ndjson/test_relationship.py
+++ b/tests/data/serialization/ndjson/test_relationship.py
@@ -1,22 +1,37 @@
 import json
-import pytest
 from uuid import uuid4
 
+import pytest
+
 from labelbox.data.serialization.ndjson.converter import NDJsonConverter
 
 
 def test_relationship():
-    with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file:
+    with open("tests/data/assets/ndjson/relationship_import.json", "r") as file:
         data = json.load(file)
 
     res = list(NDJsonConverter.deserialize(data))
     res = list(NDJsonConverter.serialize(res))
+    assert len(res) == len(data)
+
+    res_relationship_annotation = [
+        annot for annot in res if "relationship" in annot
+    ][0]
+    res_source_and_target = [
+        annot for annot in res if "relationship" not in annot
+    ]
+    assert res_relationship_annotation
 
-    assert res == data
+    assert res_relationship_annotation["relationship"]["source"] in [
+        annot["uuid"] for annot in res_source_and_target
+    ]
+    assert res_relationship_annotation["relationship"]["target"] in [
+        annot["uuid"] for annot in res_source_and_target
+    ]
 
 
 def test_relationship_nonexistent_object():
-    with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file:
+    with open("tests/data/assets/ndjson/relationship_import.json", "r") as file:
         data = json.load(file)
 
     relationship_annotation = data[2]
@@ -30,7 +45,7 @@ def test_relationship_nonexistent_object():
 
 
 def test_relationship_duplicate_uuids():
-    with open('tests/data/assets/ndjson/relationship_import.json', 'r') as file:
+    with open("tests/data/assets/ndjson/relationship_import.json", "r") as file:
         data = json.load(file)
 
     source, target = data[0], data[1]

From c5d2710c56323db951077ec3d6b3ac93a3603150 Mon Sep 17 00:00:00 2001
From: Michal Noszczak <mnoszczak@labelbox.com>
Date: Mon, 19 Feb 2024 22:53:14 +0000
Subject: [PATCH 4/4] CR Changes

---
 .../data/serialization/ndjson/converter.py    | 55 ++++++++++---------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/labelbox/data/serialization/ndjson/converter.py b/labelbox/data/serialization/ndjson/converter.py
index a7c54b109..0292d2bbd 100644
--- a/labelbox/data/serialization/ndjson/converter.py
+++ b/labelbox/data/serialization/ndjson/converter.py
@@ -74,37 +74,38 @@ def serialize(
             ]] = []
             # First pass to get all RelatiohnshipAnnotaitons
             # and update the UUIDs of the source and target annotations
-            for annotation in label.annotations:
-                if isinstance(annotation, RelationshipAnnotation):
-                    annotation = copy.deepcopy(annotation)
-                    new_source_uuid = uuid.uuid4()
-                    new_target_uuid = uuid.uuid4()
-                    relationship_uuids[annotation.value.source._uuid].append(
-                        new_source_uuid)
-                    relationship_uuids[annotation.value.target._uuid].append(
-                        new_target_uuid)
-                    annotation.value.source._uuid = new_source_uuid
-                    annotation.value.target._uuid = new_target_uuid
-                    if annotation._uuid in used_uuids:
-                        annotation._uuid = uuid.uuid4()
-                    used_uuids.add(annotation._uuid)
-                    uuid_safe_annotations.append(annotation)
+            for relationship_annotation in (
+                    annotation for annotation in label.annotations
+                    if isinstance(annotation, RelationshipAnnotation)):
+                if relationship_annotation in uuid_safe_annotations:
+                    relationship_annotation = copy.deepcopy(
+                        relationship_annotation)
+                new_source_uuid = uuid.uuid4()
+                new_target_uuid = uuid.uuid4()
+                relationship_uuids[relationship_annotation.value.source.
+                                   _uuid].append(new_source_uuid)
+                relationship_uuids[relationship_annotation.value.target.
+                                   _uuid].append(new_target_uuid)
+                relationship_annotation.value.source._uuid = new_source_uuid
+                relationship_annotation.value.target._uuid = new_target_uuid
+                if relationship_annotation._uuid in used_uuids:
+                    relationship_annotation._uuid = uuid.uuid4()
+                used_uuids.add(relationship_annotation._uuid)
+                uuid_safe_annotations.append(relationship_annotation)
             # Second pass to update UUIDs for annotations referenced by RelationshipAnnotations
             for annotation in label.annotations:
-                if (not isinstance(annotation, RelationshipAnnotation) and
-                        hasattr(annotation, "_uuid")):
-                    annotation = copy.deepcopy(annotation)
-                    next_uuids = relationship_uuids[annotation._uuid]
-                    if len(next_uuids) > 0:
-                        annotation._uuid = next_uuids.popleft()
+                if not isinstance(annotation, RelationshipAnnotation):
+                    if hasattr(annotation, "_uuid"):
+                        if annotation in uuid_safe_annotations:
+                            annotation = copy.deepcopy(annotation)
+                        next_uuids = relationship_uuids[annotation._uuid]
+                        if len(next_uuids) > 0:
+                            annotation._uuid = next_uuids.popleft()
 
-                    if annotation._uuid in used_uuids:
-                        annotation._uuid = uuid.uuid4()
-                    used_uuids.add(annotation._uuid)
+                        if annotation._uuid in used_uuids:
+                            annotation._uuid = uuid.uuid4()
+                        used_uuids.add(annotation._uuid)
                     uuid_safe_annotations.append(annotation)
-                else:
-                    if not isinstance(annotation, RelationshipAnnotation):
-                        uuid_safe_annotations.append(annotation)
             label.annotations = uuid_safe_annotations
             for example in NDLabel.from_common([label]):
                 annotation_uuid = getattr(example, "uuid", None)