From ee2d55c443876d9b2f0cdb95141500343b7cf655 Mon Sep 17 00:00:00 2001
From: Kevin Kim <kkim@kevins-mbp-2.lan>
Date: Fri, 10 Mar 2023 09:45:58 -0800
Subject: [PATCH 1/5] First commit for supporting globalKeys in annotation
 import SDK

---
 labelbox/__init__.py                          |  2 +-
 .../data/annotation_types/data/base_data.py   |  1 +
 labelbox/schema/annotation_import.py          | 32 +++++++++++++++++
 .../annotation_import/test_label_import.py    | 35 +++++++++++++++++++
 .../test_mea_prediction_import.py             | 32 +++++++++++++++++
 5 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/labelbox/__init__.py b/labelbox/__init__.py
index 47b4a9d70..6cc005fba 100644
--- a/labelbox/__init__.py
+++ b/labelbox/__init__.py
@@ -5,7 +5,7 @@
 from labelbox.schema.project import Project
 from labelbox.schema.model import Model
 from labelbox.schema.bulk_import_request import BulkImportRequest
-from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport
+from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport, MEAToMALPredictionImport
 from labelbox.schema.dataset import Dataset
 from labelbox.schema.data_row import DataRow
 from labelbox.schema.label import Label
diff --git a/labelbox/data/annotation_types/data/base_data.py b/labelbox/data/annotation_types/data/base_data.py
index 0c376db37..2ccda34c3 100644
--- a/labelbox/data/annotation_types/data/base_data.py
+++ b/labelbox/data/annotation_types/data/base_data.py
@@ -11,5 +11,6 @@ class BaseData(BaseModel, ABC):
     """
     external_id: Optional[str] = None
     uid: Optional[str] = None
+    global_key: Optional[str] = None
     media_attributes: Optional[Dict[str, Any]] = None
     metadata: Optional[List[Dict[str, Any]]] = None
diff --git a/labelbox/schema/annotation_import.py b/labelbox/schema/annotation_import.py
index e2f5f2557..65261a62f 100644
--- a/labelbox/schema/annotation_import.py
+++ b/labelbox/schema/annotation_import.py
@@ -155,6 +155,8 @@ def _get_ndjson_from_objects(cls, objects: Union[List[Dict[str, Any]],
             )
 
         objects = serialize_labels(objects)
+        # cls.validate_data_rows(objects)
+
         data_str = ndjson.dumps(objects)
         if not data_str:
             raise ValueError(f"{object_name} cannot be empty")
@@ -171,6 +173,36 @@ def refresh(self) -> None:
                             as_json=True)
         self._set_field_values(res)
 
+    @classmethod
+    def validate_data_rows(cls, objects: List[Dict[str, Any]]):
+        """
+        Validates annotations by checking 'dataRow' is provided
+        and only one of 'id' or 'globalKey' is provided.
+
+        Shows up to `max_num_errors` errors if validation fails, to prevent
+        a large number of error messages from being printed out.
+        """
+        errors = []
+        max_num_errors = 100
+        for object in objects:
+            if 'dataRow' not in object:
+                errors.append(f"'dataRow' is missing in {object}")
+            elif 'id' in object['dataRow'] and 'globalKey' in object['dataRow']:
+                errors.append(
+                    f"Must provide only one of 'id' or 'globalKey' for 'dataRow' in {object}"
+                )
+
+        if errors:
+            errors_length = len(errors)
+            formatted_errors = '\n'.join(errors[:max_num_errors])
+            if errors_length > max_num_errors:
+                logger.warning(
+                    f"Found more than {max_num_errors} errors. Showing first {max_num_errors} error messages..."
+                )
+            raise ValueError(
+                f"Error while validating annotations. Found {errors_length} annotations with errors. Errors:\n{formatted_errors}"
+            )
+
     @classmethod
     def from_name(cls,
                   client: "labelbox.Client",
diff --git a/tests/integration/annotation_import/test_label_import.py b/tests/integration/annotation_import/test_label_import.py
index 2fc8ac84d..9848186e0 100644
--- a/tests/integration/annotation_import/test_label_import.py
+++ b/tests/integration/annotation_import/test_label_import.py
@@ -38,6 +38,41 @@ def test_create_from_objects(client, configured_project, object_predictions,
         label_import.input_file_url, object_predictions)
 
 
+def test_data_row_validation_errors(client, configured_project,
+                                    object_predictions):
+    name = str(uuid.uuid4())
+    # Set up data for validation errors
+    # Invalid: Remove 'dataRow' part entirely
+    del object_predictions[0]['dataRow']
+
+    # Invalid: Set both id and globalKey
+    object_predictions[1]['dataRow'] = {
+        'id': 'some id',
+        'globalKey': 'some global key'
+    }
+
+    # Valid
+    object_predictions[2]['dataRow'] = {
+        'id': 'some id',
+    }
+
+    # Valid
+    object_predictions[3]['dataRow'] = {
+        'globalKey': 'some global key',
+    }
+
+    with pytest.raises(ValueError) as exc_info:
+        label_import = LabelImport.create_from_objects(
+            client=client,
+            project_id=configured_project.uid,
+            name=name,
+            labels=object_predictions)
+    exception_str = str(exc_info.value)
+    assert "Found 2 annotations with errors" in exception_str
+    assert "'dataRow' is missing in" in exception_str
+    assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str
+
+
 def test_create_from_label_objects(client, configured_project,
                                    object_predictions,
                                    annotation_import_test_helpers):
diff --git a/tests/integration/annotation_import/test_mea_prediction_import.py b/tests/integration/annotation_import/test_mea_prediction_import.py
index afca122b6..531139fcc 100644
--- a/tests/integration/annotation_import/test_mea_prediction_import.py
+++ b/tests/integration/annotation_import/test_mea_prediction_import.py
@@ -11,6 +11,38 @@
 """
 
 
+def test_data_row_validation_errors(model_run_with_all_project_labels,
+                                    object_predictions):
+    name = str(uuid.uuid4())
+    # Set up data for validation errors
+    # Invalid: Remove 'dataRow' part entirely
+    del object_predictions[0]['dataRow']
+
+    # Invalid: Set both id and globalKey
+    object_predictions[1]['dataRow'] = {
+        'id': 'some id',
+        'globalKey': 'some global key'
+    }
+
+    # Valid
+    object_predictions[2]['dataRow'] = {
+        'id': 'some id',
+    }
+
+    # Valid
+    object_predictions[3]['dataRow'] = {
+        'globalKey': 'some global key',
+    }
+
+    with pytest.raises(ValueError) as exc_info:
+        model_run_with_all_project_labels.add_predictions(
+            name=name, predictions=object_predictions)
+    exception_str = str(exc_info.value)
+    assert "Found 2 annotations with errors" in exception_str
+    assert "'dataRow' is missing in" in exception_str
+    assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str
+
+
 def test_create_from_url(model_run_with_model_run_data_rows,
                          annotation_import_test_helpers):
     name = str(uuid.uuid4())

From 9793391a4051e07f87f050a5d7ff4448fd4774ca Mon Sep 17 00:00:00 2001
From: Kevin Kim <kkim@kevins-mbp-2.lan>
Date: Fri, 10 Mar 2023 18:01:55 -0800
Subject: [PATCH 2/5] Global keys to Annotation Types, and add validation when
 both id and globalKey are provided

---
 labelbox/data/annotation_types/data/raster.py |  5 ++-
 labelbox/data/annotation_types/data/text.py   |  6 ++-
 labelbox/data/annotation_types/data/video.py  |  6 ++-
 labelbox/data/serialization/ndjson/base.py    | 27 +++++++++----
 .../serialization/ndjson/classification.py    |  8 ++--
 labelbox/data/serialization/ndjson/label.py   | 38 +++++++++++++------
 labelbox/data/serialization/ndjson/metric.py  |  6 +--
 labelbox/data/serialization/ndjson/objects.py | 14 +++----
 labelbox/schema/annotation_import.py          |  2 +-
 9 files changed, 71 insertions(+), 41 deletions(-)

diff --git a/labelbox/data/annotation_types/data/raster.py b/labelbox/data/annotation_types/data/raster.py
index 152960a09..45998cb26 100644
--- a/labelbox/data/annotation_types/data/raster.py
+++ b/labelbox/data/annotation_types/data/raster.py
@@ -163,9 +163,10 @@ def validate_args(cls, values):
         url = values.get("url")
         arr = values.get("arr")
         uid = values.get('uid')
-        if uid == file_path == im_bytes == url == None and arr is None:
+        global_key = values.get('global_key')
+        if uid == file_path == im_bytes == url == global_key == None and arr is None:
             raise ValueError(
-                "One of `file_path`, `im_bytes`, `url`, `uid` or `arr` required."
+                "One of `file_path`, `im_bytes`, `url`, `uid`, `global_key` or `arr` required."
             )
         if arr is not None:
             if arr.dtype != np.uint8:
diff --git a/labelbox/data/annotation_types/data/text.py b/labelbox/data/annotation_types/data/text.py
index 816f39c2e..9149b0394 100644
--- a/labelbox/data/annotation_types/data/text.py
+++ b/labelbox/data/annotation_types/data/text.py
@@ -93,9 +93,11 @@ def validate_date(cls, values):
         text = values.get("text")
         url = values.get("url")
         uid = values.get('uid')
-        if uid == file_path == text == url == None:
+        global_key = values.get('global_key')
+        if uid == file_path == text == url == global_key == None:
             raise ValueError(
-                "One of `file_path`, `text`, `uid`, or `url` required.")
+                "One of `file_path`, `text`, `uid`, `global_key` or `url` required."
+            )
         return values
 
     def __repr__(self) -> str:
diff --git a/labelbox/data/annotation_types/data/video.py b/labelbox/data/annotation_types/data/video.py
index 8d835acdb..53cee7280 100644
--- a/labelbox/data/annotation_types/data/video.py
+++ b/labelbox/data/annotation_types/data/video.py
@@ -153,10 +153,12 @@ def validate_data(cls, values):
         url = values.get("url")
         frames = values.get("frames")
         uid = values.get("uid")
+        global_key = values.get("global_key")
 
-        if uid == file_path == frames == url == None:
+        if uid == file_path == frames == url == global_key == None:
             raise ValueError(
-                "One of `file_path`, `frames`, `uid`, or `url` required.")
+                "One of `file_path`, `frames`, `uid`, `global_key` or `url` required."
+            )
         return values
 
     def __repr__(self) -> str:
diff --git a/labelbox/data/serialization/ndjson/base.py b/labelbox/data/serialization/ndjson/base.py
index 654c32495..b3ef0fc8b 100644
--- a/labelbox/data/serialization/ndjson/base.py
+++ b/labelbox/data/serialization/ndjson/base.py
@@ -8,15 +8,17 @@
 
 class DataRow(BaseModel):
     id: str = None
+    global_key: str = None
 
-    @validator('id', pre=True, always=True)
-    def validate_id(cls, v):
-        if v is None:
-            raise ValueError(
-                "Data row ids are not set. Use `LabelGenerator.add_to_dataset`,or `Label.create_data_row`. "
-                "You can also manually assign the id for each `BaseData` object"
-            )
-        return v
+    @root_validator()
+    def must_set_one(cls, values):
+        if bool(values.get('id')) == bool(values.get('global_key')):
+            raise ValueError("Must set either id or global_key")
+        return values
+
+    class Config:
+        allow_population_by_field_name = True
+        alias_generator = camel_case
 
 
 class NDJsonBase(BaseModel):
@@ -27,6 +29,15 @@ class NDJsonBase(BaseModel):
     def set_id(cls, v):
         return v or str(uuid4())
 
+    def dict(self, *args, **kwargs):
+        """ Serialize, dropping whichever of id / globalKey is unset in dataRow (tolerates keys already excluded) """
+        res = super().dict(*args, **kwargs)
+        if not self.data_row.id:
+            res['dataRow'].pop('id', None)
+        if not self.data_row.global_key:
+            res['dataRow'].pop('globalKey', None)
+        return res
+
     class Config:
         allow_population_by_field_name = True
         alias_generator = camel_case
diff --git a/labelbox/data/serialization/ndjson/classification.py b/labelbox/data/serialization/ndjson/classification.py
index bbd4342e2..7b48c3a30 100644
--- a/labelbox/data/serialization/ndjson/classification.py
+++ b/labelbox/data/serialization/ndjson/classification.py
@@ -8,7 +8,7 @@
 from ...annotation_types.classification.classification import ClassificationAnswer, Dropdown, Text, Checklist, Radio
 from ...annotation_types.types import Cuid
 from ...annotation_types.data import TextData, VideoData, ImageData
-from .base import NDAnnotation
+from .base import DataRow, NDAnnotation
 
 
 class NDFeature(ConfidenceMixin):
@@ -125,7 +125,7 @@ def from_common(cls, text: Text, name: str, feature_schema_id: Cuid,
                                                        ImageData]) -> "NDText":
         return cls(
             answer=text.answer,
-            data_row={'id': data.uid},
+            data_row=DataRow(id=data.uid, global_key=data.global_key),
             name=name,
             schema_id=feature_schema_id,
             uuid=extra.get('uuid'),
@@ -145,7 +145,7 @@ def from_common(
                       confidence=answer.confidence)
             for answer in checklist.answer
         ],
-                   data_row={'id': data.uid},
+                   data_row=DataRow(id=data.uid, global_key=data.global_key),
                    name=name,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'),
@@ -161,7 +161,7 @@ def from_common(cls, radio: Radio, name: str, feature_schema_id: Cuid,
         return cls(answer=NDFeature(name=radio.answer.name,
                                     schema_id=radio.answer.feature_schema_id,
                                     confidence=radio.answer.confidence),
-                   data_row={'id': data.uid},
+                   data_row=DataRow(id=data.uid, global_key=data.global_key),
                    name=name,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'),
diff --git a/labelbox/data/serialization/ndjson/label.py b/labelbox/data/serialization/ndjson/label.py
index 8dd339767..2bc8bddb9 100644
--- a/labelbox/data/serialization/ndjson/label.py
+++ b/labelbox/data/serialization/ndjson/label.py
@@ -1,6 +1,6 @@
 from itertools import groupby
 from operator import itemgetter
-from typing import Dict, Generator, List, Tuple, Union
+from typing import Dict, Generator, List, Optional, Tuple, Union
 from collections import defaultdict
 import warnings
 
@@ -17,6 +17,7 @@
 from .metric import NDScalarMetric, NDMetricAnnotation, NDConfusionMatrixMetric
 from .classification import NDChecklistSubclass, NDClassification, NDClassificationType, NDRadioSubclass
 from .objects import NDObject, NDObjectType, NDSegments
+from .base import DataRow
 
 
 class NDLabel(BaseModel):
@@ -27,7 +28,10 @@ class NDLabel(BaseModel):
     def to_common(self) -> LabelGenerator:
         grouped_annotations = defaultdict(list)
         for annotation in self.annotations:
-            grouped_annotations[annotation.data_row.id].append(annotation)
+            grouped_annotations[annotation.data_row.id or
+                                annotation.data_row.global_key].append(
+                                    annotation)
+        print(grouped_annotations)
         return LabelGenerator(
             data=self._generate_annotations(grouped_annotations))
 
@@ -45,9 +49,11 @@ def _generate_annotations(
                                              NDConfusionMatrixMetric,
                                              NDScalarMetric, NDSegments]]]
     ) -> Generator[Label, None, None]:
-        for data_row_id, annotations in grouped_annotations.items():
+        for _, annotations in grouped_annotations.items():
             annots = []
+            data_row = annotations[0].data_row
             for annotation in annotations:
+
                 if isinstance(annotation, NDSegments):
                     annots.extend(
                         NDSegments.to_common(annotation, annotation.name,
@@ -62,22 +68,30 @@ def _generate_annotations(
                 else:
                     raise TypeError(
                         f"Unsupported annotation. {type(annotation)}")
-            data = self._infer_media_type(annots)(uid=data_row_id)
-            yield Label(annotations=annots, data=data)
+            yield Label(annotations=annots,
+                        data=self._infer_media_type(data_row, annots))
 
     def _infer_media_type(
-        self, annotations: List[Union[TextEntity, VideoClassificationAnnotation,
-                                      VideoObjectAnnotation, ObjectAnnotation,
-                                      ClassificationAnnotation, ScalarMetric,
-                                      ConfusionMatrixMetric]]
+        self, data_row: DataRow,
+        annotations: List[Union[TextEntity, VideoClassificationAnnotation,
+                                VideoObjectAnnotation, ObjectAnnotation,
+                                ClassificationAnnotation, ScalarMetric,
+                                ConfusionMatrixMetric]]
     ) -> Union[TextData, VideoData, ImageData]:
+        if len(annotations) == 0:
+            raise ValueError("Missing annotations while inferring media type")
+
         types = {type(annotation) for annotation in annotations}
+        data = ImageData
         if TextEntity in types:
-            return TextData
+            data = TextData
         elif VideoClassificationAnnotation in types or VideoObjectAnnotation in types:
-            return VideoData
+            data = VideoData
+
+        if data_row.id:
+            return data(uid=data_row.id)
         else:
-            return ImageData
+            return data(global_key=data_row.global_key)
 
     @staticmethod
     def _get_consecutive_frames(
diff --git a/labelbox/data/serialization/ndjson/metric.py b/labelbox/data/serialization/ndjson/metric.py
index a3f648698..5abbf2761 100644
--- a/labelbox/data/serialization/ndjson/metric.py
+++ b/labelbox/data/serialization/ndjson/metric.py
@@ -1,7 +1,7 @@
 from typing import Optional, Union, Type
 
 from labelbox.data.annotation_types.data import ImageData, TextData
-from labelbox.data.serialization.ndjson.base import NDJsonBase
+from labelbox.data.serialization.ndjson.base import DataRow, NDJsonBase
 from labelbox.data.annotation_types.metrics.scalar import (
     ScalarMetric, ScalarMetricAggregation, ScalarMetricValue,
     ScalarMetricConfidenceValue)
@@ -50,7 +50,7 @@ def from_common(
                    feature_name=metric.feature_name,
                    subclass_name=metric.subclass_name,
                    aggregation=metric.aggregation,
-                   data_row={'id': data.uid})
+                   data_row=DataRow(id=data.uid, global_key=data.global_key))
 
 
 class NDScalarMetric(BaseNDMetric):
@@ -75,7 +75,7 @@ def from_common(cls, metric: ScalarMetric,
                    feature_name=metric.feature_name,
                    subclass_name=metric.subclass_name,
                    aggregation=metric.aggregation.value,
-                   data_row={'id': data.uid})
+                   data_row=DataRow(id=data.uid, global_key=data.global_key))
 
     def dict(self, *args, **kwargs):
         res = super().dict(*args, **kwargs)
diff --git a/labelbox/data/serialization/ndjson/objects.py b/labelbox/data/serialization/ndjson/objects.py
index 796465c4b..70e672816 100644
--- a/labelbox/data/serialization/ndjson/objects.py
+++ b/labelbox/data/serialization/ndjson/objects.py
@@ -60,7 +60,7 @@ def from_common(cls,
             'x': point.x,
             'y': point.y
         },
-                   dataRow=DataRow(id=data.uid),
+                   data_row=DataRow(id=data.uid, global_key=data.global_key),
                    name=name,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'),
@@ -105,7 +105,7 @@ def from_common(cls,
             'x': pt.x,
             'y': pt.y
         } for pt in line.points],
-                   dataRow=DataRow(id=data.uid),
+                   data_row=DataRow(id=data.uid, global_key=data.global_key),
                    name=name,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'),
@@ -154,7 +154,7 @@ def from_common(cls,
             'x': pt.x,
             'y': pt.y
         } for pt in polygon.points],
-                   dataRow=DataRow(id=data.uid),
+                   data_row=DataRow(id=data.uid, global_key=data.global_key),
                    name=name,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'),
@@ -183,7 +183,7 @@ def from_common(cls,
                              left=rectangle.start.x,
                              height=rectangle.end.y - rectangle.start.y,
                              width=rectangle.end.x - rectangle.start.x),
-                   dataRow=DataRow(id=data.uid),
+                   data_row=DataRow(id=data.uid, global_key=data.global_key),
                    name=name,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'),
@@ -280,7 +280,7 @@ def from_common(cls, segments: List[VideoObjectAnnotation], data: VideoData,
         segments = [NDSegment.from_common(segment) for segment in segments]
 
         return cls(segments=segments,
-                   dataRow=DataRow(id=data.uid),
+                   data_row=DataRow(id=data.uid, global_key=data.global_key),
                    name=name,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'))
@@ -332,7 +332,7 @@ def from_common(cls,
                 png=base64.b64encode(im_bytes.getvalue()).decode('utf-8'))
 
         return cls(mask=lbv1_mask,
-                   dataRow=DataRow(id=data.uid),
+                   data_row=DataRow(id=data.uid, global_key=data.global_key),
                    name=name,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'),
@@ -364,7 +364,7 @@ def from_common(cls,
             start=text_entity.start,
             end=text_entity.end,
         ),
-                   dataRow=DataRow(id=data.uid),
+                   data_row=DataRow(id=data.uid, global_key=data.global_key),
                    name=name,
                    schema_id=feature_schema_id,
                    uuid=extra.get('uuid'),
diff --git a/labelbox/schema/annotation_import.py b/labelbox/schema/annotation_import.py
index 65261a62f..473350608 100644
--- a/labelbox/schema/annotation_import.py
+++ b/labelbox/schema/annotation_import.py
@@ -155,7 +155,7 @@ def _get_ndjson_from_objects(cls, objects: Union[List[Dict[str, Any]],
             )
 
         objects = serialize_labels(objects)
-        # cls.validate_data_rows(objects)
+        cls.validate_data_rows(objects)
 
         data_str = ndjson.dumps(objects)
         if not data_str:

From 63d30eb0a430ad5c8ded07b03d4c2d9419d0c8be Mon Sep 17 00:00:00 2001
From: Kevin Kim <kkim@kevins-mbp-2.lan>
Date: Fri, 10 Mar 2023 18:04:36 -0800
Subject: [PATCH 3/5] Remove prints

---
 labelbox/data/serialization/ndjson/label.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/labelbox/data/serialization/ndjson/label.py b/labelbox/data/serialization/ndjson/label.py
index 2bc8bddb9..f9e848a64 100644
--- a/labelbox/data/serialization/ndjson/label.py
+++ b/labelbox/data/serialization/ndjson/label.py
@@ -31,7 +31,6 @@ def to_common(self) -> LabelGenerator:
             grouped_annotations[annotation.data_row.id or
                                 annotation.data_row.global_key].append(
                                     annotation)
-        print(grouped_annotations)
         return LabelGenerator(
             data=self._generate_annotations(grouped_annotations))
 

From 67dc7a9c165de7c6a36876ac1885498c3bd355ae Mon Sep 17 00:00:00 2001
From: Kevin Kim <kkim@kevins-mbp-2.lan>
Date: Mon, 13 Mar 2023 16:32:01 -0700
Subject: [PATCH 4/5] Address PR comments and fix build failure

---
 labelbox/data/serialization/ndjson/base.py    |   4 +-
 labelbox/schema/annotation_import.py          |   8 +-
 labelbox/utils.py                             |   4 +
 tests/data/annotation_types/test_metrics.py   |   3 +
 .../classification_import_global_key.json     |  34 +
 .../ndjson/image_import_global_key.json       | 793 ++++++++++++++++++
 .../ndjson/metric_import_global_key.json      |   1 +
 .../assets/ndjson/pdf_import_global_key.json  | 100 +++
 .../ndjson/polyline_import_global_key.json    |  26 +
 .../ndjson/text_entity_import_global_key.json |  16 +
 .../ndjson/video_import_global_key.json       | 106 +++
 .../serialization/ndjson/test_global_key.py   |  67 ++
 .../annotation_import/test_label_import.py    |  57 --
 .../test_mea_prediction_import.py             |  32 -
 tests/unit/test_annotation_import.py          |  85 ++
 15 files changed, 1242 insertions(+), 94 deletions(-)
 create mode 100644 tests/data/assets/ndjson/classification_import_global_key.json
 create mode 100644 tests/data/assets/ndjson/image_import_global_key.json
 create mode 100644 tests/data/assets/ndjson/metric_import_global_key.json
 create mode 100644 tests/data/assets/ndjson/pdf_import_global_key.json
 create mode 100644 tests/data/assets/ndjson/polyline_import_global_key.json
 create mode 100644 tests/data/assets/ndjson/text_entity_import_global_key.json
 create mode 100644 tests/data/assets/ndjson/video_import_global_key.json
 create mode 100644 tests/data/serialization/ndjson/test_global_key.py
 create mode 100644 tests/unit/test_annotation_import.py

diff --git a/labelbox/data/serialization/ndjson/base.py b/labelbox/data/serialization/ndjson/base.py
index b3ef0fc8b..493771bfe 100644
--- a/labelbox/data/serialization/ndjson/base.py
+++ b/labelbox/data/serialization/ndjson/base.py
@@ -2,7 +2,7 @@
 from uuid import uuid4
 from pydantic import BaseModel, root_validator, validator, Field
 
-from labelbox.utils import camel_case
+from labelbox.utils import camel_case, is_exactly_one_set
 from ...annotation_types.types import Cuid
 
 
@@ -12,7 +12,7 @@ class DataRow(BaseModel):
 
     @root_validator()
     def must_set_one(cls, values):
-        if bool(values.get('id')) == bool(values.get('global_key')):
+        if not is_exactly_one_set(values.get('id'), values.get('global_key')):
             raise ValueError("Must set either id or global_key")
         return values
 
diff --git a/labelbox/schema/annotation_import.py b/labelbox/schema/annotation_import.py
index 473350608..a22525816 100644
--- a/labelbox/schema/annotation_import.py
+++ b/labelbox/schema/annotation_import.py
@@ -14,6 +14,7 @@
 from labelbox.orm import query
 from labelbox.orm.db_object import DbObject
 from labelbox.orm.model import Field, Relationship
+from labelbox.utils import is_exactly_one_set
 from labelbox.schema.confidence_presence_checker import LabelsConfidencePresenceChecker
 from labelbox.schema.enums import AnnotationImportState
 from labelbox.schema.serialization import serialize_labels
@@ -155,7 +156,7 @@ def _get_ndjson_from_objects(cls, objects: Union[List[Dict[str, Any]],
             )
 
         objects = serialize_labels(objects)
-        cls.validate_data_rows(objects)
+        cls._validate_data_rows(objects)
 
         data_str = ndjson.dumps(objects)
         if not data_str:
@@ -174,7 +175,7 @@ def refresh(self) -> None:
         self._set_field_values(res)
 
     @classmethod
-    def validate_data_rows(cls, objects: List[Dict[str, Any]]):
+    def _validate_data_rows(cls, objects: List[Dict[str, Any]]):
         """
         Validates annotations by checking 'dataRow' is provided
         and only one of 'id' or 'globalKey' is provided.
@@ -187,7 +188,8 @@ def validate_data_rows(cls, objects: List[Dict[str, Any]]):
         for object in objects:
             if 'dataRow' not in object:
                 errors.append(f"'dataRow' is missing in {object}")
-            elif 'id' in object['dataRow'] and 'globalKey' in object['dataRow']:
+            elif not is_exactly_one_set(object['dataRow'].get('id'),
+                                        object['dataRow'].get('globalKey')):
                 errors.append(
                     f"Must provide only one of 'id' or 'globalKey' for 'dataRow' in {object}"
                 )
diff --git a/labelbox/utils.py b/labelbox/utils.py
index 624437182..27106abb5 100644
--- a/labelbox/utils.py
+++ b/labelbox/utils.py
@@ -26,6 +26,10 @@ def snake_case(s):
     return _convert(s, "_", lambda i: False)
 
 
+def is_exactly_one_set(x, y):
+    return not (bool(x) == bool(y))
+
+
 class _CamelCaseMixin(BaseModel):
 
     class Config:
diff --git a/tests/data/annotation_types/test_metrics.py b/tests/data/annotation_types/test_metrics.py
index 389c154aa..3bc447309 100644
--- a/tests/data/annotation_types/test_metrics.py
+++ b/tests/data/annotation_types/test_metrics.py
@@ -18,6 +18,7 @@ def test_legacy_scalar_metric():
         'data': {
             'external_id': None,
             'uid': 'ckrmd9q8g000009mg6vej7hzg',
+            'global_key': None,
             'im_bytes': None,
             'file_path': None,
             'url': None,
@@ -68,6 +69,7 @@ def test_custom_scalar_metric(feature_name, subclass_name, aggregation, value):
         'data': {
             'external_id': None,
             'uid': 'ckrmd9q8g000009mg6vej7hzg',
+            'global_key': None,
             'im_bytes': None,
             'file_path': None,
             'url': None,
@@ -124,6 +126,7 @@ def test_custom_confusison_matrix_metric(feature_name, subclass_name,
         'data': {
             'external_id': None,
             'uid': 'ckrmd9q8g000009mg6vej7hzg',
+            'global_key': None,
             'im_bytes': None,
             'file_path': None,
             'url': None,
diff --git a/tests/data/assets/ndjson/classification_import_global_key.json b/tests/data/assets/ndjson/classification_import_global_key.json
new file mode 100644
index 000000000..0c8e5482b
--- /dev/null
+++ b/tests/data/assets/ndjson/classification_import_global_key.json
@@ -0,0 +1,34 @@
+[
+    {
+        "answer": {
+            "schemaId": "ckrb1sfl8099g0y91cxbd5ftb",
+            "confidence": 0.8
+        },
+        "schemaId": "c123",
+        "dataRow": {
+            "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+        },
+        "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673"
+    },
+    {
+        "answer": [
+            {
+                "schemaId": "ckrb1sfl8099e0y919v260awv",
+                "confidence": 0.82
+            }
+        ],
+        "schemaId": "ckrb1sfkn099c0y910wbo0p1a",
+        "dataRow": {
+            "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+        },
+        "uuid": "d009925d-91a3-4f67-abd9-753453f5a584"
+    },
+    {
+        "answer": "a value",
+        "schemaId": "ckrb1sfkn099c0y910wbo0p1a",
+        "dataRow": {
+            "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+        },
+        "uuid": "d009925d-91a3-4f67-abd9-753453f5a584"
+    }
+]
\ No newline at end of file
diff --git a/tests/data/assets/ndjson/image_import_global_key.json b/tests/data/assets/ndjson/image_import_global_key.json
new file mode 100644
index 000000000..64961fc75
--- /dev/null
+++ b/tests/data/assets/ndjson/image_import_global_key.json
@@ -0,0 +1,793 @@
+[
+    {
+        "uuid": "b862c586-8614-483c-b5e6-82810f70cac0",
+        "schemaId": "ckrazcueb16og0z6609jj7y3y",
+        "dataRow": {
+            "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+        },
+        "confidence": 0.851,
+        "bbox": {
+            "top": 1352,
+            "left": 2275,
+            "height": 350,
+            "width": 139
+        }
+    },
+    {
+        "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6",
+        "schemaId": "ckrazcuec16ok0z66f956apb7",
+        "dataRow": {
+            "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+        },
+        "confidence": 0.834,
+        "mask": {
+            "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY",
+            "colorRGB": [
+                255,
+                0,
+                0
+            ]
+        }
+    },
+    {
+        "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd",
+        "schemaId": "ckrazcuec16oi0z66dzrd8pfl",
+        "dataRow": {
+            "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+        },
+        "confidence": 0.986,
+        "polygon": [
+            {
+                "x": 1118,
+                "y": 935
+            },
+            {
+                "x": 1117,
+                "y": 935
+            },
+            {
+                "x": 1116,
+                "y": 935
+            },
+            {
+                "x": 1115,
+                "y": 935
+            },
+            {
+                "x": 1114,
+                "y": 935
+            },
+            {
+                "x": 1113,
+                "y": 935
+            },
+            {
+                "x": 1112,
+                "y": 935
+            },
+            {
+                "x": 1111,
+                "y": 935
+            },
+            {
+                "x": 1110,
+                "y": 935
+            },
+            {
+                "x": 1109,
+                "y": 935
+            },
+            {
+                "x": 1108,
+                "y": 935
+            },
+            {
+                "x": 1108,
+                "y": 934
+            },
+            {
+                "x": 1107,
+                "y": 934
+            },
+            {
+                "x": 1106,
+                "y": 934
+            },
+            {
+                "x": 1105,
+                "y": 934
+            },
+            {
+                "x": 1105,
+                "y": 933
+            },
+            {
+                "x": 1104,
+                "y": 933
+            },
+            {
+                "x": 1103,
+                "y": 933
+            },
+            {
+                "x": 1103,
+                "y": 932
+            },
+            {
+                "x": 1102,
+                "y": 932
+            },
+            {
+                "x": 1101,
+                "y": 932
+            },
+            {
+                "x": 1100,
+                "y": 932
+            },
+            {
+                "x": 1099,
+                "y": 932
+            },
+            {
+                "x": 1098,
+                "y": 932
+            },
+            {
+                "x": 1097,
+                "y": 932
+            },
+            {
+                "x": 1097,
+                "y": 931
+            },
+            {
+                "x": 1096,
+                "y": 931
+            },
+            {
+                "x": 1095,
+                "y": 931
+            },
+            {
+                "x": 1094,
+                "y": 931
+            },
+            {
+                "x": 1093,
+                "y": 931
+            },
+            {
+                "x": 1092,
+                "y": 931
+            },
+            {
+                "x": 1091,
+                "y": 931
+            },
+            {
+                "x": 1090,
+                "y": 931
+            },
+            {
+                "x": 1090,
+                "y": 930
+            },
+            {
+                "x": 1089,
+                "y": 930
+            },
+            {
+                "x": 1088,
+                "y": 930
+            },
+            {
+                "x": 1087,
+                "y": 930
+            },
+            {
+                "x": 1087,
+                "y": 929
+            },
+            {
+                "x": 1086,
+                "y": 929
+            },
+            {
+                "x": 1085,
+                "y": 929
+            },
+            {
+                "x": 1084,
+                "y": 929
+            },
+            {
+                "x": 1084,
+                "y": 928
+            },
+            {
+                "x": 1083,
+                "y": 928
+            },
+            {
+                "x": 1083,
+                "y": 927
+            },
+            {
+                "x": 1082,
+                "y": 927
+            },
+            {
+                "x": 1081,
+                "y": 927
+            },
+            {
+                "x": 1081,
+                "y": 926
+            },
+            {
+                "x": 1080,
+                "y": 926
+            },
+            {
+                "x": 1080,
+                "y": 925
+            },
+            {
+                "x": 1079,
+                "y": 925
+            },
+            {
+                "x": 1078,
+                "y": 925
+            },
+            {
+                "x": 1078,
+                "y": 924
+            },
+            {
+                "x": 1077,
+                "y": 924
+            },
+            {
+                "x": 1076,
+                "y": 924
+            },
+            {
+                "x": 1076,
+                "y": 923
+            },
+            {
+                "x": 1075,
+                "y": 923
+            },
+            {
+                "x": 1074,
+                "y": 923
+            },
+            {
+                "x": 1073,
+                "y": 923
+            },
+            {
+                "x": 1073,
+                "y": 922
+            },
+            {
+                "x": 1072,
+                "y": 922
+            },
+            {
+                "x": 1071,
+                "y": 922
+            },
+            {
+                "x": 1070,
+                "y": 922
+            },
+            {
+                "x": 1070,
+                "y": 921
+            },
+            {
+                "x": 1069,
+                "y": 921
+            },
+            {
+                "x": 1068,
+                "y": 921
+            },
+            {
+                "x": 1067,
+                "y": 921
+            },
+            {
+                "x": 1066,
+                "y": 921
+            },
+            {
+                "x": 1065,
+                "y": 921
+            },
+            {
+                "x": 1064,
+                "y": 921
+            },
+            {
+                "x": 1063,
+                "y": 921
+            },
+            {
+                "x": 1062,
+                "y": 921
+            },
+            {
+                "x": 1061,
+                "y": 921
+            },
+            {
+                "x": 1060,
+                "y": 921
+            },
+            {
+                "x": 1059,
+                "y": 921
+            },
+            {
+                "x": 1058,
+                "y": 921
+            },
+            {
+                "x": 1058,
+                "y": 920
+            },
+            {
+                "x": 1057,
+                "y": 920
+            },
+            {
+                "x": 1057,
+                "y": 919
+            },
+            {
+                "x": 1056,
+                "y": 919
+            },
+            {
+                "x": 1057,
+                "y": 918
+            },
+            {
+                "x": 1057,
+                "y": 918
+            },
+            {
+                "x": 1057,
+                "y": 917
+            },
+            {
+                "x": 1058,
+                "y": 916
+            },
+            {
+                "x": 1058,
+                "y": 916
+            },
+            {
+                "x": 1059,
+                "y": 915
+            },
+            {
+                "x": 1059,
+                "y": 915
+            },
+            {
+                "x": 1060,
+                "y": 914
+            },
+            {
+                "x": 1060,
+                "y": 914
+            },
+            {
+                "x": 1061,
+                "y": 913
+            },
+            {
+                "x": 1061,
+                "y": 913
+            },
+            {
+                "x": 1062,
+                "y": 912
+            },
+            {
+                "x": 1063,
+                "y": 912
+            },
+            {
+                "x": 1063,
+                "y": 912
+            },
+            {
+                "x": 1064,
+                "y": 911
+            },
+            {
+                "x": 1064,
+                "y": 911
+            },
+            {
+                "x": 1065,
+                "y": 910
+            },
+            {
+                "x": 1066,
+                "y": 910
+            },
+            {
+                "x": 1066,
+                "y": 910
+            },
+            {
+                "x": 1067,
+                "y": 909
+            },
+            {
+                "x": 1068,
+                "y": 909
+            },
+            {
+                "x": 1068,
+                "y": 909
+            },
+            {
+                "x": 1069,
+                "y": 908
+            },
+            {
+                "x": 1070,
+                "y": 908
+            },
+            {
+                "x": 1071,
+                "y": 908
+            },
+            {
+                "x": 1072,
+                "y": 908
+            },
+            {
+                "x": 1072,
+                "y": 908
+            },
+            {
+                "x": 1073,
+                "y": 907
+            },
+            {
+                "x": 1074,
+                "y": 907
+            },
+            {
+                "x": 1075,
+                "y": 907
+            },
+            {
+                "x": 1076,
+                "y": 907
+            },
+            {
+                "x": 1077,
+                "y": 907
+            },
+            {
+                "x": 1078,
+                "y": 907
+            },
+            {
+                "x": 1079,
+                "y": 907
+            },
+            {
+                "x": 1080,
+                "y": 907
+            },
+            {
+                "x": 1081,
+                "y": 907
+            },
+            {
+                "x": 1082,
+                "y": 907
+            },
+            {
+                "x": 1083,
+                "y": 907
+            },
+            {
+                "x": 1084,
+                "y": 907
+            },
+            {
+                "x": 1085,
+                "y": 907
+            },
+            {
+                "x": 1086,
+                "y": 907
+            },
+            {
+                "x": 1087,
+                "y": 907
+            },
+            {
+                "x": 1088,
+                "y": 907
+            },
+            {
+                "x": 1089,
+                "y": 907
+            },
+            {
+                "x": 1090,
+                "y": 907
+            },
+            {
+                "x": 1091,
+                "y": 907
+            },
+            {
+                "x": 1091,
+                "y": 908
+            },
+            {
+                "x": 1092,
+                "y": 908
+            },
+            {
+                "x": 1093,
+                "y": 908
+            },
+            {
+                "x": 1094,
+                "y": 908
+            },
+            {
+                "x": 1095,
+                "y": 908
+            },
+            {
+                "x": 1095,
+                "y": 909
+            },
+            {
+                "x": 1096,
+                "y": 909
+            },
+            {
+                "x": 1097,
+                "y": 909
+            },
+            {
+                "x": 1097,
+                "y": 910
+            },
+            {
+                "x": 1098,
+                "y": 910
+            },
+            {
+                "x": 1099,
+                "y": 910
+            },
+            {
+                "x": 1099,
+                "y": 911
+            },
+            {
+                "x": 1100,
+                "y": 911
+            },
+            {
+                "x": 1101,
+                "y": 911
+            },
+            {
+                "x": 1101,
+                "y": 912
+            },
+            {
+                "x": 1102,
+                "y": 912
+            },
+            {
+                "x": 1103,
+                "y": 912
+            },
+            {
+                "x": 1103,
+                "y": 913
+            },
+            {
+                "x": 1104,
+                "y": 913
+            },
+            {
+                "x": 1104,
+                "y": 914
+            },
+            {
+                "x": 1105,
+                "y": 914
+            },
+            {
+                "x": 1105,
+                "y": 915
+            },
+            {
+                "x": 1106,
+                "y": 915
+            },
+            {
+                "x": 1107,
+                "y": 915
+            },
+            {
+                "x": 1107,
+                "y": 916
+            },
+            {
+                "x": 1108,
+                "y": 916
+            },
+            {
+                "x": 1108,
+                "y": 917
+            },
+            {
+                "x": 1109,
+                "y": 917
+            },
+            {
+                "x": 1109,
+                "y": 918
+            },
+            {
+                "x": 1110,
+                "y": 918
+            },
+            {
+                "x": 1110,
+                "y": 919
+            },
+            {
+                "x": 1111,
+                "y": 919
+            },
+            {
+                "x": 1111,
+                "y": 920
+            },
+            {
+                "x": 1112,
+                "y": 920
+            },
+            {
+                "x": 1112,
+                "y": 921
+            },
+            {
+                "x": 1113,
+                "y": 921
+            },
+            {
+                "x": 1113,
+                "y": 922
+            },
+            {
+                "x": 1114,
+                "y": 922
+            },
+            {
+                "x": 1114,
+                "y": 923
+            },
+            {
+                "x": 1115,
+                "y": 923
+            },
+            {
+                "x": 1115,
+                "y": 924
+            },
+            {
+                "x": 1115,
+                "y": 925
+            },
+            {
+                "x": 1116,
+                "y": 925
+            },
+            {
+                "x": 1116,
+                "y": 926
+            },
+            {
+                "x": 1117,
+                "y": 926
+            },
+            {
+                "x": 1117,
+                "y": 927
+            },
+            {
+                "x": 1117,
+                "y": 928
+            },
+            {
+                "x": 1118,
+                "y": 928
+            },
+            {
+                "x": 1118,
+                "y": 929
+            },
+            {
+                "x": 1119,
+                "y": 929
+            },
+            {
+                "x": 1119,
+                "y": 930
+            },
+            {
+                "x": 1120,
+                "y": 930
+            },
+            {
+                "x": 1120,
+                "y": 931
+            },
+            {
+                "x": 1120,
+                "y": 932
+            },
+            {
+                "x": 1120,
+                "y": 932
+            },
+            {
+                "x": 1119,
+                "y": 933
+            },
+            {
+                "x": 1119,
+                "y": 934
+            },
+            {
+                "x": 1119,
+                "y": 934
+            },
+            {
+                "x": 1118,
+                "y": 935
+            },
+            {
+                "x": 1118,
+                "y": 935
+            }
+        ]
+    },
+    {
+        "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2",
+        "schemaId": "ckrazcuec16om0z66bhhh4tp7",
+        "dataRow": {
+            "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+        },
+        "point": {
+            "x": 2122,
+            "y": 1457
+        }
+    }
+]
\ No newline at end of file
diff --git a/tests/data/assets/ndjson/metric_import_global_key.json b/tests/data/assets/ndjson/metric_import_global_key.json
new file mode 100644
index 000000000..666f4ec97
--- /dev/null
+++ b/tests/data/assets/ndjson/metric_import_global_key.json
@@ -0,0 +1 @@
+[{"uuid" : "a22bbf6e-b2da-4abe-9a11-df84759f7672","dataRow" : {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, "metricValue" : 0.1}]
diff --git a/tests/data/assets/ndjson/pdf_import_global_key.json b/tests/data/assets/ndjson/pdf_import_global_key.json
new file mode 100644
index 000000000..8a14273d6
--- /dev/null
+++ b/tests/data/assets/ndjson/pdf_import_global_key.json
@@ -0,0 +1,100 @@
+[{
+    "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4",
+    "dataRow": {
+        "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+    },
+    "name": "boxy",
+    "schemaId": "cl6xnuwt95lqq07330tbb3mfd",
+    "classifications": [],
+    "page": 4,
+    "unit": "POINTS",
+    "confidence": 0.53,
+    "bbox": {
+        "top": 162.73,
+        "left": 32.45,
+        "height": 388.16999999999996,
+        "width": 101.66000000000001
+    }
+}, {
+    "uuid": "20eeef88-0294-49b4-a815-86588476bc6f",
+    "dataRow": {
+        "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+    },
+    "name": "boxy",
+    "schemaId": "cl6xnuwt95lqq07330tbb3mfd",
+    "classifications": [],
+    "page": 7,
+    "unit": "POINTS",
+    "bbox": {
+        "top": 223.26,
+        "left": 251.42,
+        "height": 457.03999999999996,
+        "width": 186.78
+    }
+}, {
+    "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5",
+    "dataRow": {
+        "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+    },
+    "name": "boxy",
+    "schemaId": "cl6xnuwt95lqq07330tbb3mfd",
+    "classifications": [],
+    "page": 6,
+    "unit": "POINTS",
+    "confidence": 0.99,
+    "bbox": {
+        "top": 32.52,
+        "left": 218.17,
+        "height": 231.73,
+        "width": 110.56000000000003
+    }
+}, {
+    "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c",
+    "dataRow": {
+        "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+    },
+    "name": "boxy",
+    "schemaId": "cl6xnuwt95lqq07330tbb3mfd",
+    "classifications": [],
+    "page": 7,
+    "unit": "POINTS",
+    "confidence": 0.89,
+    "bbox": {
+        "top": 117.39,
+        "left": 4.25,
+        "height": 456.9200000000001,
+        "width": 164.83
+    }
+}, {
+    "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63",
+    "dataRow": {
+        "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+    },
+    "name": "boxy",
+    "schemaId": "cl6xnuwt95lqq07330tbb3mfd",
+    "classifications": [],
+    "page": 8,
+    "unit": "POINTS",
+    "bbox": {
+        "top": 82.13,
+        "left": 217.28,
+        "height": 279.76,
+        "width": 82.43000000000004
+    }
+}, {
+    "uuid": "1b009654-bc17-42a2-8a71-160e7808c403",
+    "dataRow": {
+        "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+    },
+    "name": "boxy",
+    "schemaId": "cl6xnuwt95lqq07330tbb3mfd",
+    "classifications": [],
+    "page": 3,
+    "unit": "POINTS",
+    "bbox": {
+        "top": 298.12,
+        "left": 83.34,
+        "height": 203.83000000000004,
+        "width": 0.37999999999999545
+    }
+}]
\ No newline at end of file
diff --git a/tests/data/assets/ndjson/polyline_import_global_key.json b/tests/data/assets/ndjson/polyline_import_global_key.json
new file mode 100644
index 000000000..f32612cfa
--- /dev/null
+++ b/tests/data/assets/ndjson/polyline_import_global_key.json
@@ -0,0 +1,26 @@
+[
+    {
+        "line": [
+            {
+                "x": 2534.353,
+                "y": 249.471
+            },
+            {
+                "x": 2429.492,
+                "y": 182.092
+            },
+            {
+                "x": 2294.322,
+                "y": 221.962
+            }
+        ],
+        "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4",
+        "dataRow": {
+            "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+        },
+        "name": "some-line",
+        "schemaId": "cl6xnuwt95lqq07330tbb3mfd",
+        "classifications": [],
+        "confidence": 0.58
+    }
+]
\ No newline at end of file
diff --git a/tests/data/assets/ndjson/text_entity_import_global_key.json b/tests/data/assets/ndjson/text_entity_import_global_key.json
new file mode 100644
index 000000000..b8453f9e4
--- /dev/null
+++ b/tests/data/assets/ndjson/text_entity_import_global_key.json
@@ -0,0 +1,16 @@
+[
+    {
+        "location": {
+            "start": 67,
+            "end": 128
+        },
+        "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4",
+        "dataRow": {
+            "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+        },
+        "name": "some-text-entity",
+        "schemaId": "cl6xnuwt95lqq07330tbb3mfd",
+        "classifications": [],
+        "confidence": 0.53
+    }
+]
\ No newline at end of file
diff --git a/tests/data/assets/ndjson/video_import_global_key.json b/tests/data/assets/ndjson/video_import_global_key.json
new file mode 100644
index 000000000..568501dc5
--- /dev/null
+++ b/tests/data/assets/ndjson/video_import_global_key.json
@@ -0,0 +1,106 @@
+[
+    {
+        "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"},
+        "schemaId": "ckrb1sfjx099a0y914hl319ie",
+        "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"},
+        "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673",
+        "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}]
+    },
+    {
+        "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}],
+        "schemaId": "ckrb1sfkn099c0y910wbo0p1a",
+        "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"},
+        "uuid": "d009925d-91a3-4f67-abd9-753453f5a584",
+        "frames": [{"start": 0, "end": 5}]
+    },
+    {
+        "answer": "a value",
+        "schemaId": "ckrb1sfkn099c0y910wbo0p1a",
+        "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"},
+        "uuid": "d009925d-91a3-4f67-abd9-753453f5a584"
+    },
+    {
+        "classifications": [],
+        "schemaId": "cl5islwg200gfci6g0oitaypu",
+        "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"},
+        "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94",
+        "segments": [
+            {
+                "keyframes": [
+                    {
+                        "frame": 1,
+                        "line": [{"x": 10.0, "y": 10.0}, {"x": 100.0, "y": 100.0}, {"x": 50.0, "y": 30.0}]
+                    },
+                    {
+                        "frame": 5,
+                        "line": [{"x": 15.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}]
+                    }
+                ]
+            },
+            {
+                "keyframes": [
+                    {
+                        "frame": 8,
+                        "line": [{"x": 100.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}]
+                    }
+                ]
+            }
+        ]
+    },
+    {
+        "classifications": [],
+        "schemaId": "cl5it7ktp00i5ci6gf80b1ysd",
+        "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"},
+        "uuid": "f963be22-227b-4efe-9be4-2738ed822216",
+        "segments": [
+            {
+                "keyframes": [
+                    {
+                        "frame": 1,
+                        "point": {"x": 10.0, "y": 10.0}
+                    }
+                ]
+            },
+            {
+                "keyframes": [
+                    {
+                        "frame": 5,
+                        "point": {"x": 50.0, "y": 50.0}
+                    },
+                    {
+                        "frame": 10,
+                        "point": {"x": 10.0, "y": 50.0}
+                    }
+                ]
+            }
+        ]
+    },
+    {
+        "classifications": [],
+        "schemaId": "cl5iw0roz00lwci6g5jni62vs",
+        "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"},
+        "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7",
+        "segments": [
+            {
+                "keyframes": [
+                    {
+                        "frame": 1,
+                        "bbox": {"top": 10.0, "left": 5.0, "height": 100.0, "width": 150.0}
+                    },
+                    {
+                        "frame": 5,
+                        "bbox": {"top": 30.0, "left": 5.0, "height": 50.0, "width": 150.0}
+                    }
+                ]
+            },
+            {
+                "keyframes": [
+                    {
+                        "frame": 10,
+                        "bbox": {"top": 300.0, "left": 200.0, "height": 400.0, "width": 150.0}
+                    }
+                ]
+            }
+        ]
+    }
+]
\ No newline at end of file
diff --git a/tests/data/serialization/ndjson/test_global_key.py b/tests/data/serialization/ndjson/test_global_key.py
new file mode 100644
index 000000000..7e3efdf6e
--- /dev/null
+++ b/tests/data/serialization/ndjson/test_global_key.py
@@ -0,0 +1,67 @@
+import json
+import pytest
+
+from labelbox.data.serialization.ndjson.classification import NDRadio
+
+from labelbox.data.serialization.ndjson.converter import NDJsonConverter
+from labelbox.data.serialization.ndjson.objects import NDLine
+
+
+def round_dict(data):
+    """Truncate float values of nested dicts to int, in place."""
+    if isinstance(data, dict):
+        for name, value in data.items():
+            if isinstance(value, float):
+                data[name] = int(value)
+            elif isinstance(value, dict):
+                data[name] = round_dict(value)
+            elif isinstance(value, (list, tuple)):
+                data[name] = [round_dict(item) for item in value]
+    return data
+
+
+@pytest.mark.parametrize('filename', [
+    'tests/data/assets/ndjson/classification_import_global_key.json',
+    'tests/data/assets/ndjson/metric_import_global_key.json',
+    'tests/data/assets/ndjson/polyline_import_global_key.json',
+    'tests/data/assets/ndjson/text_entity_import_global_key.json'
+])
+def test_many_types(filename: str):
+    """Each fixture must round-trip through NDJsonConverter unchanged."""
+    with open(filename, 'r') as f:
+        data = json.load(f)
+    res = list(NDJsonConverter.deserialize(data))
+    res = list(NDJsonConverter.serialize(res))
+    assert res == data
+
+
+def test_image():
+    """Round-trip image fixture; floats truncated, classifications dropped."""
+    with open('tests/data/assets/ndjson/image_import_global_key.json',
+              'r') as f:
+        data = json.load(f)
+    res = list(NDJsonConverter.deserialize(data))
+    res = list(NDJsonConverter.serialize(res))
+    for r in res:
+        r.pop('classifications', None)
+    assert [round_dict(x) for x in res] == [round_dict(x) for x in data]
+
+
+def test_pdf():
+    """PDF fixture must round-trip once floats are truncated to int."""
+    with open('tests/data/assets/ndjson/pdf_import_global_key.json', 'r') as f:
+        data = json.load(f)
+    res = list(NDJsonConverter.deserialize(data))
+    res = list(NDJsonConverter.serialize(res))
+    assert [round_dict(x) for x in res] == [round_dict(x) for x in data]
+
+
+def test_video():
+    """Video fixture must round-trip, except for the order of the entries."""
+    with open('tests/data/assets/ndjson/video_import_global_key.json',
+              'r') as f:
+        data = json.load(f)
+    res = list(NDJsonConverter.deserialize(data))
+    res = list(NDJsonConverter.serialize(res))
+    # Expected output order: fixture entry 2 (no 'frames') moves to the front.
+    assert res == [data[2], data[0], data[1], data[3], data[4], data[5]]
diff --git a/tests/integration/annotation_import/test_label_import.py b/tests/integration/annotation_import/test_label_import.py
index 9848186e0..44a308776 100644
--- a/tests/integration/annotation_import/test_label_import.py
+++ b/tests/integration/annotation_import/test_label_import.py
@@ -2,7 +2,6 @@
 import pytest
 
 from labelbox.schema.annotation_import import AnnotationImportState, LabelImport
-from labelbox.data.serialization import NDJsonConverter
 """
 - Here we only want to check that the uploads are calling the validation
 - Then with unit tests we can check the types of errors raised
@@ -38,62 +37,6 @@ def test_create_from_objects(client, configured_project, object_predictions,
         label_import.input_file_url, object_predictions)
 
 
-def test_data_row_validation_errors(client, configured_project,
-                                    object_predictions):
-    name = str(uuid.uuid4())
-    # Set up data for validation errors
-    # Invalid: Remove 'dataRow' part entirely
-    del object_predictions[0]['dataRow']
-
-    # Invalid: Set both id and globalKey
-    object_predictions[1]['dataRow'] = {
-        'id': 'some id',
-        'globalKey': 'some global key'
-    }
-
-    # Valid
-    object_predictions[2]['dataRow'] = {
-        'id': 'some id',
-    }
-
-    # Valid
-    object_predictions[3]['dataRow'] = {
-        'globalKey': 'some global key',
-    }
-
-    with pytest.raises(ValueError) as exc_info:
-        label_import = LabelImport.create_from_objects(
-            client=client,
-            project_id=configured_project.uid,
-            name=name,
-            labels=object_predictions)
-    exception_str = str(exc_info.value)
-    assert "Found 2 annotations with errors" in exception_str
-    assert "'dataRow' is missing in" in exception_str
-    assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str
-
-
-def test_create_from_label_objects(client, configured_project,
-                                   object_predictions,
-                                   annotation_import_test_helpers):
-    """this test should check running state only to validate running, not completed"""
-    name = str(uuid.uuid4())
-
-    labels = list(NDJsonConverter.deserialize(object_predictions))
-
-    label_import = LabelImport.create_from_objects(
-        client=client,
-        project_id=configured_project.uid,
-        name=name,
-        labels=labels)
-
-    assert label_import.parent_id == configured_project.uid
-    annotation_import_test_helpers.check_running_state(label_import, name)
-    normalized_predictions = NDJsonConverter.serialize(labels)
-    annotation_import_test_helpers.assert_file_content(
-        label_import.input_file_url, normalized_predictions)
-
-
 #   TODO: add me when we add this ability
 # def test_create_from_local_file(client, tmp_path, project,
 #                                 object_predictions, annotation_import_test_helpers):
diff --git a/tests/integration/annotation_import/test_mea_prediction_import.py b/tests/integration/annotation_import/test_mea_prediction_import.py
index 531139fcc..afca122b6 100644
--- a/tests/integration/annotation_import/test_mea_prediction_import.py
+++ b/tests/integration/annotation_import/test_mea_prediction_import.py
@@ -11,38 +11,6 @@
 """
 
 
-def test_data_row_validation_errors(model_run_with_all_project_labels,
-                                    object_predictions):
-    name = str(uuid.uuid4())
-    # Set up data for validation errors
-    # Invalid: Remove 'dataRow' part entirely
-    del object_predictions[0]['dataRow']
-
-    # Invalid: Set both id and globalKey
-    object_predictions[1]['dataRow'] = {
-        'id': 'some id',
-        'globalKey': 'some global key'
-    }
-
-    # Valid
-    object_predictions[2]['dataRow'] = {
-        'id': 'some id',
-    }
-
-    # Valid
-    object_predictions[3]['dataRow'] = {
-        'globalKey': 'some global key',
-    }
-
-    with pytest.raises(ValueError) as exc_info:
-        model_run_with_all_project_labels.add_predictions(
-            name=name, predictions=object_predictions)
-    exception_str = str(exc_info.value)
-    assert "Found 2 annotations with errors" in exception_str
-    assert "'dataRow' is missing in" in exception_str
-    assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str
-
-
 def test_create_from_url(model_run_with_model_run_data_rows,
                          annotation_import_test_helpers):
     name = str(uuid.uuid4())
diff --git a/tests/unit/test_annotation_import.py b/tests/unit/test_annotation_import.py
new file mode 100644
index 000000000..ff0835467
--- /dev/null
+++ b/tests/unit/test_annotation_import.py
@@ -0,0 +1,85 @@
+import pytest
+
+from labelbox.schema.annotation_import import AnnotationImport
+
+
+def test_data_row_validation_errors():
+    predictions = [
+        {
+            "answer": {
+                "schemaId": "ckrb1sfl8099g0y91cxbd5ftb",
+            },
+            "schemaId": "c123",
+            "dataRow": {
+                "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+            },
+        },
+        {
+            "answer": {
+                "schemaId": "ckrb1sfl8099g0y91cxbd5ftb",
+            },
+            "schemaId": "c123",
+            "dataRow": {
+                "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+            },
+        },
+        {
+            "answer": {
+                "schemaId": "ckrb1sfl8099g0y91cxbd5ftb",
+            },
+            "schemaId": "c123",
+            "dataRow": {
+                "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+            },
+        },
+        {
+            "answer": {
+                "schemaId": "ckrb1sfl8099g0y91cxbd5ftb",
+            },
+            "schemaId": "c123",
+            "dataRow": {
+                "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+            },
+        },
+        {
+            "answer": {
+                "schemaId": "ckrb1sfl8099g0y91cxbd5ftb",
+            },
+            "schemaId": "c123",
+            "dataRow": {
+                "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"
+            },
+        },
+    ]
+
+    # Rework 'dataRow' per entry: entries 0-2 become invalid, 3-4 stay valid
+    # Invalid: 'dataRow' key removed entirely
+    del predictions[0]['dataRow']
+
+    # Invalid: both 'id' and 'globalKey' are set at once
+    predictions[1]['dataRow'] = {
+        'id': 'some id',
+        'globalKey': 'some global key'
+    }
+
+    # Invalid: 'id' and 'globalKey' are both present but None
+    predictions[2]['dataRow'] = {'id': None, 'globalKey': None}
+
+    # Valid: only 'id' is set
+    predictions[3]['dataRow'] = {
+        'id': 'some id',
+    }
+
+    # Valid: only 'globalKey' is set
+    predictions[4]['dataRow'] = {
+        'globalKey': 'some global key',
+    }
+
+    with pytest.raises(ValueError) as exc_info:
+        AnnotationImport._validate_data_rows(predictions)
+    exception_str = str(exc_info.value)
+    assert "Found 3 annotations with errors" in exception_str
+    assert "'dataRow' is missing in" in exception_str
+    assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str
+    assert "'dataRow': {'id': 'some id', 'globalKey': 'some global key'}" in exception_str
+    assert "'dataRow': {'id': None, 'globalKey': None}" in exception_str

From 2ba04b937706a7ef56831f4c6e7751a6bacb135e Mon Sep 17 00:00:00 2001
From: Kevin Kim <kkim@kevins-mbp-2.lan>
Date: Mon, 13 Mar 2023 16:53:47 -0700
Subject: [PATCH 5/5] Add _CamelCaseMixin

---
 labelbox/data/serialization/ndjson/base.py | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/labelbox/data/serialization/ndjson/base.py b/labelbox/data/serialization/ndjson/base.py
index 493771bfe..4a2946e1f 100644
--- a/labelbox/data/serialization/ndjson/base.py
+++ b/labelbox/data/serialization/ndjson/base.py
@@ -2,26 +2,22 @@
 from uuid import uuid4
 from pydantic import BaseModel, root_validator, validator, Field
 
-from labelbox.utils import camel_case, is_exactly_one_set
+from labelbox.utils import _CamelCaseMixin, camel_case, is_exactly_one_set
 from ...annotation_types.types import Cuid
 
 
-class DataRow(BaseModel):
+class DataRow(_CamelCaseMixin):
     id: str = None
     global_key: str = None
 
     @root_validator()
     def must_set_one(cls, values):
-        if is_exactly_one_set(values.get('id'), values.get('global_key')):
+        if not is_exactly_one_set(values.get('id'), values.get('global_key')):
             raise ValueError("Must set either id or global_key")
         return values
 
-    class Config:
-        allow_population_by_field_name = True
-        alias_generator = camel_case
 
-
-class NDJsonBase(BaseModel):
+class NDJsonBase(_CamelCaseMixin):
     uuid: str = None
     data_row: DataRow
 
@@ -38,10 +34,6 @@ def dict(self, *args, **kwargs):
             res['dataRow'].pop('globalKey')
         return res
 
-    class Config:
-        allow_population_by_field_name = True
-        alias_generator = camel_case
-
 
 class NDAnnotation(NDJsonBase):
     name: Optional[str] = None