From ee2d55c443876d9b2f0cdb95141500343b7cf655 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Fri, 10 Mar 2023 09:45:58 -0800 Subject: [PATCH 1/5] First commit for supporting globalKeys in annotation import SDK --- labelbox/__init__.py | 2 +- .../data/annotation_types/data/base_data.py | 1 + labelbox/schema/annotation_import.py | 32 +++++++++++++++++ .../annotation_import/test_label_import.py | 35 +++++++++++++++++++ .../test_mea_prediction_import.py | 32 +++++++++++++++++ 5 files changed, 101 insertions(+), 1 deletion(-) diff --git a/labelbox/__init__.py b/labelbox/__init__.py index 47b4a9d70..6cc005fba 100644 --- a/labelbox/__init__.py +++ b/labelbox/__init__.py @@ -5,7 +5,7 @@ from labelbox.schema.project import Project from labelbox.schema.model import Model from labelbox.schema.bulk_import_request import BulkImportRequest -from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport +from labelbox.schema.annotation_import import MALPredictionImport, MEAPredictionImport, LabelImport, MEAToMALPredictionImport from labelbox.schema.dataset import Dataset from labelbox.schema.data_row import DataRow from labelbox.schema.label import Label diff --git a/labelbox/data/annotation_types/data/base_data.py b/labelbox/data/annotation_types/data/base_data.py index 0c376db37..2ccda34c3 100644 --- a/labelbox/data/annotation_types/data/base_data.py +++ b/labelbox/data/annotation_types/data/base_data.py @@ -11,5 +11,6 @@ class BaseData(BaseModel, ABC): """ external_id: Optional[str] = None uid: Optional[str] = None + global_key: Optional[str] = None media_attributes: Optional[Dict[str, Any]] = None metadata: Optional[List[Dict[str, Any]]] = None diff --git a/labelbox/schema/annotation_import.py b/labelbox/schema/annotation_import.py index e2f5f2557..65261a62f 100644 --- a/labelbox/schema/annotation_import.py +++ b/labelbox/schema/annotation_import.py @@ -155,6 +155,8 @@ def _get_ndjson_from_objects(cls, objects: Union[List[Dict[str, Any]], 
) objects = serialize_labels(objects) + # cls.validate_data_rows(objects) + data_str = ndjson.dumps(objects) if not data_str: raise ValueError(f"{object_name} cannot be empty") @@ -171,6 +173,36 @@ def refresh(self) -> None: as_json=True) self._set_field_values(res) + @classmethod + def validate_data_rows(cls, objects: List[Dict[str, Any]]): + """ + Validates annotations by checking 'dataRow' is provided + and only one of 'id' or 'globalKey' is provided. + + Shows up to `max_num_errors` errors if invalidated, to prevent + large number of error messages from being printed out + """ + errors = [] + max_num_errors = 100 + for object in objects: + if 'dataRow' not in object: + errors.append(f"'dataRow' is missing in {object}") + elif 'id' in object['dataRow'] and 'globalKey' in object['dataRow']: + errors.append( + f"Must provide only one of 'id' or 'globalKey' for 'dataRow' in {object}" + ) + + if errors: + errors_length = len(errors) + formatted_errors = '\n'.join(errors[:max_num_errors]) + if errors_length > max_num_errors: + logger.warning( + f"Found more than {max_num_errors} errors. Showing first {max_num_errors} error messages..." + ) + raise ValueError( + f"Error while validating annotations. Found {errors_length} annotations with errors. 
Errors:\n{formatted_errors}" + ) + @classmethod def from_name(cls, client: "labelbox.Client", diff --git a/tests/integration/annotation_import/test_label_import.py b/tests/integration/annotation_import/test_label_import.py index 2fc8ac84d..9848186e0 100644 --- a/tests/integration/annotation_import/test_label_import.py +++ b/tests/integration/annotation_import/test_label_import.py @@ -38,6 +38,41 @@ def test_create_from_objects(client, configured_project, object_predictions, label_import.input_file_url, object_predictions) +def test_data_row_validation_errors(client, configured_project, + object_predictions): + name = str(uuid.uuid4()) + # Set up data for validation errors + # Invalid: Remove 'dataRow' part entirely + del object_predictions[0]['dataRow'] + + # Invalid: Set both id and globalKey + object_predictions[1]['dataRow'] = { + 'id': 'some id', + 'globalKey': 'some global key' + } + + # Valid + object_predictions[2]['dataRow'] = { + 'id': 'some id', + } + + # Valid + object_predictions[3]['dataRow'] = { + 'globalKey': 'some global key', + } + + with pytest.raises(ValueError) as exc_info: + label_import = LabelImport.create_from_objects( + client=client, + project_id=configured_project.uid, + name=name, + labels=object_predictions) + exception_str = str(exc_info.value) + assert "Found 2 annotations with errors" in exception_str + assert "'dataRow' is missing in" in exception_str + assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str + + def test_create_from_label_objects(client, configured_project, object_predictions, annotation_import_test_helpers): diff --git a/tests/integration/annotation_import/test_mea_prediction_import.py b/tests/integration/annotation_import/test_mea_prediction_import.py index afca122b6..531139fcc 100644 --- a/tests/integration/annotation_import/test_mea_prediction_import.py +++ b/tests/integration/annotation_import/test_mea_prediction_import.py @@ -11,6 +11,38 @@ """ +def 
test_data_row_validation_errors(model_run_with_all_project_labels, + object_predictions): + name = str(uuid.uuid4()) + # Set up data for validation errors + # Invalid: Remove 'dataRow' part entirely + del object_predictions[0]['dataRow'] + + # Invalid: Set both id and globalKey + object_predictions[1]['dataRow'] = { + 'id': 'some id', + 'globalKey': 'some global key' + } + + # Valid + object_predictions[2]['dataRow'] = { + 'id': 'some id', + } + + # Valid + object_predictions[3]['dataRow'] = { + 'globalKey': 'some global key', + } + + with pytest.raises(ValueError) as exc_info: + model_run_with_all_project_labels.add_predictions( + name=name, predictions=object_predictions) + exception_str = str(exc_info.value) + assert "Found 2 annotations with errors" in exception_str + assert "'dataRow' is missing in" in exception_str + assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str + + def test_create_from_url(model_run_with_model_run_data_rows, annotation_import_test_helpers): name = str(uuid.uuid4()) From 9793391a4051e07f87f050a5d7ff4448fd4774ca Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Fri, 10 Mar 2023 18:01:55 -0800 Subject: [PATCH 2/5] Global keys to Annotation Types, and add validation when both id and globalKey are provided --- labelbox/data/annotation_types/data/raster.py | 5 ++- labelbox/data/annotation_types/data/text.py | 6 ++- labelbox/data/annotation_types/data/video.py | 6 ++- labelbox/data/serialization/ndjson/base.py | 27 +++++++++---- .../serialization/ndjson/classification.py | 8 ++-- labelbox/data/serialization/ndjson/label.py | 38 +++++++++++++------ labelbox/data/serialization/ndjson/metric.py | 6 +-- labelbox/data/serialization/ndjson/objects.py | 14 +++---- labelbox/schema/annotation_import.py | 2 +- 9 files changed, 71 insertions(+), 41 deletions(-) diff --git a/labelbox/data/annotation_types/data/raster.py b/labelbox/data/annotation_types/data/raster.py index 152960a09..45998cb26 100644 --- 
a/labelbox/data/annotation_types/data/raster.py +++ b/labelbox/data/annotation_types/data/raster.py @@ -163,9 +163,10 @@ def validate_args(cls, values): url = values.get("url") arr = values.get("arr") uid = values.get('uid') - if uid == file_path == im_bytes == url == None and arr is None: + global_key = values.get('global_key') + if uid == file_path == im_bytes == url == global_key == None and arr is None: raise ValueError( - "One of `file_path`, `im_bytes`, `url`, `uid` or `arr` required." + "One of `file_path`, `im_bytes`, `url`, `uid`, `global_key` or `arr` required." ) if arr is not None: if arr.dtype != np.uint8: diff --git a/labelbox/data/annotation_types/data/text.py b/labelbox/data/annotation_types/data/text.py index 816f39c2e..9149b0394 100644 --- a/labelbox/data/annotation_types/data/text.py +++ b/labelbox/data/annotation_types/data/text.py @@ -93,9 +93,11 @@ def validate_date(cls, values): text = values.get("text") url = values.get("url") uid = values.get('uid') - if uid == file_path == text == url == None: + global_key = values.get('global_key') + if uid == file_path == text == url == global_key == None: raise ValueError( - "One of `file_path`, `text`, `uid`, or `url` required.") + "One of `file_path`, `text`, `uid`, `global_key` or `url` required." 
+ ) return values def __repr__(self) -> str: diff --git a/labelbox/data/annotation_types/data/video.py b/labelbox/data/annotation_types/data/video.py index 8d835acdb..53cee7280 100644 --- a/labelbox/data/annotation_types/data/video.py +++ b/labelbox/data/annotation_types/data/video.py @@ -153,10 +153,12 @@ def validate_data(cls, values): url = values.get("url") frames = values.get("frames") uid = values.get("uid") + global_key = values.get("global_key") - if uid == file_path == frames == url == None: + if uid == file_path == frames == url == global_key == None: raise ValueError( - "One of `file_path`, `frames`, `uid`, or `url` required.") + "One of `file_path`, `frames`, `uid`, `global_key` or `url` required." + ) return values def __repr__(self) -> str: diff --git a/labelbox/data/serialization/ndjson/base.py b/labelbox/data/serialization/ndjson/base.py index 654c32495..b3ef0fc8b 100644 --- a/labelbox/data/serialization/ndjson/base.py +++ b/labelbox/data/serialization/ndjson/base.py @@ -8,15 +8,17 @@ class DataRow(BaseModel): id: str = None + global_key: str = None - @validator('id', pre=True, always=True) - def validate_id(cls, v): - if v is None: - raise ValueError( - "Data row ids are not set. Use `LabelGenerator.add_to_dataset`,or `Label.create_data_row`. 
" - "You can also manually assign the id for each `BaseData` object" - ) - return v + @root_validator() + def must_set_one(cls, values): + if bool(values.get('id')) == bool(values.get('global_key')): + raise ValueError("Must set either id or global_key") + return values + + class Config: + allow_population_by_field_name = True + alias_generator = camel_case class NDJsonBase(BaseModel): @@ -27,6 +29,15 @@ class NDJsonBase(BaseModel): def set_id(cls, v): return v or str(uuid4()) + def dict(self, *args, **kwargs): + """ Pop missing id or missing globalKey from dataRow """ + res = super().dict(*args, **kwargs) + if not self.data_row.id: + res['dataRow'].pop('id') + if not self.data_row.global_key: + res['dataRow'].pop('globalKey') + return res + class Config: allow_population_by_field_name = True alias_generator = camel_case diff --git a/labelbox/data/serialization/ndjson/classification.py b/labelbox/data/serialization/ndjson/classification.py index bbd4342e2..7b48c3a30 100644 --- a/labelbox/data/serialization/ndjson/classification.py +++ b/labelbox/data/serialization/ndjson/classification.py @@ -8,7 +8,7 @@ from ...annotation_types.classification.classification import ClassificationAnswer, Dropdown, Text, Checklist, Radio from ...annotation_types.types import Cuid from ...annotation_types.data import TextData, VideoData, ImageData -from .base import NDAnnotation +from .base import DataRow, NDAnnotation class NDFeature(ConfidenceMixin): @@ -125,7 +125,7 @@ def from_common(cls, text: Text, name: str, feature_schema_id: Cuid, ImageData]) -> "NDText": return cls( answer=text.answer, - data_row={'id': data.uid}, + data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), @@ -145,7 +145,7 @@ def from_common( confidence=answer.confidence) for answer in checklist.answer ], - data_row={'id': data.uid}, + data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, 
uuid=extra.get('uuid'), @@ -161,7 +161,7 @@ def from_common(cls, radio: Radio, name: str, feature_schema_id: Cuid, return cls(answer=NDFeature(name=radio.answer.name, schema_id=radio.answer.feature_schema_id, confidence=radio.answer.confidence), - data_row={'id': data.uid}, + data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), diff --git a/labelbox/data/serialization/ndjson/label.py b/labelbox/data/serialization/ndjson/label.py index 8dd339767..2bc8bddb9 100644 --- a/labelbox/data/serialization/ndjson/label.py +++ b/labelbox/data/serialization/ndjson/label.py @@ -1,6 +1,6 @@ from itertools import groupby from operator import itemgetter -from typing import Dict, Generator, List, Tuple, Union +from typing import Dict, Generator, List, Optional, Tuple, Union from collections import defaultdict import warnings @@ -17,6 +17,7 @@ from .metric import NDScalarMetric, NDMetricAnnotation, NDConfusionMatrixMetric from .classification import NDChecklistSubclass, NDClassification, NDClassificationType, NDRadioSubclass from .objects import NDObject, NDObjectType, NDSegments +from .base import DataRow class NDLabel(BaseModel): @@ -27,7 +28,10 @@ class NDLabel(BaseModel): def to_common(self) -> LabelGenerator: grouped_annotations = defaultdict(list) for annotation in self.annotations: - grouped_annotations[annotation.data_row.id].append(annotation) + grouped_annotations[annotation.data_row.id or + annotation.data_row.global_key].append( + annotation) + print(grouped_annotations) return LabelGenerator( data=self._generate_annotations(grouped_annotations)) @@ -45,9 +49,11 @@ def _generate_annotations( NDConfusionMatrixMetric, NDScalarMetric, NDSegments]]] ) -> Generator[Label, None, None]: - for data_row_id, annotations in grouped_annotations.items(): + for _, annotations in grouped_annotations.items(): annots = [] + data_row = annotations[0].data_row for annotation in annotations: + if isinstance(annotation, 
NDSegments): annots.extend( NDSegments.to_common(annotation, annotation.name, @@ -62,22 +68,30 @@ def _generate_annotations( else: raise TypeError( f"Unsupported annotation. {type(annotation)}") - data = self._infer_media_type(annots)(uid=data_row_id) - yield Label(annotations=annots, data=data) + yield Label(annotations=annots, + data=self._infer_media_type(data_row, annots)) def _infer_media_type( - self, annotations: List[Union[TextEntity, VideoClassificationAnnotation, - VideoObjectAnnotation, ObjectAnnotation, - ClassificationAnnotation, ScalarMetric, - ConfusionMatrixMetric]] + self, data_row: DataRow, + annotations: List[Union[TextEntity, VideoClassificationAnnotation, + VideoObjectAnnotation, ObjectAnnotation, + ClassificationAnnotation, ScalarMetric, + ConfusionMatrixMetric]] ) -> Union[TextData, VideoData, ImageData]: + if len(annotations) == 0: + raise ValueError("Missing annotations while inferring media type") + types = {type(annotation) for annotation in annotations} + data = ImageData if TextEntity in types: - return TextData + data = TextData elif VideoClassificationAnnotation in types or VideoObjectAnnotation in types: - return VideoData + data = VideoData + + if data_row.id: + return data(uid=data_row.id) else: - return ImageData + return data(global_key=data_row.global_key) @staticmethod def _get_consecutive_frames( diff --git a/labelbox/data/serialization/ndjson/metric.py b/labelbox/data/serialization/ndjson/metric.py index a3f648698..5abbf2761 100644 --- a/labelbox/data/serialization/ndjson/metric.py +++ b/labelbox/data/serialization/ndjson/metric.py @@ -1,7 +1,7 @@ from typing import Optional, Union, Type from labelbox.data.annotation_types.data import ImageData, TextData -from labelbox.data.serialization.ndjson.base import NDJsonBase +from labelbox.data.serialization.ndjson.base import DataRow, NDJsonBase from labelbox.data.annotation_types.metrics.scalar import ( ScalarMetric, ScalarMetricAggregation, ScalarMetricValue, 
ScalarMetricConfidenceValue) @@ -50,7 +50,7 @@ def from_common( feature_name=metric.feature_name, subclass_name=metric.subclass_name, aggregation=metric.aggregation, - data_row={'id': data.uid}) + data_row=DataRow(id=data.uid, global_key=data.global_key)) class NDScalarMetric(BaseNDMetric): @@ -75,7 +75,7 @@ def from_common(cls, metric: ScalarMetric, feature_name=metric.feature_name, subclass_name=metric.subclass_name, aggregation=metric.aggregation.value, - data_row={'id': data.uid}) + data_row=DataRow(id=data.uid, global_key=data.global_key)) def dict(self, *args, **kwargs): res = super().dict(*args, **kwargs) diff --git a/labelbox/data/serialization/ndjson/objects.py b/labelbox/data/serialization/ndjson/objects.py index 796465c4b..70e672816 100644 --- a/labelbox/data/serialization/ndjson/objects.py +++ b/labelbox/data/serialization/ndjson/objects.py @@ -60,7 +60,7 @@ def from_common(cls, 'x': point.x, 'y': point.y }, - dataRow=DataRow(id=data.uid), + data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), @@ -105,7 +105,7 @@ def from_common(cls, 'x': pt.x, 'y': pt.y } for pt in line.points], - dataRow=DataRow(id=data.uid), + data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), @@ -154,7 +154,7 @@ def from_common(cls, 'x': pt.x, 'y': pt.y } for pt in polygon.points], - dataRow=DataRow(id=data.uid), + data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), @@ -183,7 +183,7 @@ def from_common(cls, left=rectangle.start.x, height=rectangle.end.y - rectangle.start.y, width=rectangle.end.x - rectangle.start.x), - dataRow=DataRow(id=data.uid), + data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), @@ -280,7 +280,7 @@ def from_common(cls, segments: List[VideoObjectAnnotation], data: VideoData, segments 
= [NDSegment.from_common(segment) for segment in segments] return cls(segments=segments, - dataRow=DataRow(id=data.uid), + data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid')) @@ -332,7 +332,7 @@ def from_common(cls, png=base64.b64encode(im_bytes.getvalue()).decode('utf-8')) return cls(mask=lbv1_mask, - dataRow=DataRow(id=data.uid), + data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), @@ -364,7 +364,7 @@ def from_common(cls, start=text_entity.start, end=text_entity.end, ), - dataRow=DataRow(id=data.uid), + data_row=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), diff --git a/labelbox/schema/annotation_import.py b/labelbox/schema/annotation_import.py index 65261a62f..473350608 100644 --- a/labelbox/schema/annotation_import.py +++ b/labelbox/schema/annotation_import.py @@ -155,7 +155,7 @@ def _get_ndjson_from_objects(cls, objects: Union[List[Dict[str, Any]], ) objects = serialize_labels(objects) - # cls.validate_data_rows(objects) + cls.validate_data_rows(objects) data_str = ndjson.dumps(objects) if not data_str: From 63d30eb0a430ad5c8ded07b03d4c2d9419d0c8be Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Fri, 10 Mar 2023 18:04:36 -0800 Subject: [PATCH 3/5] Remove prints --- labelbox/data/serialization/ndjson/label.py | 1 - 1 file changed, 1 deletion(-) diff --git a/labelbox/data/serialization/ndjson/label.py b/labelbox/data/serialization/ndjson/label.py index 2bc8bddb9..f9e848a64 100644 --- a/labelbox/data/serialization/ndjson/label.py +++ b/labelbox/data/serialization/ndjson/label.py @@ -31,7 +31,6 @@ def to_common(self) -> LabelGenerator: grouped_annotations[annotation.data_row.id or annotation.data_row.global_key].append( annotation) - print(grouped_annotations) return LabelGenerator( data=self._generate_annotations(grouped_annotations)) From 
67dc7a9c165de7c6a36876ac1885498c3bd355ae Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Mon, 13 Mar 2023 16:32:01 -0700 Subject: [PATCH 4/5] Address PR comments and fix build failure --- labelbox/data/serialization/ndjson/base.py | 4 +- labelbox/schema/annotation_import.py | 8 +- labelbox/utils.py | 4 + tests/data/annotation_types/test_metrics.py | 3 + .../classification_import_global_key.json | 34 + .../ndjson/image_import_global_key.json | 793 ++++++++++++++++++ .../ndjson/metric_import_global_key.json | 1 + .../assets/ndjson/pdf_import_global_key.json | 100 +++ .../ndjson/polyline_import_global_key.json | 26 + .../ndjson/text_entity_import_global_key.json | 16 + .../ndjson/video_import_global_key.json | 106 +++ .../serialization/ndjson/test_global_key.py | 67 ++ .../annotation_import/test_label_import.py | 57 -- .../test_mea_prediction_import.py | 32 - tests/unit/test_annotation_import.py | 85 ++ 15 files changed, 1242 insertions(+), 94 deletions(-) create mode 100644 tests/data/assets/ndjson/classification_import_global_key.json create mode 100644 tests/data/assets/ndjson/image_import_global_key.json create mode 100644 tests/data/assets/ndjson/metric_import_global_key.json create mode 100644 tests/data/assets/ndjson/pdf_import_global_key.json create mode 100644 tests/data/assets/ndjson/polyline_import_global_key.json create mode 100644 tests/data/assets/ndjson/text_entity_import_global_key.json create mode 100644 tests/data/assets/ndjson/video_import_global_key.json create mode 100644 tests/data/serialization/ndjson/test_global_key.py create mode 100644 tests/unit/test_annotation_import.py diff --git a/labelbox/data/serialization/ndjson/base.py b/labelbox/data/serialization/ndjson/base.py index b3ef0fc8b..493771bfe 100644 --- a/labelbox/data/serialization/ndjson/base.py +++ b/labelbox/data/serialization/ndjson/base.py @@ -2,7 +2,7 @@ from uuid import uuid4 from pydantic import BaseModel, root_validator, validator, Field -from labelbox.utils import camel_case 
+from labelbox.utils import camel_case, is_exactly_one_set from ...annotation_types.types import Cuid @@ -12,7 +12,7 @@ class DataRow(BaseModel): @root_validator() def must_set_one(cls, values): - if bool(values.get('id')) == bool(values.get('global_key')): + if not is_exactly_one_set(values.get('id'), values.get('global_key')): raise ValueError("Must set either id or global_key") return values diff --git a/labelbox/schema/annotation_import.py b/labelbox/schema/annotation_import.py index 473350608..a22525816 100644 --- a/labelbox/schema/annotation_import.py +++ b/labelbox/schema/annotation_import.py @@ -14,6 +14,7 @@ from labelbox.orm import query from labelbox.orm.db_object import DbObject from labelbox.orm.model import Field, Relationship +from labelbox.utils import is_exactly_one_set from labelbox.schema.confidence_presence_checker import LabelsConfidencePresenceChecker from labelbox.schema.enums import AnnotationImportState from labelbox.schema.serialization import serialize_labels @@ -155,7 +156,7 @@ def _get_ndjson_from_objects(cls, objects: Union[List[Dict[str, Any]], ) objects = serialize_labels(objects) - cls.validate_data_rows(objects) + cls._validate_data_rows(objects) data_str = ndjson.dumps(objects) if not data_str: @@ -174,7 +175,7 @@ def refresh(self) -> None: self._set_field_values(res) @classmethod - def validate_data_rows(cls, objects: List[Dict[str, Any]]): + def _validate_data_rows(cls, objects: List[Dict[str, Any]]): """ Validates annotations by checking 'dataRow' is provided and only one of 'id' or 'globalKey' is provided. 
@@ -187,7 +188,8 @@ def validate_data_rows(cls, objects: List[Dict[str, Any]]): for object in objects: if 'dataRow' not in object: errors.append(f"'dataRow' is missing in {object}") - elif 'id' in object['dataRow'] and 'globalKey' in object['dataRow']: + elif not is_exactly_one_set(object['dataRow'].get('id'), + object['dataRow'].get('globalKey')): errors.append( f"Must provide only one of 'id' or 'globalKey' for 'dataRow' in {object}" ) diff --git a/labelbox/utils.py b/labelbox/utils.py index 624437182..27106abb5 100644 --- a/labelbox/utils.py +++ b/labelbox/utils.py @@ -26,6 +26,10 @@ def snake_case(s): return _convert(s, "_", lambda i: False) +def is_exactly_one_set(x, y): + return not (bool(x) == bool(y)) + + class _CamelCaseMixin(BaseModel): class Config: diff --git a/tests/data/annotation_types/test_metrics.py b/tests/data/annotation_types/test_metrics.py index 389c154aa..3bc447309 100644 --- a/tests/data/annotation_types/test_metrics.py +++ b/tests/data/annotation_types/test_metrics.py @@ -18,6 +18,7 @@ def test_legacy_scalar_metric(): 'data': { 'external_id': None, 'uid': 'ckrmd9q8g000009mg6vej7hzg', + 'global_key': None, 'im_bytes': None, 'file_path': None, 'url': None, @@ -68,6 +69,7 @@ def test_custom_scalar_metric(feature_name, subclass_name, aggregation, value): 'data': { 'external_id': None, 'uid': 'ckrmd9q8g000009mg6vej7hzg', + 'global_key': None, 'im_bytes': None, 'file_path': None, 'url': None, @@ -124,6 +126,7 @@ def test_custom_confusison_matrix_metric(feature_name, subclass_name, 'data': { 'external_id': None, 'uid': 'ckrmd9q8g000009mg6vej7hzg', + 'global_key': None, 'im_bytes': None, 'file_path': None, 'url': None, diff --git a/tests/data/assets/ndjson/classification_import_global_key.json b/tests/data/assets/ndjson/classification_import_global_key.json new file mode 100644 index 000000000..0c8e5482b --- /dev/null +++ b/tests/data/assets/ndjson/classification_import_global_key.json @@ -0,0 +1,34 @@ +[ + { + "answer": { + "schemaId": 
"ckrb1sfl8099g0y91cxbd5ftb", + "confidence": 0.8 + }, + "schemaId": "c123", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673" + }, + { + "answer": [ + { + "schemaId": "ckrb1sfl8099e0y919v260awv", + "confidence": 0.82 + } + ], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + } +] \ No newline at end of file diff --git a/tests/data/assets/ndjson/image_import_global_key.json b/tests/data/assets/ndjson/image_import_global_key.json new file mode 100644 index 000000000..64961fc75 --- /dev/null +++ b/tests/data/assets/ndjson/image_import_global_key.json @@ -0,0 +1,793 @@ +[ + { + "uuid": "b862c586-8614-483c-b5e6-82810f70cac0", + "schemaId": "ckrazcueb16og0z6609jj7y3y", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.851, + "bbox": { + "top": 1352, + "left": 2275, + "height": 350, + "width": 139 + } + }, + { + "uuid": "751fc725-f7b6-48ed-89b0-dd7d94d08af6", + "schemaId": "ckrazcuec16ok0z66f956apb7", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.834, + "mask": { + "instanceURI": "https://storage.labelbox.com/ckqcx1czn06830y61gh9v02cs%2F3e729327-f038-f66c-186e-45e921ef9717-1?Expires=1626806874672&KeyName=labelbox-assets-key-3&Signature=YsUOGKrsqmAZ68vT9BlPJOaRyLY", + "colorRGB": [ + 255, + 0, + 0 + ] + } + }, + { + "uuid": "43d719ac-5d7f-4aea-be00-2ebfca0900fd", + "schemaId": "ckrazcuec16oi0z66dzrd8pfl", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "confidence": 0.986, + "polygon": [ + { + "x": 1118, + "y": 935 + }, + { + "x": 1117, + "y": 935 + }, + { + "x": 1116, + "y": 
935 + }, + { + "x": 1115, + "y": 935 + }, + { + "x": 1114, + "y": 935 + }, + { + "x": 1113, + "y": 935 + }, + { + "x": 1112, + "y": 935 + }, + { + "x": 1111, + "y": 935 + }, + { + "x": 1110, + "y": 935 + }, + { + "x": 1109, + "y": 935 + }, + { + "x": 1108, + "y": 935 + }, + { + "x": 1108, + "y": 934 + }, + { + "x": 1107, + "y": 934 + }, + { + "x": 1106, + "y": 934 + }, + { + "x": 1105, + "y": 934 + }, + { + "x": 1105, + "y": 933 + }, + { + "x": 1104, + "y": 933 + }, + { + "x": 1103, + "y": 933 + }, + { + "x": 1103, + "y": 932 + }, + { + "x": 1102, + "y": 932 + }, + { + "x": 1101, + "y": 932 + }, + { + "x": 1100, + "y": 932 + }, + { + "x": 1099, + "y": 932 + }, + { + "x": 1098, + "y": 932 + }, + { + "x": 1097, + "y": 932 + }, + { + "x": 1097, + "y": 931 + }, + { + "x": 1096, + "y": 931 + }, + { + "x": 1095, + "y": 931 + }, + { + "x": 1094, + "y": 931 + }, + { + "x": 1093, + "y": 931 + }, + { + "x": 1092, + "y": 931 + }, + { + "x": 1091, + "y": 931 + }, + { + "x": 1090, + "y": 931 + }, + { + "x": 1090, + "y": 930 + }, + { + "x": 1089, + "y": 930 + }, + { + "x": 1088, + "y": 930 + }, + { + "x": 1087, + "y": 930 + }, + { + "x": 1087, + "y": 929 + }, + { + "x": 1086, + "y": 929 + }, + { + "x": 1085, + "y": 929 + }, + { + "x": 1084, + "y": 929 + }, + { + "x": 1084, + "y": 928 + }, + { + "x": 1083, + "y": 928 + }, + { + "x": 1083, + "y": 927 + }, + { + "x": 1082, + "y": 927 + }, + { + "x": 1081, + "y": 927 + }, + { + "x": 1081, + "y": 926 + }, + { + "x": 1080, + "y": 926 + }, + { + "x": 1080, + "y": 925 + }, + { + "x": 1079, + "y": 925 + }, + { + "x": 1078, + "y": 925 + }, + { + "x": 1078, + "y": 924 + }, + { + "x": 1077, + "y": 924 + }, + { + "x": 1076, + "y": 924 + }, + { + "x": 1076, + "y": 923 + }, + { + "x": 1075, + "y": 923 + }, + { + "x": 1074, + "y": 923 + }, + { + "x": 1073, + "y": 923 + }, + { + "x": 1073, + "y": 922 + }, + { + "x": 1072, + "y": 922 + }, + { + "x": 1071, + "y": 922 + }, + { + "x": 1070, + "y": 922 + }, + { + "x": 1070, + "y": 921 + }, + { + "x": 
1069, + "y": 921 + }, + { + "x": 1068, + "y": 921 + }, + { + "x": 1067, + "y": 921 + }, + { + "x": 1066, + "y": 921 + }, + { + "x": 1065, + "y": 921 + }, + { + "x": 1064, + "y": 921 + }, + { + "x": 1063, + "y": 921 + }, + { + "x": 1062, + "y": 921 + }, + { + "x": 1061, + "y": 921 + }, + { + "x": 1060, + "y": 921 + }, + { + "x": 1059, + "y": 921 + }, + { + "x": 1058, + "y": 921 + }, + { + "x": 1058, + "y": 920 + }, + { + "x": 1057, + "y": 920 + }, + { + "x": 1057, + "y": 919 + }, + { + "x": 1056, + "y": 919 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 918 + }, + { + "x": 1057, + "y": 917 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1058, + "y": 916 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1059, + "y": 915 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1060, + "y": 914 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1061, + "y": 913 + }, + { + "x": 1062, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1063, + "y": 912 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1064, + "y": 911 + }, + { + "x": 1065, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1066, + "y": 910 + }, + { + "x": 1067, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1068, + "y": 909 + }, + { + "x": 1069, + "y": 908 + }, + { + "x": 1070, + "y": 908 + }, + { + "x": 1071, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1072, + "y": 908 + }, + { + "x": 1073, + "y": 907 + }, + { + "x": 1074, + "y": 907 + }, + { + "x": 1075, + "y": 907 + }, + { + "x": 1076, + "y": 907 + }, + { + "x": 1077, + "y": 907 + }, + { + "x": 1078, + "y": 907 + }, + { + "x": 1079, + "y": 907 + }, + { + "x": 1080, + "y": 907 + }, + { + "x": 1081, + "y": 907 + }, + { + "x": 1082, + "y": 907 + }, + { + "x": 1083, + "y": 907 + }, + { + "x": 1084, + "y": 907 + }, + { + "x": 1085, + "y": 907 + }, + { + "x": 1086, + "y": 907 + }, + { + "x": 1087, + "y": 907 + }, + { + "x": 1088, + "y": 907 + }, + { + "x": 1089, + "y": 907 + }, + { + "x": 1090, + "y": 907 + 
}, + { + "x": 1091, + "y": 907 + }, + { + "x": 1091, + "y": 908 + }, + { + "x": 1092, + "y": 908 + }, + { + "x": 1093, + "y": 908 + }, + { + "x": 1094, + "y": 908 + }, + { + "x": 1095, + "y": 908 + }, + { + "x": 1095, + "y": 909 + }, + { + "x": 1096, + "y": 909 + }, + { + "x": 1097, + "y": 909 + }, + { + "x": 1097, + "y": 910 + }, + { + "x": 1098, + "y": 910 + }, + { + "x": 1099, + "y": 910 + }, + { + "x": 1099, + "y": 911 + }, + { + "x": 1100, + "y": 911 + }, + { + "x": 1101, + "y": 911 + }, + { + "x": 1101, + "y": 912 + }, + { + "x": 1102, + "y": 912 + }, + { + "x": 1103, + "y": 912 + }, + { + "x": 1103, + "y": 913 + }, + { + "x": 1104, + "y": 913 + }, + { + "x": 1104, + "y": 914 + }, + { + "x": 1105, + "y": 914 + }, + { + "x": 1105, + "y": 915 + }, + { + "x": 1106, + "y": 915 + }, + { + "x": 1107, + "y": 915 + }, + { + "x": 1107, + "y": 916 + }, + { + "x": 1108, + "y": 916 + }, + { + "x": 1108, + "y": 917 + }, + { + "x": 1109, + "y": 917 + }, + { + "x": 1109, + "y": 918 + }, + { + "x": 1110, + "y": 918 + }, + { + "x": 1110, + "y": 919 + }, + { + "x": 1111, + "y": 919 + }, + { + "x": 1111, + "y": 920 + }, + { + "x": 1112, + "y": 920 + }, + { + "x": 1112, + "y": 921 + }, + { + "x": 1113, + "y": 921 + }, + { + "x": 1113, + "y": 922 + }, + { + "x": 1114, + "y": 922 + }, + { + "x": 1114, + "y": 923 + }, + { + "x": 1115, + "y": 923 + }, + { + "x": 1115, + "y": 924 + }, + { + "x": 1115, + "y": 925 + }, + { + "x": 1116, + "y": 925 + }, + { + "x": 1116, + "y": 926 + }, + { + "x": 1117, + "y": 926 + }, + { + "x": 1117, + "y": 927 + }, + { + "x": 1117, + "y": 928 + }, + { + "x": 1118, + "y": 928 + }, + { + "x": 1118, + "y": 929 + }, + { + "x": 1119, + "y": 929 + }, + { + "x": 1119, + "y": 930 + }, + { + "x": 1120, + "y": 930 + }, + { + "x": 1120, + "y": 931 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1120, + "y": 932 + }, + { + "x": 1119, + "y": 933 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1119, + "y": 934 + }, + { + "x": 1118, + "y": 935 + }, + { + "x": 1118, 
+ "y": 935 + } + ] + }, + { + "uuid": "b98f3a45-3328-41a0-9077-373a8177ebf2", + "schemaId": "ckrazcuec16om0z66bhhh4tp7", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "point": { + "x": 2122, + "y": 1457 + } + } +] \ No newline at end of file diff --git a/tests/data/assets/ndjson/metric_import_global_key.json b/tests/data/assets/ndjson/metric_import_global_key.json new file mode 100644 index 000000000..666f4ec97 --- /dev/null +++ b/tests/data/assets/ndjson/metric_import_global_key.json @@ -0,0 +1 @@ +[{"uuid" : "a22bbf6e-b2da-4abe-9a11-df84759f7672","dataRow" : {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, "metricValue" : 0.1}] diff --git a/tests/data/assets/ndjson/pdf_import_global_key.json b/tests/data/assets/ndjson/pdf_import_global_key.json new file mode 100644 index 000000000..8a14273d6 --- /dev/null +++ b/tests/data/assets/ndjson/pdf_import_global_key.json @@ -0,0 +1,100 @@ +[{ + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 4, + "unit": "POINTS", + "confidence": 0.53, + "bbox": { + "top": 162.73, + "left": 32.45, + "height": 388.16999999999996, + "width": 101.66000000000001 + } +}, { + "uuid": "20eeef88-0294-49b4-a815-86588476bc6f", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "bbox": { + "top": 223.26, + "left": 251.42, + "height": 457.03999999999996, + "width": 186.78 + } +}, { + "uuid": "641a8944-3938-409c-b4eb-dea354ed06e5", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 6, + "unit": "POINTS", + "confidence": 0.99, + "bbox": { + "top": 32.52, + "left": 218.17, + "height": 231.73, 
+ "width": 110.56000000000003 + } +}, { + "uuid": "ebe4da7d-08b3-480a-8d15-26552b7f011c", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 7, + "unit": "POINTS", + "confidence": 0.89, + "bbox": { + "top": 117.39, + "left": 4.25, + "height": 456.9200000000001, + "width": 164.83 + } +}, { + "uuid": "35c41855-575f-42cc-a2f9-1f06237e9b63", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 8, + "unit": "POINTS", + "bbox": { + "top": 82.13, + "left": 217.28, + "height": 279.76, + "width": 82.43000000000004 + } +}, { + "uuid": "1b009654-bc17-42a2-8a71-160e7808c403", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "boxy", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "page": 3, + "unit": "POINTS", + "bbox": { + "top": 298.12, + "left": 83.34, + "height": 203.83000000000004, + "width": 0.37999999999999545 + } +}] \ No newline at end of file diff --git a/tests/data/assets/ndjson/polyline_import_global_key.json b/tests/data/assets/ndjson/polyline_import_global_key.json new file mode 100644 index 000000000..f32612cfa --- /dev/null +++ b/tests/data/assets/ndjson/polyline_import_global_key.json @@ -0,0 +1,26 @@ +[ + { + "line": [ + { + "x": 2534.353, + "y": 249.471 + }, + { + "x": 2429.492, + "y": 182.092 + }, + { + "x": 2294.322, + "y": 221.962 + } + ], + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-line", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.58 + } +] \ No newline at end of file diff --git a/tests/data/assets/ndjson/text_entity_import_global_key.json b/tests/data/assets/ndjson/text_entity_import_global_key.json new file mode 100644 index 
000000000..b8453f9e4 --- /dev/null +++ b/tests/data/assets/ndjson/text_entity_import_global_key.json @@ -0,0 +1,16 @@ +[ + { + "location": { + "start": 67, + "end": 128 + }, + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53 + } +] \ No newline at end of file diff --git a/tests/data/assets/ndjson/video_import_global_key.json b/tests/data/assets/ndjson/video_import_global_key.json new file mode 100644 index 000000000..568501dc5 --- /dev/null +++ b/tests/data/assets/ndjson/video_import_global_key.json @@ -0,0 +1,106 @@ +[ + { + "answer": {"schemaId": "ckrb1sfl8099g0y91cxbd5ftb"}, + "schemaId": "ckrb1sfjx099a0y914hl319ie", + "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "frames": [{"start": 30, "end": 35}, {"start": 50, "end": 51}] + }, + { + "answer": [{"schemaId": "ckrb1sfl8099e0y919v260awv"}], + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584", + "frames": [{"start": 0, "end": 5}] + }, + { + "answer": "a value", + "schemaId": "ckrb1sfkn099c0y910wbo0p1a", + "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, + "uuid": "d009925d-91a3-4f67-abd9-753453f5a584" + }, + { + "classifications": [], + "schemaId": "cl5islwg200gfci6g0oitaypu", + "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, + "uuid": "6f7c835a-0139-4896-b73f-66a6baa89e94", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "line": [{"x": 10.0, "y": 10.0}, {"x": 100.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] + }, + { + "frame": 5, + "line": [{"x": 15.0, "y": 10.0}, {"x": 50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] + } + ] + }, + { + "keyframes": [ + { + "frame": 8, + "line": [{"x": 100.0, "y": 10.0}, {"x": 
50.0, "y": 100.0}, {"x": 50.0, "y": 30.0}] + } + ] + } + ] + }, + { + "classifications": [], + "schemaId": "cl5it7ktp00i5ci6gf80b1ysd", + "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, + "uuid": "f963be22-227b-4efe-9be4-2738ed822216", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "point": {"x": 10.0, "y": 10.0} + } + ] + }, + { + "keyframes": [ + { + "frame": 5, + "point": {"x": 50.0, "y": 50.0} + }, + { + "frame": 10, + "point": {"x": 10.0, "y": 50.0} + } + ] + } + ] + }, + { + "classifications": [], + "schemaId": "cl5iw0roz00lwci6g5jni62vs", + "dataRow": {"globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d"}, + "uuid": "13b2ee0e-2355-4336-8b83-d74d09e3b1e7", + "segments": [ + { + "keyframes": [ + { + "frame": 1, + "bbox": {"top": 10.0, "left": 5.0, "height": 100.0, "width": 150.0} + }, + { + "frame": 5, + "bbox": {"top": 30.0, "left": 5.0, "height": 50.0, "width": 150.0} + } + ] + }, + { + "keyframes": [ + { + "frame": 10, + "bbox": {"top": 300.0, "left": 200.0, "height": 400.0, "width": 150.0} + } + ] + } + ] + } +] \ No newline at end of file diff --git a/tests/data/serialization/ndjson/test_global_key.py b/tests/data/serialization/ndjson/test_global_key.py new file mode 100644 index 000000000..7e3efdf6e --- /dev/null +++ b/tests/data/serialization/ndjson/test_global_key.py @@ -0,0 +1,67 @@ +import json +import pytest + +from labelbox.data.serialization.ndjson.classification import NDRadio + +from labelbox.data.serialization.ndjson.converter import NDJsonConverter +from labelbox.data.serialization.ndjson.objects import NDLine + + +def round_dict(data): + if isinstance(data, dict): + for key in data: + if isinstance(data[key], float): + data[key] = int(data[key]) + elif isinstance(data[key], dict): + data[key] = round_dict(data[key]) + elif isinstance(data[key], (list, tuple)): + data[key] = [round_dict(r) for r in data[key]] + + return data + + +@pytest.mark.parametrize('filename', [ + 
'tests/data/assets/ndjson/classification_import_global_key.json', + 'tests/data/assets/ndjson/metric_import_global_key.json', + 'tests/data/assets/ndjson/polyline_import_global_key.json', + 'tests/data/assets/ndjson/text_entity_import_global_key.json' +]) +def test_many_types(filename: str): + with open(filename, 'r') as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == data + f.close() + + +def test_image(): + with open('tests/data/assets/ndjson/image_import_global_key.json', + 'r') as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + for r in res: + r.pop('classifications', None) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() + + +def test_pdf(): + with open('tests/data/assets/ndjson/pdf_import_global_key.json', 'r') as f: + data = json.load(f) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert [round_dict(x) for x in res] == [round_dict(x) for x in data] + f.close() + + +def test_video(): + with open('tests/data/assets/ndjson/video_import_global_key.json', + 'r') as f: + data = json.load(f) + + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == [data[2], data[0], data[1], data[3], data[4], data[5]] + f.close() diff --git a/tests/integration/annotation_import/test_label_import.py b/tests/integration/annotation_import/test_label_import.py index 9848186e0..44a308776 100644 --- a/tests/integration/annotation_import/test_label_import.py +++ b/tests/integration/annotation_import/test_label_import.py @@ -2,7 +2,6 @@ import pytest from labelbox.schema.annotation_import import AnnotationImportState, LabelImport -from labelbox.data.serialization import NDJsonConverter """ - Here we only want to check that the uploads are calling the validation - Then with unit tests we can check 
the types of errors raised @@ -38,62 +37,6 @@ def test_create_from_objects(client, configured_project, object_predictions, label_import.input_file_url, object_predictions) -def test_data_row_validation_errors(client, configured_project, - object_predictions): - name = str(uuid.uuid4()) - # Set up data for validation errors - # Invalid: Remove 'dataRow' part entirely - del object_predictions[0]['dataRow'] - - # Invalid: Set both id and globalKey - object_predictions[1]['dataRow'] = { - 'id': 'some id', - 'globalKey': 'some global key' - } - - # Valid - object_predictions[2]['dataRow'] = { - 'id': 'some id', - } - - # Valid - object_predictions[3]['dataRow'] = { - 'globalKey': 'some global key', - } - - with pytest.raises(ValueError) as exc_info: - label_import = LabelImport.create_from_objects( - client=client, - project_id=configured_project.uid, - name=name, - labels=object_predictions) - exception_str = str(exc_info.value) - assert "Found 2 annotations with errors" in exception_str - assert "'dataRow' is missing in" in exception_str - assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str - - -def test_create_from_label_objects(client, configured_project, - object_predictions, - annotation_import_test_helpers): - """this test should check running state only to validate running, not completed""" - name = str(uuid.uuid4()) - - labels = list(NDJsonConverter.deserialize(object_predictions)) - - label_import = LabelImport.create_from_objects( - client=client, - project_id=configured_project.uid, - name=name, - labels=labels) - - assert label_import.parent_id == configured_project.uid - annotation_import_test_helpers.check_running_state(label_import, name) - normalized_predictions = NDJsonConverter.serialize(labels) - annotation_import_test_helpers.assert_file_content( - label_import.input_file_url, normalized_predictions) - - # TODO: add me when we add this ability # def test_create_from_local_file(client, tmp_path, project, # 
object_predictions, annotation_import_test_helpers): diff --git a/tests/integration/annotation_import/test_mea_prediction_import.py b/tests/integration/annotation_import/test_mea_prediction_import.py index 531139fcc..afca122b6 100644 --- a/tests/integration/annotation_import/test_mea_prediction_import.py +++ b/tests/integration/annotation_import/test_mea_prediction_import.py @@ -11,38 +11,6 @@ """ -def test_data_row_validation_errors(model_run_with_all_project_labels, - object_predictions): - name = str(uuid.uuid4()) - # Set up data for validation errors - # Invalid: Remove 'dataRow' part entirely - del object_predictions[0]['dataRow'] - - # Invalid: Set both id and globalKey - object_predictions[1]['dataRow'] = { - 'id': 'some id', - 'globalKey': 'some global key' - } - - # Valid - object_predictions[2]['dataRow'] = { - 'id': 'some id', - } - - # Valid - object_predictions[3]['dataRow'] = { - 'globalKey': 'some global key', - } - - with pytest.raises(ValueError) as exc_info: - model_run_with_all_project_labels.add_predictions( - name=name, predictions=object_predictions) - exception_str = str(exc_info.value) - assert "Found 2 annotations with errors" in exception_str - assert "'dataRow' is missing in" in exception_str - assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str - - def test_create_from_url(model_run_with_model_run_data_rows, annotation_import_test_helpers): name = str(uuid.uuid4()) diff --git a/tests/unit/test_annotation_import.py b/tests/unit/test_annotation_import.py new file mode 100644 index 000000000..ff0835467 --- /dev/null +++ b/tests/unit/test_annotation_import.py @@ -0,0 +1,85 @@ +import pytest + +from labelbox.schema.annotation_import import AnnotationImport + + +def test_data_row_validation_errors(): + predictions = [ + { + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + }, + "schemaId": "c123", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + }, + { + "answer": { + 
"schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + }, + "schemaId": "c123", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + }, + { + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + }, + "schemaId": "c123", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + }, + { + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + }, + "schemaId": "c123", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + }, + { + "answer": { + "schemaId": "ckrb1sfl8099g0y91cxbd5ftb", + }, + "schemaId": "c123", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + }, + ] + + # Set up data for validation errors + # Invalid: Remove 'dataRow' part entirely + del predictions[0]['dataRow'] + + # Invalid: Set both id and globalKey + predictions[1]['dataRow'] = { + 'id': 'some id', + 'globalKey': 'some global key' + } + + # Invalid: Set both id and globalKey to None + predictions[2]['dataRow'] = {'id': None, 'globalKey': None} + + # Valid + predictions[3]['dataRow'] = { + 'id': 'some id', + } + + # Valid + predictions[4]['dataRow'] = { + 'globalKey': 'some global key', + } + + with pytest.raises(ValueError) as exc_info: + AnnotationImport._validate_data_rows(predictions) + exception_str = str(exc_info.value) + assert "Found 3 annotations with errors" in exception_str + assert "'dataRow' is missing in" in exception_str + assert "Must provide only one of 'id' or 'globalKey' for 'dataRow'" in exception_str + assert "'dataRow': {'id': 'some id', 'globalKey': 'some global key'}" in exception_str + assert "'dataRow': {'id': None, 'globalKey': None}" in exception_str From 2ba04b937706a7ef56831f4c6e7751a6bacb135e Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Mon, 13 Mar 2023 16:53:47 -0700 Subject: [PATCH 5/5] Add _CamelCaseMixin --- labelbox/data/serialization/ndjson/base.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/labelbox/data/serialization/ndjson/base.py 
b/labelbox/data/serialization/ndjson/base.py index 493771bfe..4a2946e1f 100644 --- a/labelbox/data/serialization/ndjson/base.py +++ b/labelbox/data/serialization/ndjson/base.py @@ -2,26 +2,22 @@ from uuid import uuid4 from pydantic import BaseModel, root_validator, validator, Field -from labelbox.utils import camel_case, is_exactly_one_set +from labelbox.utils import _CamelCaseMixin, camel_case, is_exactly_one_set from ...annotation_types.types import Cuid -class DataRow(BaseModel): +class DataRow(_CamelCaseMixin): id: str = None global_key: str = None @root_validator() def must_set_one(cls, values): - if is_exactly_one_set(values.get('id'), values.get('global_key')): + if not is_exactly_one_set(values.get('id'), values.get('global_key')): raise ValueError("Must set either id or global_key") return values - class Config: - allow_population_by_field_name = True - alias_generator = camel_case - -class NDJsonBase(BaseModel): +class NDJsonBase(_CamelCaseMixin): uuid: str = None data_row: DataRow @@ -38,10 +34,6 @@ def dict(self, *args, **kwargs): res['dataRow'].pop('globalKey') return res - class Config: - allow_population_by_field_name = True - alias_generator = camel_case - class NDAnnotation(NDJsonBase): name: Optional[str] = None