From 2f064a05773e25eaba4ff8a5b4539eea3cc82191 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Mon, 20 Mar 2023 14:42:24 -0700 Subject: [PATCH] Add global key support to ConversationEntity, DocumentEntity, DicomSegments --- labelbox/data/serialization/ndjson/objects.py | 6 +- ...conversation_entity_import_global_key.json | 15 +++++ .../ndjson/pdf_document_entity_import.json | 25 --------- tests/data/assets/ndjson/pdf_import.json | 28 +++++++++- .../assets/ndjson/pdf_import_global_key.json | 27 ++++++++- .../assets/ndjson/pdf_import_name_only.json | 24 ++++++++ .../ndjson/test_classification.py | 2 - .../serialization/ndjson/test_conversation.py | 17 ++++++ tests/data/serialization/ndjson/test_dicom.py | 55 +++++++++++++++++++ .../serialization/ndjson/test_document.py | 14 ----- .../serialization/ndjson/test_global_key.py | 3 +- .../serialization/ndjson/test_text_entity.py | 12 ---- 12 files changed, 169 insertions(+), 59 deletions(-) create mode 100644 tests/data/assets/ndjson/conversation_entity_import_global_key.json delete mode 100644 tests/data/assets/ndjson/pdf_document_entity_import.json create mode 100644 tests/data/serialization/ndjson/test_conversation.py diff --git a/labelbox/data/serialization/ndjson/objects.py b/labelbox/data/serialization/ndjson/objects.py index 27470df92..4576a748c 100644 --- a/labelbox/data/serialization/ndjson/objects.py +++ b/labelbox/data/serialization/ndjson/objects.py @@ -353,7 +353,7 @@ def from_common(cls, segments: List[DICOMObjectAnnotation], data: VideoData, segments = [NDDicomSegment.from_common(segment) for segment in segments] return cls(segments=segments, - dataRow=DataRow(id=data.uid), + dataRow=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), @@ -465,7 +465,7 @@ def from_common(cls, confidence: Optional[float] = None) -> "NDDocumentEntity": return cls(text_selections=document_entity.text_selections, - dataRow=DataRow(id=data.uid), + dataRow=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), @@ -494,7 +494,7 @@ def from_common( return cls(location=Location(start=conversation_entity.start, end=conversation_entity.end), message_id=conversation_entity.message_id, - dataRow=DataRow(id=data.uid), + dataRow=DataRow(id=data.uid, global_key=data.global_key), name=name, schema_id=feature_schema_id, uuid=extra.get('uuid'), diff --git a/tests/data/assets/ndjson/conversation_entity_import_global_key.json b/tests/data/assets/ndjson/conversation_entity_import_global_key.json new file mode 100644 index 000000000..07584dae4 --- /dev/null +++ b/tests/data/assets/ndjson/conversation_entity_import_global_key.json @@ -0,0 +1,15 @@ +[{ + "location": { + "start": 67, + "end": 128 + }, + "messageId": "some-message-id", + "uuid": "5ad9c52f-058d-49c8-a749-3f20b84f8cd4", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "some-text-entity", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "confidence": 0.53 +}] diff --git a/tests/data/assets/ndjson/pdf_document_entity_import.json b/tests/data/assets/ndjson/pdf_document_entity_import.json deleted file mode 100644 index 4ec642aa5..000000000 --- a/tests/data/assets/ndjson/pdf_document_entity_import.json +++ /dev/null @@ -1,25 +0,0 @@ -[ - { - "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", - "dataRow": { - "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" - }, - "name": "named_entity", - "classifications": [], - "textSelections": [ - { - "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", - "tokenIds": [ - "3f984bf3-1d61-44f5-b59a-9658a2e3440f", - "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", - "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", - "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", - "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", - "67c7c19e-4654-425d-bf17-2adb8cf02c30", - "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", - "b0e94071-2187-461e-8e76-96c58738a52c" - ], - "page": 1 } - ] - } -] \ No newline at end of file diff --git a/tests/data/assets/ndjson/pdf_import.json b/tests/data/assets/ndjson/pdf_import.json index 2d9efb854..2f64bae6f 100644 --- a/tests/data/assets/ndjson/pdf_import.json +++ b/tests/data/assets/ndjson/pdf_import.json @@ -97,4 +97,30 @@ "height": 203.83000000000004, "width": 0.37999999999999545 } -}] \ No newline at end of file +}, +{ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" + }, + "name": "named_entity", + "schemaId": "cl6xnuwt95lqq07330tbb3mfd", + "classifications": [], + "textSelections": [ + { + "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + "tokenIds": [ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c" + ], + "page": 1 + } + ] +} +] \ No newline at end of file diff --git a/tests/data/assets/ndjson/pdf_import_global_key.json b/tests/data/assets/ndjson/pdf_import_global_key.json index 8a14273d6..121078cd8 100644 --- a/tests/data/assets/ndjson/pdf_import_global_key.json +++ b/tests/data/assets/ndjson/pdf_import_global_key.json @@ -97,4 +97,29 @@ "height": 203.83000000000004, "width": 0.37999999999999545 } -}] \ No newline at end of file +}, +{ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "dataRow": { + "globalKey": "05e8ee85-072e-4eb2-b30a-501dee9b0d9d" + }, + "name": "named_entity", + "classifications": [], + "textSelections": [ + { + "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + "tokenIds": [ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c" + ], + "page": 1 + } + ] +} +] \ No newline at end of file diff --git a/tests/data/assets/ndjson/pdf_import_name_only.json b/tests/data/assets/ndjson/pdf_import_name_only.json index 0a14d0291..cfff5d149 100644 --- a/tests/data/assets/ndjson/pdf_import_name_only.json +++ b/tests/data/assets/ndjson/pdf_import_name_only.json @@ -90,4 +90,28 @@ "height": 203.83000000000004, "width": 0.37999999999999545 } +}, +{ + "uuid": "f6879f59-d2b5-49c2-aceb-d9e8dc478673", + "dataRow": { + "id": "ckrb1sf1i1g7i0ybcdc6oc8ct" + }, + "name": "named_entity", + "classifications": [], + "textSelections": [ + { + "groupId": "2f4336f4-a07e-4e0a-a9e1-5629b03b719b", + "tokenIds": [ + "3f984bf3-1d61-44f5-b59a-9658a2e3440f", + "3bf00b56-ff12-4e52-8cc1-08dbddb3c3b8", + "6e1c3420-d4b7-4c5a-8fd6-ead43bf73d80", + "87a43d32-af76-4a1d-b262-5c5f4d5ace3a", + "e8606e8a-dfd9-4c49-a635-ad5c879c75d0", + "67c7c19e-4654-425d-bf17-2adb8cf02c30", + "149c5e80-3e07-49a7-ab2d-29ddfe6a38fa", + "b0e94071-2187-461e-8e76-96c58738a52c" + ], + "page": 1 + } + ] }] \ No newline at end of file diff --git a/tests/data/serialization/ndjson/test_classification.py b/tests/data/serialization/ndjson/test_classification.py index 1c56d90ec..00a684b20 100644 --- a/tests/data/serialization/ndjson/test_classification.py +++ b/tests/data/serialization/ndjson/test_classification.py @@ -1,8 +1,6 @@ import json -from labelbox.data.serialization.ndjson.classification import NDRadio from labelbox.data.serialization.ndjson.converter import NDJsonConverter -from labelbox.data.serialization.ndjson.objects import NDLine def test_classification(): diff --git a/tests/data/serialization/ndjson/test_conversation.py b/tests/data/serialization/ndjson/test_conversation.py new file mode 100644 index 000000000..ee475b3c0 --- /dev/null +++ b/tests/data/serialization/ndjson/test_conversation.py @@ -0,0 +1,17 @@ +import json + +import pytest + +from labelbox.data.serialization.ndjson.converter import NDJsonConverter + + +@pytest.mark.parametrize("filename", [ + "tests/data/assets/ndjson/conversation_entity_import.json", + "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json" +]) +def test_conversation_entity_import(filename: str): + with open(filename, 'r') as file: + data = json.load(file) + res = list(NDJsonConverter.deserialize(data)) + res = list(NDJsonConverter.serialize(res)) + assert res == data diff --git a/tests/data/serialization/ndjson/test_dicom.py b/tests/data/serialization/ndjson/test_dicom.py index 4f204a9be..ab2d65921 100644 --- a/tests/data/serialization/ndjson/test_dicom.py +++ b/tests/data/serialization/ndjson/test_dicom.py @@ -19,6 +19,9 @@ label = lb_types.Label(data=lb_types.DicomData(uid="test-uid"), annotations=dicom_polyline_annotations) +label_with_global_key = lb_types.Label( + data=lb_types.DicomData(global_key="test-global-key"), + annotations=dicom_polyline_annotations) label_ndjson = { 'classifications': [], @@ -51,6 +54,37 @@ }], } +label_ndjson_with_global_key = { + 'classifications': [], + 'dataRow': { + 'globalKey': 'test-global-key' + }, + 'name': + 'dicom_polyline', + 'groupKey': + 'axial', + 'segments': [{ + 'keyframes': [{ + 'frame': + 2, + 'line': [ + { + 'x': 680.0, + 'y': 100.0 + }, + { + 'x': 100.0, + 'y': 190.0 + }, + { + 'x': 190.0, + 'y': 220.0 + }, + ] + }] + }], +} + def test_serialize_dicom_polyline_annotation(): serialized_label = next(NDJsonConverter().serialize([label])) @@ -58,12 +92,26 @@ def test_serialize_dicom_polyline_annotation(): assert serialized_label == label_ndjson +def test_serialize_dicom_polyline_annotation_with_global_key(): + serialized_label = next(NDJsonConverter().serialize([label_with_global_key + ])) + serialized_label.pop('uuid') + assert serialized_label == label_ndjson_with_global_key + + def test_deserialize_dicom_polyline_annotation(): deserialized_label = next(NDJsonConverter().deserialize([label_ndjson])) deserialized_label.annotations[0].extra.pop('uuid') assert deserialized_label == label +def test_deserialize_dicom_polyline_annotation_with_global_key(): + deserialized_label = next(NDJsonConverter().deserialize( + [label_ndjson_with_global_key])) + deserialized_label.annotations[0].extra.pop('uuid') + assert deserialized_label == label_with_global_key + + def test_serialize_deserialize_dicom_polyline_annotation(): labels = list(NDJsonConverter.deserialize([label_ndjson])) res = list(NDJsonConverter.serialize(labels)) @@ -71,6 +119,13 @@ def test_serialize_deserialize_dicom_polyline_annotation(): assert res == [label_ndjson] +def test_serialize_deserialize_dicom_polyline_annotation_with_global_key(): + labels = list(NDJsonConverter.deserialize([label_ndjson_with_global_key])) + res = list(NDJsonConverter.serialize(labels)) + res[0].pop('uuid') + assert res == [label_ndjson_with_global_key] + + def test_deserialize_nd_dicom_segments(): nd_dicom_segments = NDDicomSegments(**label_ndjson) assert isinstance(nd_dicom_segments, NDDicomSegments) diff --git a/tests/data/serialization/ndjson/test_document.py b/tests/data/serialization/ndjson/test_document.py index 3d941bb2b..3120d366c 100644 --- a/tests/data/serialization/ndjson/test_document.py +++ b/tests/data/serialization/ndjson/test_document.py @@ -28,20 +28,6 @@ def test_pdf(): f.close() -def test_pdf_document_entity(): - """ - Tests a pdf file with bbox annotations only - """ - with open('tests/data/assets/ndjson/pdf_document_entity_import.json', - 'r') as f: - data = json.load(f) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert [round_dict(x) for x in res] == [round_dict(x) for x in data] - - f.close() - - def test_pdf_with_name_only(): """ Tests a pdf file with bbox annotations only diff --git a/tests/data/serialization/ndjson/test_global_key.py b/tests/data/serialization/ndjson/test_global_key.py index 7e3efdf6e..6de2dcc51 100644 --- a/tests/data/serialization/ndjson/test_global_key.py +++ b/tests/data/serialization/ndjson/test_global_key.py @@ -24,7 +24,8 @@ def round_dict(data): 'tests/data/assets/ndjson/classification_import_global_key.json', 'tests/data/assets/ndjson/metric_import_global_key.json', 'tests/data/assets/ndjson/polyline_import_global_key.json', - 'tests/data/assets/ndjson/text_entity_import_global_key.json' + 'tests/data/assets/ndjson/text_entity_import_global_key.json', + 'tests/data/assets/ndjson/conversation_entity_import_global_key.json', ]) def test_many_types(filename: str): with open(filename, 'r') as f: diff --git a/tests/data/serialization/ndjson/test_text_entity.py b/tests/data/serialization/ndjson/test_text_entity.py index 3010b2752..f62d87ebc 100644 --- a/tests/data/serialization/ndjson/test_text_entity.py +++ b/tests/data/serialization/ndjson/test_text_entity.py @@ -16,15 +16,3 @@ def test_text_entity_import(filename: str): res = list(NDJsonConverter.deserialize(data)) res = list(NDJsonConverter.serialize(res)) assert res == data - - -@pytest.mark.parametrize("filename", [ - "tests/data/assets/ndjson/conversation_entity_import.json", - "tests/data/assets/ndjson/conversation_entity_without_confidence_import.json" -]) -def test_conversation_entity_import(filename: str): - with open(filename, 'r') as file: - data = json.load(file) - res = list(NDJsonConverter.deserialize(data)) - res = list(NDJsonConverter.serialize(res)) - assert res == data \ No newline at end of file