From 22b97bdbdbb6a76bd0469156d7175c7c4640d706 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Mon, 24 Oct 2022 15:02:04 -0400 Subject: [PATCH 01/14] data row content under row_data --- labelbox/schema/dataset.py | 57 +++++++-------- tests/integration/test_data_rows.py | 109 ++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+), 31 deletions(-) diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index a2deb62ab..8afb983f1 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -229,8 +229,8 @@ def _create_descriptor_file(self, items, max_attachments_per_data_row=None): >>> {DataRow.row_data:"http://my_site.com/photos/img_01.jpg"}, >>> {DataRow.row_data:"/path/to/file1.jpg"}, >>> "path/to/file2.jpg", - >>> {"tileLayerUrl" : "http://", ...} - >>> {"conversationalData" : [...], ...} + >>> {DataRow.row_data: {"tileLayerUrl" : "http://", ...}} + >>> {DataRow.row_data: {"type" : ..., 'version' : ..., 'messages' : [...]}} >>> ]) For an example showing how to upload tiled data_rows see the following notebook: @@ -258,7 +258,7 @@ def _create_descriptor_file(self, items, max_attachments_per_data_row=None): def upload_if_necessary(item): row_data = item['row_data'] - if os.path.exists(row_data): + if isinstance(row_data, str) and os.path.exists(row_data): item_url = self.client.upload_file(row_data) item['row_data'] = item_url if 'external_id' not in item: @@ -341,40 +341,39 @@ def validate_keys(item): "`row_data` missing when creating DataRow.") invalid_keys = set(item) - { - *{f.name for f in DataRow.fields()}, 'attachments' + *{f.name for f in DataRow.fields()}, 'attachments', 'media_type' } if invalid_keys: raise InvalidAttributeError(DataRow, invalid_keys) return item + def formatLegacyConversationalData(item): + messages = item.pop("conversationalData") + version = item.pop("version") + type = item.pop("type") + if "externalId" in item: + external_id = item.pop("externalId") + item["external_id"] = external_id + if "globalKey" in item: + global_key = item.pop("globalKey") + item["globalKey"] = global_key + validate_conversational_data(messages) + one_conversation = \ + { + "type": type, + "version": version, + "messages": messages + } + item["row_data"] = one_conversation + return item + def convert_item(item): - # Don't make any changes to tms data if "tileLayerUrl" in item: validate_attachments(item) return item if "conversationalData" in item: - messages = item.pop("conversationalData") - version = item.pop("version") - type = item.pop("type") - if "externalId" in item: - external_id = item.pop("externalId") - item["external_id"] = external_id - if "globalKey" in item: - global_key = item.pop("globalKey") - item["globalKey"] = global_key - validate_conversational_data(messages) - one_conversation = \ - { - "type": type, - "version": version, - "messages": messages - } - conversationUrl = self.client.upload_data( - json.dumps(one_conversation), - content_type="application/json", - filename="conversational_data.json") - item["row_data"] = conversationUrl + formatLegacyConversationalData(item) # Convert all payload variations into the same dict format item = format_row(item) @@ -386,11 +385,7 @@ def convert_item(item): parse_metadata_fields(item) # Upload any local file paths item = upload_if_necessary(item) - - return { - "data" if key == "row_data" else utils.camel_case(key): value - for key, value in item.items() - } + return item if not isinstance(items, Iterable): raise ValueError( diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py index 563358058..d98e859d5 100644 --- a/tests/integration/test_data_rows.py +++ b/tests/integration/test_data_rows.py @@ -1,6 +1,7 @@ from tempfile import NamedTemporaryFile import uuid from datetime import datetime +import json import pytest import requests @@ -695,3 +696,111 @@ def test_data_row_rulk_creation_sync_with_same_global_keys( assert len(list(dataset.data_rows())) == 1 assert list(dataset.data_rows())[0].global_key == global_key_1 + + +def test_create_conversational_text(dataset): + content = { + 'row_data': { + "messages": [{ + "messageId": "message-0", + "timestampUsec": 1530718491, + "content": "I love iphone! i just bought new iphone! 🥰 📲", + "user": { + "userId": "Bot 002", + "name": "Bot" + }, + "align": "left", + "canLabel": False + }], + "version": 1, + "type": "application/vnd.labelbox.conversational" + } + } + examples = [ + { + **content, 'media_type': 'CONVERSATIONAL_TEXT' + }, + content, + content['row_data'] # Old way to check for backwards compatibility + ] + dataset.create_data_rows_sync(examples) + data_rows = list(dataset.data_rows()) + assert len(data_rows) == len(examples) + for data_row in data_rows: + assert requests.get(data_row.row_data).json() == content['row_data'] + + +def test_invalid_media_type(dataset): + content = { + 'row_data': { + "messages": [{ + "messageId": "message-0", + "timestampUsec": 1530718491, + "content": "I love iphone! i just bought new iphone! 🥰 📲", + "user": { + "userId": "Bot 002", + "name": "Bot" + }, + "align": "left", + "canLabel": False + }], + "version": 1, + "type": "application/vnd.labelbox.conversational" + } + } + + for error_message, invalid_media_type in [[ + "Found invalid contents for media type: 'IMAGE'", 'IMAGE' + ], ["Found invalid media type: 'totallyinvalid'", 'totallyinvalid']]: + # TODO: What error kind should this be? It looks like for global key we are + # using malformed query. But for FileUploads we use InvalidQueryError + with pytest.raises(labelbox.exceptions.InvalidQueryError): + dataset.create_data_rows_sync([{ + **content, 'media_type': invalid_media_type + }]) + + task = dataset.create_data_rows([{ + **content, 'media_type': invalid_media_type + }]) + task.wait_till_done() + assert task.errors == {'message': error_message} + + +def test_create_tiled_layer(dataset): + content = { + "row_data": { + "tileLayerUrl": + "https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png", + "bounds": [[19.405662413477728, -99.21052827588443], + [19.400498983095076, -99.20534818927473]], + "minZoom": + 12, + "maxZoom": + 20, + "epsg": + "EPSG4326", + "alternativeLayers": [{ + "tileLayerUrl": + "https://api.mapbox.com/styles/v1/mapbox/satellite-streets-v11/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw", + "name": + "Satellite" + }, { + "tileLayerUrl": + "https://api.mapbox.com/styles/v1/mapbox/navigation-guidance-night-v4/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw", + "name": + "Guidance" + }] + } + } + examples = [ + { + **content, 'media_type': 'TMS_SIMPLE' + }, + content, + content['row_data'] # Old way to check for backwards compatibility + ] + dataset.create_data_rows_sync(examples) + data_rows = list(dataset.data_rows()) + assert len(data_rows) == len(examples) + for data_row in data_rows: + assert json.loads(data_row.row_data) == content['row_data'] From 272e149b3e6ebe679712dafc815588e30574b4aa Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Mon, 24 Oct 2022 18:19:21 -0400 Subject: [PATCH 02/14] create data row from objects --- labelbox/orm/model.py | 4 ++-- labelbox/schema/data_row.py | 7 +++++-- labelbox/schema/dataset.py | 8 ++++++-- labelbox/schema/media_type.py | 4 ++-- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/labelbox/orm/model.py b/labelbox/orm/model.py index 9427489ff..a964a0607 100644 --- a/labelbox/orm/model.py +++ b/labelbox/orm/model.py @@ -104,8 +104,8 @@ def DateTime(*args): return Field(Field.Type.DateTime, *args) @staticmethod - def Enum(enum_cls: type, *args): - return Field(Field.EnumType(enum_cls), *args) + def Enum(enum_cls: type, *args, **kwargs): + return Field(Field.EnumType(enum_cls), *args, **kwargs) @staticmethod def Json(*args): diff --git a/labelbox/schema/data_row.py b/labelbox/schema/data_row.py index 4c7bb8287..e33364ad7 100644 --- a/labelbox/schema/data_row.py +++ b/labelbox/schema/data_row.py @@ -5,6 +5,7 @@ from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable from labelbox.orm.model import Entity, Field, Relationship from labelbox.schema.data_row_metadata import DataRowMetadataField # type: ignore +from labelbox.schema.media_type import MediaType if TYPE_CHECKING: from labelbox import AssetAttachment @@ -48,6 +49,7 @@ class DataRow(DbObject, Updateable, BulkDeletable): name="metadata", graphql_name="customMetadata", result_subquery="customMetadata { schemaId value }") + media_type = Field.Enum(MediaType, "media_type", result_subquery="") # Relationships dataset = Relationship.ToOne("Dataset") @@ -59,8 +61,9 @@ class DataRow(DbObject, Updateable, BulkDeletable): supported_meta_types = supported_attachment_types = set( Entity.AssetAttachment.AttachmentType.__members__) - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, client, field_values, **kwargs): + field_values.update({'mediaType': MediaType.Unknown}) + super().__init__(client, field_values, **kwargs) self.attachments.supports_filtering = False self.attachments.supports_sorting = False diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index 8afb983f1..b820d3c19 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -95,9 +95,12 @@ def convert_field_keys(items): raise InvalidQueryError( "DataRow.row_data missing when creating DataRow.") - # If row data is a local file path, upload it to server. row_data = args[DataRow.row_data.name] - if os.path.exists(row_data): + if not isinstance(row_data, str): + # If the row data is an object, upload as a string + args[DataRow.row_data.name] = json.dumps(row_data) + elif os.path.exists(row_data): + # If row data is a local file path, upload it to server. args[DataRow.row_data.name] = self.client.upload_file(row_data) args[DataRow.dataset.name] = self @@ -106,6 +109,7 @@ def convert_field_keys(items): mdo = self.client.get_data_row_metadata_ontology() args[DataRow.metadata_fields.name] = mdo.parse_upsert_metadata( args[DataRow.metadata_fields.name]) + return self.client._create(DataRow, args) def create_data_rows_sync(self, items) -> None: diff --git a/labelbox/schema/media_type.py b/labelbox/schema/media_type.py index c4e139a67..aaddb83be 100644 --- a/labelbox/schema/media_type.py +++ b/labelbox/schema/media_type.py @@ -21,9 +21,9 @@ class MediaType(Enum): @classmethod def _missing_(cls, name): - """Handle missing null data types for projects + """Handle missing null data types for projects created without setting allowedMediaType - Handle upper case names for compatibility with + Handle upper case names for compatibility with the GraphQL""" if name is None: From 6b9f0e378b94e050344435c7e92d7a69be92863a Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Tue, 25 Oct 2022 06:28:54 -0400 Subject: [PATCH 03/14] add more tests --- tests/integration/test_dataset.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py index 8237f77bd..9cd40a07d 100644 --- a/tests/integration/test_dataset.py +++ b/tests/integration/test_dataset.py @@ -2,7 +2,7 @@ import pytest import requests from labelbox import Dataset -from labelbox.exceptions import ResourceNotFoundError, MalformedQueryException +from labelbox.exceptions import ResourceNotFoundError, MalformedQueryException, InvalidQueryError from labelbox.schema.dataset import MAX_DATAROW_PER_API_OPERATION @@ -103,6 +103,33 @@ def test_upload_video_file(dataset, sample_video: str) -> None: assert response.headers['Content-Type'] == 'video/mp4' +def test_create_pdf(dataset): + dataset.create_data_row( + row_data={ + "pdfUrl": + "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-1.pdf", + "textLayerUrl": + "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-custom-text-layer.json" + }) + dataset.create_data_row(row_data={ + "pdfUrl": + "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-1.pdf", + "textLayerUrl": + "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-custom-text-layer.json" + }, + media_type="PDF") + + with pytest.raises(InvalidQueryError): + # Wrong media type + dataset.create_data_row(row_data={ + "pdfUrl": + "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-1.pdf", + "textLayerUrl": + "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-custom-text-layer.json" + }, + media_type="TEXT") + + def test_bulk_conversation(dataset, sample_bulk_conversation: list) -> None: """ Tests that bulk conversations can be uploaded. From 8e35a26f8fc19de946f0c0924b8e4913898a101e Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Tue, 25 Oct 2022 07:44:21 -0400 Subject: [PATCH 04/14] fix tests --- labelbox/schema/data_row.py | 9 ++++++--- labelbox/schema/dataset.py | 4 ++-- labelbox/test.py | 18 ++++++++++++++++++ .../test_data_row_media_attributes.py | 2 +- tests/integration/test_data_rows.py | 8 +++++--- 5 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 labelbox/test.py diff --git a/labelbox/schema/data_row.py b/labelbox/schema/data_row.py index e33364ad7..1cfe41a28 100644 --- a/labelbox/schema/data_row.py +++ b/labelbox/schema/data_row.py @@ -61,12 +61,15 @@ class DataRow(DbObject, Updateable, BulkDeletable): supported_meta_types = supported_attachment_types = set( Entity.AssetAttachment.AttachmentType.__members__) - def __init__(self, client, field_values, **kwargs): - field_values.update({'mediaType': MediaType.Unknown}) - super().__init__(client, field_values, **kwargs) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) self.attachments.supports_filtering = False self.attachments.supports_sorting = False + def _set_field_values(self, field_values): + field_values.update({'mediaType': MediaType.Unknown}) + super()._set_field_values(field_values) + @staticmethod def bulk_delete(data_rows) -> None: """ Deletes all the given DataRows. diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index b820d3c19..7833d8286 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -353,8 +353,8 @@ def validate_keys(item): def formatLegacyConversationalData(item): messages = item.pop("conversationalData") - version = item.pop("version") - type = item.pop("type") + version = item.pop("version", 1) + type = item.pop("type", "application/vnd.labelbox.conversational") if "externalId" in item: external_id = item.pop("externalId") item["external_id"] = external_id diff --git a/labelbox/test.py b/labelbox/test.py new file mode 100644 index 000000000..8fd8365a6 --- /dev/null +++ b/labelbox/test.py @@ -0,0 +1,18 @@ +def run(fn, model_run): + try: + fn() + except Exception as e: + model_run.update_status(error_message=error) + pipelines[pipeline].update_status(PipelineState.FAILED, + json_data['model_run_id'], + error_message=str(e)) + else: + status + + +def model_run(payload): + + def etl(): + payload + + run(etl) diff --git a/tests/integration/test_data_row_media_attributes.py b/tests/integration/test_data_row_media_attributes.py index d2e1c10b0..e2a594627 100644 --- a/tests/integration/test_data_row_media_attributes.py +++ b/tests/integration/test_data_row_media_attributes.py @@ -7,4 +7,4 @@ def test_export_empty_media_attributes(configured_project_with_label): sleep(10) labels = project.label_generator() label = next(labels) - assert label.data.media_attributes == {} \ No newline at end of file + assert label.data.media_attributes == {} diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py index 4519572d7..5fcdc8874 100644 --- a/tests/integration/test_data_rows.py +++ b/tests/integration/test_data_rows.py @@ -719,10 +719,12 @@ def test_create_conversational_text(dataset): } examples = [ { - **content, 'media_type': 'CONVERSATIONAL_TEXT' + **content, 'media_type': 'CONVERSATIONAL' }, content, - content['row_data'] # Old way to check for backwards compatibility + { + "conversationalData": content['row_data']['messages'] + } # Old way to check for backwards compatibility ] dataset.create_data_rows_sync(examples) data_rows = list(dataset.data_rows()) @@ -754,7 +756,7 @@ def test_invalid_media_type(dataset): "Found invalid contents for media type: 'IMAGE'", 'IMAGE' ], ["Found invalid media type: 'totallyinvalid'", 'totallyinvalid']]: # TODO: What error kind should this be? It looks like for global key we are - # using malformed query. But for FileUploads we use InvalidQueryError + # using malformed query. But for invalid contents in FileUploads we use InvalidQueryError with pytest.raises(labelbox.exceptions.InvalidQueryError): dataset.create_data_rows_sync([{ **content, 'media_type': invalid_media_type From 898b471bbda33029dfaf67edefade73ec9bd81c8 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Tue, 25 Oct 2022 07:59:52 -0400 Subject: [PATCH 05/14] update tests --- tests/integration/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py index 9cd40a07d..89a89b78c 100644 --- a/tests/integration/test_dataset.py +++ b/tests/integration/test_dataset.py @@ -160,7 +160,7 @@ def test_create_descriptor_file(dataset): upload_data_spy.assert_called() call_args, call_kwargs = upload_data_spy.call_args_list[0][ 0], upload_data_spy.call_args_list[0][1] - assert call_args == ('[{"data": "some text..."}]',) + assert call_args == ('[{"row_data": "some text..."}]',) assert call_kwargs == { 'content_type': 'application/json', 'filename': 'json_import.json' From d181d0fa5bb724832bc326f397938defb8b9e599 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Tue, 1 Nov 2022 09:17:17 -0400 Subject: [PATCH 06/14] add support for attachments --- labelbox/orm/model.py | 4 ++-- labelbox/schema/data_row.py | 2 -- labelbox/schema/dataset.py | 25 +++++++++++++++++++++++-- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/labelbox/orm/model.py b/labelbox/orm/model.py index cce43b571..79783c10a 100644 --- a/labelbox/orm/model.py +++ b/labelbox/orm/model.py @@ -104,8 +104,8 @@ def DateTime(*args): return Field(Field.Type.DateTime, *args) @staticmethod - def Enum(enum_cls: type, *args, **kwargs): - return Field(Field.EnumType(enum_cls), *args, **kwargs) + def Enum(enum_cls: type, *args): + return Field(Field.EnumType(enum_cls), *args) @staticmethod def Json(*args): diff --git a/labelbox/schema/data_row.py b/labelbox/schema/data_row.py index 1cfe41a28..24fc25e9c 100644 --- a/labelbox/schema/data_row.py +++ b/labelbox/schema/data_row.py @@ -49,7 +49,6 @@ class DataRow(DbObject, Updateable, BulkDeletable): name="metadata", graphql_name="customMetadata", result_subquery="customMetadata { schemaId value }") - media_type = Field.Enum(MediaType, "media_type", result_subquery="") # Relationships dataset = Relationship.ToOne("Dataset") @@ -67,7 +66,6 @@ def __init__(self, *args, **kwargs): self.attachments.supports_sorting = False def _set_field_values(self, field_values): - field_values.update({'mediaType': MediaType.Unknown}) super()._set_field_values(field_values) @staticmethod diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index 7833d8286..103846a97 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -15,6 +15,7 @@ from labelbox.exceptions import InvalidQueryError, LabelboxError, ResourceNotFoundError, InvalidAttributeError from labelbox.orm.db_object import DbObject, Updateable, Deletable from labelbox.orm.model import Entity, Field, Relationship +from labelbox.orm import query from labelbox.exceptions import MalformedQueryException if TYPE_CHECKING: @@ -102,7 +103,6 @@ def convert_field_keys(items): elif os.path.exists(row_data): # If row data is a local file path, upload it to server. args[DataRow.row_data.name] = self.client.upload_file(row_data) - args[DataRow.dataset.name] = self # Parse metadata fields, if they are provided if DataRow.metadata_fields.name in args: @@ -110,7 +110,28 @@ def convert_field_keys(items): args[DataRow.metadata_fields.name] = mdo.parse_upsert_metadata( args[DataRow.metadata_fields.name]) - return self.client._create(DataRow, args) + query_str = """mutation CreateDataRowPyApi( + $row_data: String!, + $metadata_fields: [DataRowCustomMetadataUpsertInput!]!, + $attachments: [DataRowAttachmentInput!], + $media_type : MediaType, + $dataset: ID! + ){ + createDataRow( + data: + { + rowData: $row_data + mediaType: $media_type + metadataFields: $metadata_fields + attachments: $attachments + dataset: {connect: {id: $dataset}} + } + ) + {%s} + } + """ % query.results_query_part(Entity.DataRow) + res = self.client.execute(query_str, {**args, 'dataset': self.uid}) + return DataRow(self.client, res['createDataRow']) def create_data_rows_sync(self, items) -> None: """ Synchronously bulk upload data rows. From 976fe6f36c17d98aab02fc598971f68d48cb1a5b Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Tue, 1 Nov 2022 10:40:20 -0400 Subject: [PATCH 07/14] add tests --- labelbox/schema/dataset.py | 2 +- tests/integration/test_data_rows.py | 153 +++++++++++++++------------- 2 files changed, 83 insertions(+), 72 deletions(-) diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index 103846a97..75641e3c5 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -112,7 +112,7 @@ def convert_field_keys(items): query_str = """mutation CreateDataRowPyApi( $row_data: String!, - $metadata_fields: [DataRowCustomMetadataUpsertInput!]!, + $metadata_fields: [DataRowCustomMetadataUpsertInput!], $attachments: [DataRowAttachmentInput!], $media_type : MediaType, $dataset: ID! diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py index 5fcdc8874..742d84a3f 100644 --- a/tests/integration/test_data_rows.py +++ b/tests/integration/test_data_rows.py @@ -29,6 +29,56 @@ def mdo(client): yield mdo +@pytest.fixture +def conversational_content(): + return { + 'row_data': { + "messages": [{ + "messageId": "message-0", + "timestampUsec": 1530718491, + "content": "I love iphone! i just bought new iphone! 🥰 📲", + "user": { + "userId": "Bot 002", + "name": "Bot" + }, + "align": "left", + "canLabel": False + }], + "version": 1, + "type": "application/vnd.labelbox.conversational" + } + } + + +@pytest.fixture +def tile_content(): + return { + "row_data": { + "tileLayerUrl": + "https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png", + "bounds": [[19.405662413477728, -99.21052827588443], + [19.400498983095076, -99.20534818927473]], + "minZoom": + 12, + "maxZoom": + 20, + "epsg": + "EPSG4326", + "alternativeLayers": [{ + "tileLayerUrl": + "https://api.mapbox.com/styles/v1/mapbox/satellite-streets-v11/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw", + "name": + "Satellite" + }, { + "tileLayerUrl": + "https://api.mapbox.com/styles/v1/mapbox/navigation-guidance-night-v4/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw", + "name": + "Guidance" + }] + } + } + + def make_metadata_fields(): embeddings = [0.0] * 128 msg = "A message" @@ -699,31 +749,14 @@ def test_data_row_rulk_creation_sync_with_same_global_keys( assert list(dataset.data_rows())[0].global_key == global_key_1 -def test_create_conversational_text(dataset): - content = { - 'row_data': { - "messages": [{ - "messageId": "message-0", - "timestampUsec": 1530718491, - "content": "I love iphone! i just bought new iphone! 🥰 📲", - "user": { - "userId": "Bot 002", - "name": "Bot" - }, - "align": "left", - "canLabel": False - }], - "version": 1, - "type": "application/vnd.labelbox.conversational" - } - } +def test_create_conversational_text(dataset, conversational_content): examples = [ { - **content, 'media_type': 'CONVERSATIONAL' + **conversational_content, 'media_type': 'CONVERSATIONAL' }, - content, + conversational_content, { - "conversationalData": content['row_data']['messages'] + "conversationalData": conversational_content['row_data']['messages'] } # Old way to check for backwards compatibility ] dataset.create_data_rows_sync(examples) @@ -733,25 +766,7 @@ def test_create_conversational_text(dataset): assert requests.get(data_row.row_data).json() == content['row_data'] -def test_invalid_media_type(dataset): - content = { - 'row_data': { - "messages": [{ - "messageId": "message-0", - "timestampUsec": 1530718491, - "content": "I love iphone! i just bought new iphone! 🥰 📲", - "user": { - "userId": "Bot 002", - "name": "Bot" - }, - "align": "left", - "canLabel": False - }], - "version": 1, - "type": "application/vnd.labelbox.conversational" - } - } - +def test_invalid_media_type(dataset, conversational_content): for error_message, invalid_media_type in [[ "Found invalid contents for media type: 'IMAGE'", 'IMAGE' ], ["Found invalid media type: 'totallyinvalid'", 'totallyinvalid']]: @@ -759,51 +774,47 @@ def test_invalid_media_type(dataset): # using malformed query. But for invalid contents in FileUploads we use InvalidQueryError with pytest.raises(labelbox.exceptions.InvalidQueryError): dataset.create_data_rows_sync([{ - **content, 'media_type': invalid_media_type + **conversational_content, 'media_type': invalid_media_type }]) task = dataset.create_data_rows([{ - **content, 'media_type': invalid_media_type + **conversational_content, 'media_type': invalid_media_type }]) task.wait_till_done() assert task.errors == {'message': error_message} -def test_create_tiled_layer(dataset): - content = { - "row_data": { - "tileLayerUrl": - "https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png", - "bounds": [[19.405662413477728, -99.21052827588443], - [19.400498983095076, -99.20534818927473]], - "minZoom": - 12, - "maxZoom": - 20, - "epsg": - "EPSG4326", - "alternativeLayers": [{ - "tileLayerUrl": - "https://api.mapbox.com/styles/v1/mapbox/satellite-streets-v11/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw", - "name": - "Satellite" - }, { - "tileLayerUrl": - "https://api.mapbox.com/styles/v1/mapbox/navigation-guidance-night-v4/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw", - "name": - "Guidance" - }] - } - } +def test_create_tiled_layer(dataset, tile_content): examples = [ { - **content, 'media_type': 'TMS_SIMPLE' + **tile_content, 'media_type': 'TMS_SIMPLE' }, - content, - content['row_data'] # Old way to check for backwards compatibility + tile_content, + tile_content['row_data'] # Old way to check for backwards compatibility ] dataset.create_data_rows_sync(examples) data_rows = list(dataset.data_rows()) assert len(data_rows) == len(examples) for data_row in data_rows: assert json.loads(data_row.row_data) == content['row_data'] + + +def test_create_data_row_with_attachments(dataset): + attachment_value = 'attachment value' + dr = dataset.create_data_row(row_data="123", + attachments=[{ + 'type': 'TEXT', + 'value': attachment_value + }]) + attachments = list(dr.attachments()) + assert len(attachments) == 1 + + +def test_create_data_row_with_media_type(dataset, image_url): + with pytest.raises(labelbox.exceptions.InvalidQueryError) as exc: + dr = dataset.create_data_row( + row_data={'invalid_object': 'invalid_value'}, media_type="IMAGE") + assert "Found invalid contents for media type: \'IMAGE\'" in str(exc.value) + + dataset.create_data_row(row_data=image_url, media_type="IMAGE") + From d95fc2d60dd7e3c1fe87a466539f33e3ea7c3b68 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Tue, 1 Nov 2022 16:13:36 -0400 Subject: [PATCH 08/14] Delete test.py --- labelbox/test.py | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 labelbox/test.py diff --git a/labelbox/test.py b/labelbox/test.py deleted file mode 100644 index 8fd8365a6..000000000 --- a/labelbox/test.py +++ /dev/null @@ -1,18 +0,0 @@ -def run(fn, model_run): - try: - fn() - except Exception as e: - model_run.update_status(error_message=error) - pipelines[pipeline].update_status(PipelineState.FAILED, - json_data['model_run_id'], - error_message=str(e)) - else: - status - - -def model_run(payload): - - def etl(): - payload - - run(etl) From 760241c21dc1000da90e71c2b627440567055227 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Tue, 1 Nov 2022 16:13:57 -0400 Subject: [PATCH 09/14] Update data_row.py --- labelbox/schema/data_row.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/labelbox/schema/data_row.py b/labelbox/schema/data_row.py index 24fc25e9c..4c7bb8287 100644 --- a/labelbox/schema/data_row.py +++ b/labelbox/schema/data_row.py @@ -5,7 +5,6 @@ from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable from labelbox.orm.model import Entity, Field, Relationship from labelbox.schema.data_row_metadata import DataRowMetadataField # type: ignore -from labelbox.schema.media_type import MediaType if TYPE_CHECKING: from labelbox import AssetAttachment @@ -65,9 +64,6 @@ def __init__(self, *args, **kwargs): self.attachments.supports_filtering = False self.attachments.supports_sorting = False - def _set_field_values(self, field_values): - super()._set_field_values(field_values) - @staticmethod def bulk_delete(data_rows) -> None: """ Deletes all the given DataRows. From c8b81e1037b52ef764e4c355c244de9d3d4ea2f2 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Thu, 3 Nov 2022 06:54:36 -0400 Subject: [PATCH 10/14] format --- tests/integration/test_data_rows.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py index 742d84a3f..7e744af21 100644 --- a/tests/integration/test_data_rows.py +++ b/tests/integration/test_data_rows.py @@ -817,4 +817,3 @@ def test_create_data_row_with_media_type(dataset, image_url): assert "Found invalid contents for media type: \'IMAGE\'" in str(exc.value) dataset.create_data_row(row_data=image_url, media_type="IMAGE") - From 18c31316b93d1d03e9fde3486aba516b5f3eaf6d Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Thu, 3 Nov 2022 10:33:34 -0400 Subject: [PATCH 11/14] add global key and external id to create data row --- labelbox/schema/dataset.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py index 75641e3c5..bdfe02e35 100644 --- a/labelbox/schema/dataset.py +++ b/labelbox/schema/dataset.py @@ -115,6 +115,8 @@ def convert_field_keys(items): $metadata_fields: [DataRowCustomMetadataUpsertInput!], $attachments: [DataRowAttachmentInput!], $media_type : MediaType, + $external_id : String, + $global_key : String, $dataset: ID! ){ createDataRow( @@ -123,6 +125,8 @@ def convert_field_keys(items): rowData: $row_data mediaType: $media_type metadataFields: $metadata_fields + externalId: $external_id + globalKey: $global_key attachments: $attachments dataset: {connect: {id: $dataset}} } From 4d1062b97e3350578d355dcd71a5988d1640d7d2 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Thu, 3 Nov 2022 12:22:24 -0400 Subject: [PATCH 12/14] fix var name in test --- tests/integration/test_data_rows.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py index 7e744af21..d256caf60 100644 --- a/tests/integration/test_data_rows.py +++ b/tests/integration/test_data_rows.py @@ -763,7 +763,8 @@ def test_create_conversational_text(dataset, conversational_content): data_rows = list(dataset.data_rows()) assert len(data_rows) == len(examples) for data_row in data_rows: - assert requests.get(data_row.row_data).json() == content['row_data'] + assert requests.get( + data_row.row_data).json() == conversational_content['row_data'] def test_invalid_media_type(dataset, conversational_content): @@ -796,7 +797,7 @@ def test_create_tiled_layer(dataset, tile_content): data_rows = list(dataset.data_rows()) assert len(data_rows) == len(examples) for data_row in data_rows: - assert json.loads(data_row.row_data) == content['row_data'] + assert json.loads(data_row.row_data) == tile_content['row_data'] def test_create_data_row_with_attachments(dataset): From 8c558602ef2996395de1979c372aa972b5be3475 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Mon, 7 Nov 2022 09:21:43 -0500 Subject: [PATCH 13/14] create data row with objects --- labelbox/schema/data_row.py | 9 +++++++++ tests/integration/test_data_rows.py | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/labelbox/schema/data_row.py b/labelbox/schema/data_row.py index 4c7bb8287..0ec7a4e6e 100644 --- a/labelbox/schema/data_row.py +++ b/labelbox/schema/data_row.py @@ -1,5 +1,6 @@ import logging from typing import TYPE_CHECKING +import json from labelbox.orm import query from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable @@ -64,6 +65,14 @@ def __init__(self, *args, **kwargs): self.attachments.supports_filtering = False self.attachments.supports_sorting = False + def update(self, **kwargs): + # Convert row data to string if it is an object + # All other updates pass through + row_data = kwargs.get("row_data") + if isinstance(row_data, dict): + kwargs['row_data'] = json.dumps(kwargs['row_data']) + super().update(**kwargs) + @staticmethod def bulk_delete(data_rows) -> None: """ Deletes all the given DataRows. diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py index d256caf60..bcbd2c315 100644 --- a/tests/integration/test_data_rows.py +++ b/tests/integration/test_data_rows.py @@ -459,6 +459,13 @@ def test_data_row_update(dataset, rand_gen, image_url): data_row.update(external_id=external_id_2) assert data_row.external_id == external_id_2 + data_row.update(row_data="123") + assert data_row.row_data == "123" + + # tileLayer becomes a media attribute + data_row.update(row_data={'pdfUrl': "123", "tileLayerUrl": "123"}) + assert data_row.row_data == "123" + def test_data_row_filtering_sorting(dataset, image_url): task = dataset.create_data_rows([ From c1812cd22b4e75b80734245529dde721d2b1eb54 Mon Sep 17 00:00:00 2001 From: Matt Sokoloff Date: Thu, 10 Nov 2022 13:18:57 -0500 Subject: [PATCH 14/14] fix data row update test --- tests/integration/test_data_rows.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py index bcbd2c315..d885c4391 100644 --- a/tests/integration/test_data_rows.py +++ b/tests/integration/test_data_rows.py @@ -459,12 +459,17 @@ def test_data_row_update(dataset, rand_gen, image_url): data_row.update(external_id=external_id_2) assert data_row.external_id == external_id_2 - data_row.update(row_data="123") - assert data_row.row_data == "123" + in_line_content = "123" + data_row.update(row_data=in_line_content) + assert requests.get(data_row.row_data).text == in_line_content + + data_row.update(row_data=image_url) + assert data_row.row_data == image_url # tileLayer becomes a media attribute - data_row.update(row_data={'pdfUrl': "123", "tileLayerUrl": "123"}) - assert data_row.row_data == "123" + pdf_url = "http://somepdfurl" + data_row.update(row_data={'pdfUrl': pdf_url, "tileLayerUrl": "123"}) + assert data_row.row_data == pdf_url def test_data_row_filtering_sorting(dataset, image_url):