From 46093443a5ee55cfce7e6f620c97c19d68bc9cbb Mon Sep 17 00:00:00 2001
From: Kevin Kim
Date: Fri, 27 Jan 2023 09:49:51 -0800
Subject: [PATCH 1/2] Fix test in test_data_rows.py that expects an error
 message from the backend

---
 tests/integration/test_data_rows.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py
index ade50fa0a..74c520396 100644
--- a/tests/integration/test_data_rows.py
+++ b/tests/integration/test_data_rows.py
@@ -782,7 +782,7 @@ def test_data_row_bulk_creation_with_same_global_keys(dataset, sample_image):
     assert task.status == "FAILED"
     assert len(task.failed_data_rows) > 0
     assert len(list(dataset.data_rows())) == 0
-    assert task.errors == "Data rows contain empty string or duplicate global keys, which are not allowed"
+    assert task.errors == "Data rows contain duplicate global keys"
 
     task = dataset.create_data_rows([{
         DataRow.row_data: sample_image,

From a939c4d2885537f18037255d95547cb915284526 Mon Sep 17 00:00:00 2001
From: Kevin Kim
Date: Fri, 27 Jan 2023 10:36:09 -0800
Subject: [PATCH 2/2] Fix failing metadata-related tests due to embeddings
 sunset

---
 tests/integration/test_data_row_metadata.py | 57 ++++++---------------
 tests/integration/test_data_rows.py         | 28 ++++------
 tests/integration/test_task.py              | 11 ++--
 3 files changed, 30 insertions(+), 66 deletions(-)

diff --git a/tests/integration/test_data_row_metadata.py b/tests/integration/test_data_row_metadata.py
index f1c27d86b..b6082eb1d 100644
--- a/tests/integration/test_data_row_metadata.py
+++ b/tests/integration/test_data_row_metadata.py
@@ -14,10 +14,8 @@
 SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
 TRAIN_SPLIT_ID = "cko8sbscr0003h2dk04w86hof"
 TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
-EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
 TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
 CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"
-PRE_COMPUTED_EMBEDDINGS_ID = 'ckrzang79000008l6hb5s6za1'
 CUSTOM_TEXT_SCHEMA_NAME = 'custom_text'
 
 FAKE_NUMBER_FIELD = {
@@ -56,7 +54,6 @@ def big_dataset(dataset: Dataset, image_url):
 
 
 def make_metadata(dr_id) -> DataRowMetadata:
-    embeddings = [0.0] * 128
     msg = "A message"
     time = datetime.utcnow()
 
@@ -67,14 +64,11 @@ def make_metadata(dr_id) -> DataRowMetadata:
                              value=TEST_SPLIT_ID),
         DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID, value=time),
         DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value=msg),
-        DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID,
-                             value=embeddings),
     ])
     return metadata
 
 
 def make_named_metadata(dr_id) -> DataRowMetadata:
-    embeddings = [0.0] * 128
     msg = "A message"
     time = datetime.utcnow()
 
@@ -86,8 +80,6 @@ def make_named_metadata(dr_id) -> DataRowMetadata:
                              value=time),
         DataRowMetadataField(
             name=CUSTOM_TEXT_SCHEMA_NAME, value=msg),
-        DataRowMetadataField(name='embedding',
-                             value=embeddings),
     ])
     return metadata
 
@@ -127,10 +119,7 @@ def test_bulk_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
     mdo.bulk_upsert([metadata])
     exported = mdo.bulk_export([datarow.uid])
     assert len(exported)
-    assert len([
-        field for field in exported[0].fields
-        if field.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
-    ]) == 4
+    assert len([field for field in exported[0].fields]) == 3
 
 
 @pytest.mark.slow
@@ -147,10 +136,8 @@ def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
         for metadata in mdo.bulk_export(data_row_ids)
     }
     for data_row_id in data_row_ids:
-        assert len([
-            f for f in metadata_lookup.get(data_row_id).fields
-            if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
-        ]), metadata_lookup.get(data_row_id).fields
+        assert len([f for f in metadata_lookup.get(data_row_id).fields
+                   ]), metadata_lookup.get(data_row_id).fields
 
 
 def test_upsert_datarow_metadata_by_name(datarow, mdo):
@@ -162,10 +149,8 @@ def test_upsert_datarow_metadata_by_name(datarow, mdo):
         metadata.data_row_id: metadata
         for metadata in mdo.bulk_export([datarow.uid])
     }
-    assert len([
-        f for f in metadata_lookup.get(datarow.uid).fields
-        if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
-    ]), metadata_lookup.get(datarow.uid).fields
+    assert len([f for f in metadata_lookup.get(datarow.uid).fields
+               ]), metadata_lookup.get(datarow.uid).fields
 
 
 def test_upsert_datarow_metadata_option_by_name(datarow, mdo):
@@ -220,10 +205,7 @@ def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
     ])
-    fields = [
-        f for f in mdo.bulk_export([datarow.uid])[0].fields
-        if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
-    ]
+    fields = [f for f in mdo.bulk_export([datarow.uid])[0].fields]
     assert len(fields) == (len(metadata.fields) - 1)
 
 
@@ -234,9 +216,8 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
         metadata.append(
             DataRowMetadata(data_row_id=data_row_id,
                             fields=[
-                                DataRowMetadataField(
-                                    schema_id=EMBEDDING_SCHEMA_ID,
-                                    value=[0.1] * 128),
+                                DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID,
+                                                     value=TEST_SPLIT_ID),
                                 DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
                                                      value="test-message")
                             ]))
@@ -248,19 +229,13 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
         deletes.append(
             DeleteDataRowMetadata(
                 data_row_id=data_row_id,
-                fields=[
-                    EMBEDDING_SCHEMA_ID,  #
-                    CAPTURE_DT_SCHEMA_ID
-                ]))
+                fields=[SPLIT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID]))
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
     for data_row_id in data_row_ids:
-        fields = [
-            f for f in mdo.bulk_export([data_row_id])[0].fields
-            if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
-        ]
+        fields = [f for f in mdo.bulk_export([data_row_id])[0].fields]
         assert len(fields) == 1, fields
-        assert EMBEDDING_SCHEMA_ID not in [field.schema_id for field in fields]
+        assert SPLIT_SCHEMA_ID not in [field.schema_id for field in fields]
 
 
 def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
@@ -280,8 +255,7 @@ def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
     exported = mdo.bulk_export([datarow.uid])[0].fields
-    assert len(
-        [f for f in exported if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID]) == 0
+    assert len(exported) == 0
 
 
 def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -309,11 +283,10 @@ def test_upsert_non_existent_schema_id(datarow, mdo):
 
 
 def test_delete_non_existent_schema_id(datarow, mdo):
-    mdo.bulk_delete([
-        DeleteDataRowMetadata(data_row_id=datarow.uid,
-                              fields=[EMBEDDING_SCHEMA_ID])
+    res = mdo.bulk_delete([
+        DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
-    # No message is returned
+    assert len(res) == 0
 
 
 def test_parse_raw_metadata(mdo):
diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py
index 74c520396..0f68ef1e2 100644
--- a/tests/integration/test_data_rows.py
+++ b/tests/integration/test_data_rows.py
@@ -14,12 +14,10 @@
 SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
 TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
-EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
 TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
 CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"
 EXPECTED_METADATA_SCHEMA_IDS = [
-    SPLIT_SCHEMA_ID, TEST_SPLIT_ID, EMBEDDING_SCHEMA_ID, TEXT_SCHEMA_ID,
-    CAPTURE_DT_SCHEMA_ID
+    SPLIT_SCHEMA_ID, TEST_SPLIT_ID, TEXT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID
 ].sort()
 
 CUSTOM_TEXT_SCHEMA_NAME = "custom_text"
 
@@ -88,7 +86,6 @@ def tile_content():
 
 
 def make_metadata_fields():
-    embeddings = [0.0] * 128
     msg = "A message"
     time = datetime.utcnow()
 
@@ -96,13 +93,11 @@ def make_metadata_fields():
     fields = [
         DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID, value=TEST_SPLIT_ID),
         DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID, value=time),
         DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value=msg),
-        DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID, value=embeddings),
     ]
     return fields
 
 
 def make_metadata_fields_dict():
-    embeddings = [0.0] * 128
     msg = "A message"
     time = datetime.utcnow()
 
@@ -115,9 +110,6 @@ def make_metadata_fields_dict():
     }, {
         "schema_id": TEXT_SCHEMA_ID,
         "value": msg
-    }, {
-        "schema_id": EMBEDDING_SCHEMA_ID,
-        "value": embeddings
     }]
     return fields
 
@@ -312,8 +304,8 @@ def test_create_data_row_with_metadata(mdo, dataset, image_url):
     assert data_row.media_attributes is not None
     metadata_fields = data_row.metadata_fields
     metadata = data_row.metadata
-    assert len(metadata_fields) == 4
-    assert len(metadata) == 4
+    assert len(metadata_fields) == 3
+    assert len(metadata) == 3
     assert [m["schemaId"] for m in metadata_fields
            ].sort() == EXPECTED_METADATA_SCHEMA_IDS
     for m in metadata:
@@ -336,8 +328,8 @@ def test_create_data_row_with_metadata_dict(mdo, dataset, image_url):
     assert data_row.media_attributes is not None
     metadata_fields = data_row.metadata_fields
     metadata = data_row.metadata
-    assert len(metadata_fields) == 4
-    assert len(metadata) == 4
+    assert len(metadata_fields) == 3
+    assert len(metadata) == 3
     assert [m["schemaId"] for m in metadata_fields
            ].sort() == EXPECTED_METADATA_SCHEMA_IDS
     for m in metadata:
@@ -347,7 +339,7 @@ def test_create_data_row_with_invalid_metadata(dataset, image_url):
     fields = make_metadata_fields()
     fields.append(
-        DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID, value=[0.0] * 128))
+        DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value='some msg'))
 
     with pytest.raises(labelbox.exceptions.MalformedQueryException):
         dataset.create_data_row(row_data=image_url, metadata_fields=fields)
@@ -393,8 +385,8 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url):
 
         metadata_fields = row.metadata_fields
         metadata = row.metadata
-        assert len(metadata_fields) == 4
-        assert len(metadata) == 4
+        assert len(metadata_fields) == 3
+        assert len(metadata) == 3
         assert [m["schemaId"] for m in metadata_fields
                ].sort() == EXPECTED_METADATA_SCHEMA_IDS
         for m in metadata:
@@ -479,7 +471,7 @@ def create_data_row(data_rows):
 def test_create_data_rows_with_invalid_metadata(dataset, image_url):
     fields = make_metadata_fields()
     fields.append(
-        DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID, value=[0.0] * 128))
+        DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value='some msg'))
 
     task = dataset.create_data_rows([{
         DataRow.row_data: image_url,
@@ -730,7 +722,7 @@ def test_create_data_rows_local_file(dataset, sample_image):
     assert task.status == "COMPLETE"
     data_row = list(dataset.data_rows())[0]
     assert data_row.external_id == "tests/integration/media/sample_image.jpg"
-    assert len(data_row.metadata_fields) == 4
+    assert len(data_row.metadata_fields) == 3
 
 
 def test_data_row_with_global_key(dataset, sample_image):
diff --git a/tests/integration/test_task.py b/tests/integration/test_task.py
index e9a9118c3..9b57c54af 100644
--- a/tests/integration/test_task.py
+++ b/tests/integration/test_task.py
@@ -3,21 +3,20 @@
 from labelbox import DataRow
 from labelbox.schema.data_row_metadata import DataRowMetadataField
 
-EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
+TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
 
 
 def test_task_errors(dataset, image_url):
     client = dataset.client
-    embeddings = [0.0] * 128
     task = dataset.create_data_rows([
         {
             DataRow.row_data: image_url,
             DataRow.metadata_fields: [
-                DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID,
-                                     value=embeddings),
-                DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID,
-                                     value=embeddings)
+                DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
+                                     value='some msg'),
+                DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
+                                     value='some msg 2')
             ]
         },
     ])
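
Note (illustration only, not part of either commit): the sketch below shows the
metadata upsert-and-export round trip the updated tests rely on, built solely
from the still-supported schema IDs defined in the test modules above. The
"mdo" and "datarow" parameters stand in for the pytest fixtures of the same
names in these files; everything else is taken from the patch.

# Illustrative sketch -- "mdo" and "datarow" are assumed to be the pytest
# fixtures used by the integration tests above.
from labelbox.schema.data_row_metadata import (DataRowMetadata,
                                               DataRowMetadataField)

SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"


def upsert_and_export(mdo, datarow):
    # Build metadata from supported schemas only; the sunset embedding
    # schema (formerly EMBEDDING_SCHEMA_ID) would now be rejected.
    metadata = DataRowMetadata(data_row_id=datarow.uid,
                               fields=[
                                   DataRowMetadataField(
                                       schema_id=SPLIT_SCHEMA_ID,
                                       value=TEST_SPLIT_ID),
                                   DataRowMetadataField(
                                       schema_id=TEXT_SCHEMA_ID,
                                       value="A message"),
                               ])
    mdo.bulk_upsert([metadata])
    # With pre-computed embeddings gone, every exported field was explicitly
    # upserted, so no filtering by PRE_COMPUTED_EMBEDDINGS_ID is needed and
    # the tests can assert on exact field counts.
    return mdo.bulk_export([datarow.uid])[0].fields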