57 changes: 15 additions & 42 deletions tests/integration/test_data_row_metadata.py
@@ -14,10 +14,8 @@
 SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
 TRAIN_SPLIT_ID = "cko8sbscr0003h2dk04w86hof"
 TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
-EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
 TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
 CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"
-PRE_COMPUTED_EMBEDDINGS_ID = 'ckrzang79000008l6hb5s6za1'
 CUSTOM_TEXT_SCHEMA_NAME = 'custom_text'

 FAKE_NUMBER_FIELD = {
@@ -56,7 +54,6 @@ def big_dataset(dataset: Dataset, image_url):


 def make_metadata(dr_id) -> DataRowMetadata:
-    embeddings = [0.0] * 128
     msg = "A message"
     time = datetime.utcnow()

@@ -67,14 +64,11 @@ def make_metadata(dr_id) -> DataRowMetadata:
                              value=TEST_SPLIT_ID),
         DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID, value=time),
         DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value=msg),
-        DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID,
-                             value=embeddings),
     ])
     return metadata


 def make_named_metadata(dr_id) -> DataRowMetadata:
-    embeddings = [0.0] * 128
     msg = "A message"
     time = datetime.utcnow()

@@ -86,8 +80,6 @@ def make_named_metadata(dr_id) -> DataRowMetadata:
                              value=time),
         DataRowMetadataField(
             name=CUSTOM_TEXT_SCHEMA_NAME, value=msg),
-        DataRowMetadataField(name='embedding',
-                             value=embeddings),
     ])
     return metadata

@@ -127,10 +119,7 @@ def test_bulk_upsert_datarow_metadata(datarow, mdo: DataRowMetadataOntology):
     mdo.bulk_upsert([metadata])
     exported = mdo.bulk_export([datarow.uid])
     assert len(exported)
-    assert len([
-        field for field in exported[0].fields
-        if field.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
-    ]) == 4
+    assert len([field for field in exported[0].fields]) == 3


 @pytest.mark.slow
@@ -147,10 +136,8 @@ def test_large_bulk_upsert_datarow_metadata(big_dataset, mdo):
         for metadata in mdo.bulk_export(data_row_ids)
     }
     for data_row_id in data_row_ids:
-        assert len([
-            f for f in metadata_lookup.get(data_row_id).fields
-            if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
-        ]), metadata_lookup.get(data_row_id).fields
+        assert len([f for f in metadata_lookup.get(data_row_id).fields
+                   ]), metadata_lookup.get(data_row_id).fields


 def test_upsert_datarow_metadata_by_name(datarow, mdo):
@@ -162,10 +149,8 @@ def test_upsert_datarow_metadata_by_name(datarow, mdo):
         metadata.data_row_id: metadata
         for metadata in mdo.bulk_export([datarow.uid])
     }
-    assert len([
-        f for f in metadata_lookup.get(datarow.uid).fields
-        if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
-    ]), metadata_lookup.get(datarow.uid).fields
+    assert len([f for f in metadata_lookup.get(datarow.uid).fields
+               ]), metadata_lookup.get(datarow.uid).fields


 def test_upsert_datarow_metadata_option_by_name(datarow, mdo):
@@ -220,10 +205,7 @@ def test_bulk_partial_delete_datarow_metadata(datarow, mdo):
     mdo.bulk_delete([
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[TEXT_SCHEMA_ID])
     ])
-    fields = [
-        f for f in mdo.bulk_export([datarow.uid])[0].fields
-        if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
-    ]
+    fields = [f for f in mdo.bulk_export([datarow.uid])[0].fields]
     assert len(fields) == (len(metadata.fields) - 1)


@@ -234,9 +216,8 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
         metadata.append(
             DataRowMetadata(data_row_id=data_row_id,
                             fields=[
-                                DataRowMetadataField(
-                                    schema_id=EMBEDDING_SCHEMA_ID,
-                                    value=[0.1] * 128),
+                                DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID,
+                                                     value=TEST_SPLIT_ID),
                                 DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
                                                      value="test-message")
                             ]))
@@ -248,19 +229,13 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
         deletes.append(
             DeleteDataRowMetadata(
                 data_row_id=data_row_id,
-                fields=[
-                    EMBEDDING_SCHEMA_ID,  #
-                    CAPTURE_DT_SCHEMA_ID
-                ]))
+                fields=[SPLIT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID]))
     errors = mdo.bulk_delete(deletes)
     assert len(errors) == 0
     for data_row_id in data_row_ids:
-        fields = [
-            f for f in mdo.bulk_export([data_row_id])[0].fields
-            if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID
-        ]
+        fields = [f for f in mdo.bulk_export([data_row_id])[0].fields]
         assert len(fields) == 1, fields
-        assert EMBEDDING_SCHEMA_ID not in [field.schema_id for field in fields]
+        assert SPLIT_SCHEMA_ID not in [field.schema_id for field in fields]


 def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
@@ -280,8 +255,7 @@ def test_bulk_delete_datarow_enum_metadata(datarow: DataRow, mdo):
         DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
     exported = mdo.bulk_export([datarow.uid])[0].fields
-    assert len(
-        [f for f in exported if f.schema_id != PRE_COMPUTED_EMBEDDINGS_ID]) == 0
+    assert len(exported) == 0


 def test_raise_enum_upsert_schema_error(datarow, mdo):
@@ -309,11 +283,10 @@ def test_upsert_non_existent_schema_id(datarow, mdo):


 def test_delete_non_existent_schema_id(datarow, mdo):
-    mdo.bulk_delete([
-        DeleteDataRowMetadata(data_row_id=datarow.uid,
-                              fields=[EMBEDDING_SCHEMA_ID])
+    res = mdo.bulk_delete([
+        DeleteDataRowMetadata(data_row_id=datarow.uid, fields=[SPLIT_SCHEMA_ID])
     ])
-    # No message is returned
+    assert len(res) == 0


 def test_parse_raw_metadata(mdo):
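After this change the metadata helpers in this file build exactly three fields per data row: a split, a capture datetime, and a text message. A minimal sketch of what the updated `make_metadata` reduces to, assuming the schema-ID constants at the top of the module (the import path matches the one used in `test_task.py` below):

```python
from datetime import datetime

from labelbox.schema.data_row_metadata import (DataRowMetadata,
                                               DataRowMetadataField)

SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"


def make_metadata(dr_id) -> DataRowMetadata:
    # The three fields the updated assertions count: split, capture
    # datetime, and free text. The embedding field is gone.
    return DataRowMetadata(
        data_row_id=dr_id,
        fields=[
            DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID,
                                 value=TEST_SPLIT_ID),
            DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID,
                                 value=datetime.utcnow()),
            DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value="A message"),
        ])
```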
30 changes: 11 additions & 19 deletions tests/integration/test_data_rows.py
@@ -14,12 +14,10 @@

 SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
 TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
-EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
 TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"
 CAPTURE_DT_SCHEMA_ID = "cko8sdzv70006h2dk8jg64zvb"
 EXPECTED_METADATA_SCHEMA_IDS = [
-    SPLIT_SCHEMA_ID, TEST_SPLIT_ID, EMBEDDING_SCHEMA_ID, TEXT_SCHEMA_ID,
-    CAPTURE_DT_SCHEMA_ID
+    SPLIT_SCHEMA_ID, TEST_SPLIT_ID, TEXT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID
 ].sort()
 CUSTOM_TEXT_SCHEMA_NAME = "custom_text"

@@ -88,21 +86,18 @@ def tile_content():


 def make_metadata_fields():
-    embeddings = [0.0] * 128
     msg = "A message"
     time = datetime.utcnow()

     fields = [
         DataRowMetadataField(schema_id=SPLIT_SCHEMA_ID, value=TEST_SPLIT_ID),
         DataRowMetadataField(schema_id=CAPTURE_DT_SCHEMA_ID, value=time),
         DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value=msg),
-        DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID, value=embeddings),
     ]
     return fields


 def make_metadata_fields_dict():
-    embeddings = [0.0] * 128
     msg = "A message"
     time = datetime.utcnow()

@@ -115,9 +110,6 @@ def make_metadata_fields_dict():
     }, {
         "schema_id": TEXT_SCHEMA_ID,
         "value": msg
-    }, {
-        "schema_id": EMBEDDING_SCHEMA_ID,
-        "value": embeddings
     }]
     return fields

@@ -312,8 +304,8 @@ def test_create_data_row_with_metadata(mdo, dataset, image_url):
     assert data_row.media_attributes is not None
     metadata_fields = data_row.metadata_fields
     metadata = data_row.metadata
-    assert len(metadata_fields) == 4
-    assert len(metadata) == 4
+    assert len(metadata_fields) == 3
+    assert len(metadata) == 3
     assert [m["schemaId"] for m in metadata_fields
            ].sort() == EXPECTED_METADATA_SCHEMA_IDS
     for m in metadata:
@@ -336,8 +328,8 @@ def test_create_data_row_with_metadata_dict(mdo, dataset, image_url):
     assert data_row.media_attributes is not None
     metadata_fields = data_row.metadata_fields
     metadata = data_row.metadata
-    assert len(metadata_fields) == 4
-    assert len(metadata) == 4
+    assert len(metadata_fields) == 3
+    assert len(metadata) == 3
     assert [m["schemaId"] for m in metadata_fields
            ].sort() == EXPECTED_METADATA_SCHEMA_IDS
     for m in metadata:
@@ -347,7 +339,7 @@ def test_create_data_row_with_metadata_dict(mdo, dataset, image_url):
 def test_create_data_row_with_invalid_metadata(dataset, image_url):
     fields = make_metadata_fields()
     fields.append(
-        DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID, value=[0.0] * 128))
+        DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value='some msg'))

     with pytest.raises(labelbox.exceptions.MalformedQueryException):
         dataset.create_data_row(row_data=image_url, metadata_fields=fields)
@@ -393,8 +385,8 @@ def test_create_data_rows_with_metadata(mdo, dataset, image_url):

         metadata_fields = row.metadata_fields
         metadata = row.metadata
-        assert len(metadata_fields) == 4
-        assert len(metadata) == 4
+        assert len(metadata_fields) == 3
+        assert len(metadata) == 3
         assert [m["schemaId"] for m in metadata_fields
                ].sort() == EXPECTED_METADATA_SCHEMA_IDS
         for m in metadata:
@@ -479,7 +471,7 @@ def create_data_row(data_rows):
 def test_create_data_rows_with_invalid_metadata(dataset, image_url):
     fields = make_metadata_fields()
     fields.append(
-        DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID, value=[0.0] * 128))
+        DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value='some msg'))

     task = dataset.create_data_rows([{
         DataRow.row_data: image_url,
@@ -730,7 +722,7 @@ def test_create_data_rows_local_file(dataset, sample_image):
     assert task.status == "COMPLETE"
     data_row = list(dataset.data_rows())[0]
     assert data_row.external_id == "tests/integration/media/sample_image.jpg"
-    assert len(data_row.metadata_fields) == 4
+    assert len(data_row.metadata_fields) == 3


 def test_data_row_with_global_key(dataset, sample_image):
@@ -782,7 +774,7 @@ def test_data_row_bulk_creation_with_same_global_keys(dataset, sample_image):
     assert task.status == "FAILED"
     assert len(task.failed_data_rows) > 0
     assert len(list(dataset.data_rows())) == 0
-    assert task.errors == "Data rows contain empty string or duplicate global keys, which are not allowed"
+    assert task.errors == "Data rows contain duplicate global keys"

     task = dataset.create_data_rows([{
         DataRow.row_data: sample_image,
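The invalid-metadata tests in this file previously appended a second embedding field; they now append a second `TEXT_SCHEMA_ID` field instead, so the behavior under test is unchanged: repeating a schema id on one data row is rejected as malformed. A condensed sketch of that pattern, assuming this module's `make_metadata_fields()` helper and the `dataset`/`image_url` fixtures:

```python
import pytest

import labelbox
from labelbox.schema.data_row_metadata import DataRowMetadataField

TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"


def test_duplicate_schema_id_rejected(dataset, image_url):
    # make_metadata_fields() already contains one TEXT_SCHEMA_ID field,
    # so this append duplicates a schema id on a single row.
    fields = make_metadata_fields()
    fields.append(
        DataRowMetadataField(schema_id=TEXT_SCHEMA_ID, value='some msg'))

    with pytest.raises(labelbox.exceptions.MalformedQueryException):
        dataset.create_data_row(row_data=image_url, metadata_fields=fields)
```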
11 changes: 5 additions & 6 deletions tests/integration/test_task.py
@@ -3,21 +3,20 @@
 from labelbox import DataRow
 from labelbox.schema.data_row_metadata import DataRowMetadataField

-EMBEDDING_SCHEMA_ID = "ckpyije740000yxdk81pbgjdc"
+TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"


 def test_task_errors(dataset, image_url):
     client = dataset.client
-    embeddings = [0.0] * 128
     task = dataset.create_data_rows([
         {
             DataRow.row_data:
                 image_url,
             DataRow.metadata_fields: [
-                DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID,
-                                     value=embeddings),
-                DataRowMetadataField(schema_id=EMBEDDING_SCHEMA_ID,
-                                     value=embeddings)
+                DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
+                                     value='some msg'),
+                DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
+                                     value='some msg 2')
             ]
         },
     ])
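The same duplicate-schema-id trick drives `test_task_errors`: two text fields sharing `TEXT_SCHEMA_ID` replace the two embedding fields, so one data row still carries a repeated schema id and the bulk-create task is expected to surface the failure. The tail of the test is collapsed in the view above, so the wait-and-assert below is an illustrative sketch rather than the PR's exact code:

```python
from labelbox import DataRow
from labelbox.schema.data_row_metadata import DataRowMetadataField

TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"


def trigger_task_error(dataset, image_url):
    task = dataset.create_data_rows([
        {
            DataRow.row_data: image_url,
            DataRow.metadata_fields: [
                # The same schema id twice on one row is invalid metadata.
                DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
                                     value='some msg'),
                DataRowMetadataField(schema_id=TEXT_SCHEMA_ID,
                                     value='some msg 2')
            ]
        },
    ])
    # Illustrative: block until the task settles, then inspect the error
    # payload the collapsed assertions would check.
    task.wait_till_done()
    assert task.errors is not None
    return task.errors
```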