7 changes: 0 additions & 7 deletions tests/integration/conftest.py
@@ -750,13 +750,6 @@ def export_v2_test_helpers() -> Type[ExportV2Helpers]:
return ExportV2Helpers()


@pytest.fixture(scope="session")
def is_adv_enabled(client) -> bool:
query_str = "query IsAdvEnabledPyApi { user { isAdvEnabled } }"
response = client.execute(query_str)
return bool(response['user']['isAdvEnabled'])


IMAGE_URL = "https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000000034.jpg"
EXTERNAL_ID = "my-image"

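The deleted fixture resolved the ADV feature flag once per test session; with the flag assumed permanently on, every `if is_adv_enabled:` branch in the test files below collapses to its ADV-only assertions. For reference, a minimal standalone sketch of the same check outside pytest, reusing the query string from the removed fixture (the API-key placeholder is hypothetical):

```python
from labelbox import Client

# One-off check of the flag the removed fixture cached for the whole session.
# Assumes a valid API key; the placeholder below is not a real credential.
client = Client(api_key="<LABELBOX_API_KEY>")
response = client.execute("query IsAdvEnabledPyApi { user { isAdvEnabled } }")
print(bool(response["user"]["isAdvEnabled"]))
```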
25 changes: 10 additions & 15 deletions tests/integration/test_data_row_metadata.py
@@ -227,7 +227,7 @@ def test_bulk_partial_delete_datarow_metadata(data_row, mdo):
assert len(fields) == (len(metadata.fields) - 1)


def test_large_bulk_delete_datarow_metadata(big_dataset, mdo, is_adv_enabled):
def test_large_bulk_delete_datarow_metadata(big_dataset, mdo):
metadata = []
data_row_ids = [dr.uid for dr in big_dataset.data_rows()]
for data_row_id in data_row_ids:
@@ -249,13 +249,11 @@ def test_large_bulk_delete_datarow_metadata(big_dataset, mdo, is_adv_enabled):
data_row_id=data_row_id,
fields=[SPLIT_SCHEMA_ID, CAPTURE_DT_SCHEMA_ID]))
errors = mdo.bulk_delete(deletes)
if is_adv_enabled:
assert len(errors) == len(data_row_ids)
for error in errors:
assert error.fields == [CAPTURE_DT_SCHEMA_ID]
assert error.error == 'Schema did not exist'
else:
assert len(errors) == 0

assert len(errors) == len(data_row_ids)
for error in errors:
assert error.fields == [CAPTURE_DT_SCHEMA_ID]
assert error.error == 'Schema did not exist'

for data_row_id in data_row_ids:
fields = [f for f in mdo.bulk_export([data_row_id])[0].fields]
@@ -308,17 +306,14 @@ def test_upsert_non_existent_schema_id(data_row, mdo):
mdo.bulk_upsert([metadata])


def test_delete_non_existent_schema_id(data_row, mdo, is_adv_enabled):
def test_delete_non_existent_schema_id(data_row, mdo):
res = mdo.bulk_delete([
DeleteDataRowMetadata(data_row_id=data_row.uid,
fields=[SPLIT_SCHEMA_ID])
])
if is_adv_enabled:
assert len(res) == 1
assert res[0].fields == [SPLIT_SCHEMA_ID]
assert res[0].error == 'Schema did not exist'
else:
assert len(res) == 0
assert len(res) == 1
assert res[0].fields == [SPLIT_SCHEMA_ID]
assert res[0].error == 'Schema did not exist'


def test_parse_raw_metadata(mdo):
136 changes: 38 additions & 98 deletions tests/integration/test_data_rows.py
@@ -10,11 +10,9 @@

from labelbox import DataRow
from labelbox.exceptions import MalformedQueryException
from labelbox.schema.export_filters import DatarowExportFilters
from labelbox.schema.task import Task
from labelbox.schema.data_row_metadata import DataRowMetadataField, DataRowMetadataKind
import labelbox.exceptions
from utils import INTEGRATION_SNAPSHOT_DIRECTORY

SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal"
TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt"
@@ -484,8 +482,7 @@ def create_data_row(data_rows):
CUSTOM_TEXT_SCHEMA_NAME].uid


def test_create_data_rows_with_invalid_metadata(dataset, image_url,
is_adv_enabled):
def test_create_data_rows_with_invalid_metadata(dataset, image_url):
fields = make_metadata_fields()
# make the payload invalid by providing the same schema id more than once
fields.append(
@@ -496,14 +493,11 @@ def test_create_data_rows_with_invalid_metadata(dataset, image_url,
DataRow.metadata_fields: fields
}])
task.wait_till_done(timeout_seconds=60)
if is_adv_enabled:
assert task.status == "COMPLETE"
assert len(task.failed_data_rows) == 1
assert f"A schemaId can only be specified once per DataRow : [{TEXT_SCHEMA_ID}]" in task.failed_data_rows[
0]["message"]
else:
assert task.status == "FAILED"
assert len(task.failed_data_rows) > 0

assert task.status == "COMPLETE"
assert len(task.failed_data_rows) == 1
assert f"A schemaId can only be specified once per DataRow : [{TEXT_SCHEMA_ID}]" in task.failed_data_rows[
0]["message"]


def test_create_data_rows_with_metadata_missing_value(dataset, image_url):
@@ -815,7 +809,7 @@ def test_data_row_bulk_creation_with_unique_global_keys(dataset, sample_image):


def test_data_row_bulk_creation_with_same_global_keys(dataset, sample_image,
snapshot, is_adv_enabled):
snapshot):
global_key_1 = str(uuid.uuid4())
task = dataset.create_data_rows([{
DataRow.row_data: sample_image,
@@ -826,48 +820,22 @@ def test_data_row_bulk_creation_with_same_global_keys(dataset, sample_image,
}])

task.wait_till_done()
if is_adv_enabled:
assert task.status == "COMPLETE"
assert type(task.failed_data_rows) is list
assert len(task.failed_data_rows) == 1
assert type(task.created_data_rows) is list
assert len(task.created_data_rows) == 1
assert task.failed_data_rows[0][
'message'] == f"Duplicate global key: '{global_key_1}'"
assert task.failed_data_rows[0]['failedDataRows'][0][
'externalId'] == sample_image
assert task.created_data_rows[0]['externalId'] == sample_image
assert task.created_data_rows[0]['globalKey'] == global_key_1
else:
assert task.status == "FAILED"
assert len(task.failed_data_rows) > 0
assert len(list(dataset.data_rows())) == 0
assert task.errors == "Data rows contain duplicate global keys"

# Dynamic values, resetting to make snapshot
task.failed_data_rows[0]['failedDataRows'][0]['rowData'] = ''
task.failed_data_rows[0]['failedDataRows'][1]['rowData'] = ''
task.failed_data_rows[0]['failedDataRows'][0]['globalKey'] = ''
task.failed_data_rows[0]['failedDataRows'][1]['globalKey'] = ''
snapshot.snapshot_dir = INTEGRATION_SNAPSHOT_DIRECTORY
snapshot.assert_match(
json.dumps(task.failed_data_rows),
'test_data_rows.test_data_row_bulk_creation_with_same_global_keys.failed_data_rows.json'
)

task = dataset.create_data_rows([{
DataRow.row_data: sample_image,
DataRow.global_key: global_key_1
}])

task.wait_till_done()
assert task.status == "COMPLETE"
assert len(list(dataset.data_rows())) == 1
assert list(dataset.data_rows())[0].global_key == global_key_1
assert task.status == "COMPLETE"
assert type(task.failed_data_rows) is list
assert len(task.failed_data_rows) == 1
assert type(task.created_data_rows) is list
assert len(task.created_data_rows) == 1
assert task.failed_data_rows[0][
'message'] == f"Duplicate global key: '{global_key_1}'"
assert task.failed_data_rows[0]['failedDataRows'][0][
'externalId'] == sample_image
assert task.created_data_rows[0]['externalId'] == sample_image
assert task.created_data_rows[0]['globalKey'] == global_key_1


def test_data_row_delete_and_create_with_same_global_key(
client, dataset, sample_image, is_adv_enabled):
client, dataset, sample_image):
global_key_1 = str(uuid.uuid4())
data_row_payload = {
DataRow.row_data: sample_image,
@@ -887,15 +855,10 @@ def test_data_row_delete_and_create_with_same_global_key(
task = dataset.create_data_rows([data_row_payload])
task.wait_till_done()

if is_adv_enabled:
assert task.status == "COMPLETE"
assert len(task.failed_data_rows) == 1
assert task.failed_data_rows[0][
'message'] == f"Duplicate global key: '{global_key_1}'"
else:
assert task.status == "FAILED"
assert len(task.failed_data_rows) > 0
assert task.errors.startswith("Duplicate global keys found")
assert task.status == "COMPLETE"
assert len(task.failed_data_rows) == 1
assert task.failed_data_rows[0][
'message'] == f"Duplicate global key: '{global_key_1}'"

# delete datarow
client.get_data_row(new_data_row_id).delete()
@@ -934,7 +897,7 @@ def test_data_row_bulk_creation_sync_with_unique_global_keys(


def test_data_row_bulk_creation_sync_with_same_global_keys(
dataset, sample_image, is_adv_enabled):
dataset, sample_image):
global_key_1 = str(uuid.uuid4())

with pytest.raises(labelbox.exceptions.MalformedQueryException) as exc_info:
@@ -946,22 +909,10 @@ def test_data_row_bulk_creation_sync_with_same_global_keys(
DataRow.global_key: global_key_1
}])

if is_adv_enabled:
# ADV will import the first data row but not the second (duplicate global key)
assert len(list(dataset.data_rows())) == 1
assert list(dataset.data_rows())[0].global_key == global_key_1
assert "Some data rows were not imported. Check error output here" in str(
exc_info.value)
else:
assert len(list(dataset.data_rows())) == 0

dataset.create_data_rows_sync([{
DataRow.row_data: sample_image,
DataRow.global_key: global_key_1
}])

assert len(list(dataset.data_rows())) == 1
assert list(dataset.data_rows())[0].global_key == global_key_1
assert len(list(dataset.data_rows())) == 1
assert list(dataset.data_rows())[0].global_key == global_key_1
assert "Some data rows were not imported. Check error output here" in str(
exc_info.value)


@pytest.fixture
@@ -995,27 +946,19 @@ def test_create_conversational_text(converstational_data_rows,
data_row.row_data).json() == conversational_content['row_data']


def test_invalid_media_type(dataset, conversational_content, is_adv_enabled):
for error_message, invalid_media_type in [[
"Found invalid contents for media type: 'IMAGE'", 'IMAGE'
], ["Found invalid media type: 'totallyinvalid'", 'totallyinvalid']]:
def test_invalid_media_type(dataset, conversational_content):
for _, __ in [["Found invalid contents for media type: 'IMAGE'", 'IMAGE'],
[
"Found invalid media type: 'totallyinvalid'",
'totallyinvalid'
]]:
# TODO: What error kind should this be? It looks like for global key we are
# using malformed query. But for invalid contents in FileUploads we use InvalidQueryError
with pytest.raises(labelbox.exceptions.InvalidQueryError):
dataset.create_data_rows_sync([{
**conversational_content, 'media_type': 'IMAGE'
}])

if is_adv_enabled:
# ADV does not take media type hint into account for async import requests
continue

task = dataset.create_data_rows([{
**conversational_content, 'media_type': invalid_media_type
}])
task.wait_till_done()
assert task.errors == {'message': error_message}


def test_create_tiled_layer(dataset, tile_content):
examples = [
@@ -1044,15 +987,12 @@ def test_create_data_row_with_attachments(dataset):
assert len(attachments) == 1


def test_create_data_row_with_media_type(dataset, image_url, is_adv_enabled):
def test_create_data_row_with_media_type(dataset, image_url):
with pytest.raises(labelbox.exceptions.InvalidQueryError) as exc:
dr = dataset.create_data_row(
row_data={'invalid_object': 'invalid_value'}, media_type="IMAGE")
if is_adv_enabled:
assert "Media type validation failed, expected: 'image/*', was: application/json" in str(
exc.value)
else:
assert "Found invalid contents for media type: \'IMAGE\'" in str(
exc.value)

assert "Media type validation failed, expected: 'image/*', was: application/json" in str(
exc.value)

dataset.create_data_row(row_data=image_url, media_type="IMAGE")
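The global-key tests above reflect the same shift: with ADV on, a bulk import containing a duplicate global key now completes partially instead of failing the whole task. A minimal usage sketch of that behavior, assuming an existing `dataset` fixture and an `image_url` as in these tests:

```python
import uuid

from labelbox import DataRow

# Two rows sharing one global key: the first imports, the second is rejected
# with a per-row error message, and the task still reports COMPLETE.
global_key = str(uuid.uuid4())
task = dataset.create_data_rows([
    {DataRow.row_data: image_url, DataRow.global_key: global_key},
    {DataRow.row_data: image_url, DataRow.global_key: global_key},
])
task.wait_till_done()
assert task.status == "COMPLETE"
assert len(task.created_data_rows) == 1
assert task.failed_data_rows[0]["message"] == f"Duplicate global key: '{global_key}'"
```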
23 changes: 6 additions & 17 deletions tests/integration/test_task.py
@@ -8,7 +8,7 @@
TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh"


def test_task_errors(dataset, image_url, snapshot, is_adv_enabled):
def test_task_errors(dataset, image_url, snapshot):
client = dataset.client
task = dataset.create_data_rows([
{
@@ -25,22 +25,11 @@ def test_task_errors(dataset, image_url, snapshot, is_adv_enabled):

assert task in client.get_user().created_tasks()
task.wait_till_done()
if is_adv_enabled:
assert len(task.failed_data_rows) == 1
assert "A schemaId can only be specified once per DataRow : [cko8s9r5v0001h2dk9elqdidh]" in task.failed_data_rows[
0]['message']
assert len(
task.failed_data_rows[0]['failedDataRows'][0]['metadata']) == 2
else:
snapshot.snapshot_dir = INTEGRATION_SNAPSHOT_DIRECTORY
# RowData is dynamic, so we need to remove it from the snapshot
task.failed_data_rows[0]['failedDataRows'][0]['rowData'] = ''
snapshot.assert_match(
json.dumps(task.failed_data_rows),
'test_task.test_task_errors.failed_data_rows.json')
assert task.errors is not None
snapshot.assert_match(json.dumps(task.errors),
'test_task.test_task_errors.errors.json')

assert len(task.failed_data_rows) == 1
assert "A schemaId can only be specified once per DataRow : [cko8s9r5v0001h2dk9elqdidh]" in task.failed_data_rows[
0]['message']
assert len(task.failed_data_rows[0]['failedDataRows'][0]['metadata']) == 2


def test_task_success_json(dataset, image_url, snapshot):