1 change: 1 addition & 0 deletions .github/workflows/python-package.yml
@@ -39,6 +39,7 @@ jobs:
echo "LABELBOX_TEST_ENVIRON=prod" >> $GITHUB_ENV
else
echo "LABELBOX_TEST_ENVIRON=staging" >> $GITHUB_ENV
echo "FIXTURE_PROFILE=true" >> $GITHUB_ENV
fi

- uses: actions/checkout@v2
1 change: 1 addition & 0 deletions Makefile
@@ -13,6 +13,7 @@ test-local: build-image
-e LABELBOX_TEST_ENVIRON="local" \
-e DA_GCP_LABELBOX_API_KEY=${DA_GCP_LABELBOX_API_KEY} \
-e LABELBOX_TEST_API_KEY_LOCAL=${LABELBOX_TEST_API_KEY_LOCAL} \
-e FIXTURE_PROFILE=true \
local/labelbox-python:test pytest $(PATH_TO_TEST)

test-staging: build-image
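Note: the workflow and Makefile hunks above only export FIXTURE_PROFILE=true into the test environment. As a hedged sketch of how a conftest.py could consume the flag (pytest_fixture_setup is a real pytest hookwrapper, but gating on this variable and the output format are assumptions, not part of this diff):

import os
import time

import pytest


@pytest.hookimpl(hookwrapper=True)
def pytest_fixture_setup(fixturedef, request):
    # Time every fixture setup; report only when FIXTURE_PROFILE is set.
    start = time.monotonic()
    yield  # the actual fixture setup runs here
    if os.environ.get("FIXTURE_PROFILE") == "true":
        elapsed = time.monotonic() - start
        print(f"[fixture-profile] {fixturedef.argname}: {elapsed:.3f}s")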
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -12,7 +12,7 @@
]


@pytest.fixture
@pytest.fixture(scope="session")
def rand_gen():

def gen(field_type):
93 changes: 78 additions & 15 deletions tests/integration/annotation_import/conftest.py
@@ -9,6 +9,7 @@
from typing import Type
from labelbox.schema.labeling_frontend import LabelingFrontend
from labelbox.schema.annotation_import import LabelImport, AnnotationImportState
from labelbox.schema.project import Project
from labelbox.schema.queue_mode import QueueMode

DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS = 40
@@ -210,7 +211,7 @@ def annotations_by_data_type_v2(
}


@pytest.fixture
@pytest.fixture(scope='session')
def ontology():
bbox_tool_with_nested_text = {
'required':
Expand Down Expand Up @@ -478,34 +479,49 @@ def func(project):


@pytest.fixture
def initial_dataset(client, rand_gen):
dataset = client.create_dataset(name=rand_gen(str))
yield dataset
dataset.delete()
def configured_project_datarow_id(configured_project):

def get_data_row_id(indx=0):
return configured_project.data_row_ids[indx]

yield get_data_row_id


@pytest.fixture
def configured_project_one_datarow_id(configured_project_with_one_data_row):

def get_data_row_id(indx=0):
return configured_project_with_one_data_row.data_row_ids[0]

yield get_data_row_id


@pytest.fixture
def configured_project(client, initial_dataset, ontology, rand_gen, image_url):
dataset = initial_dataset
project = client.create_project(
name=rand_gen(str),
queue_mode=QueueMode.Batch,
)
project = client.create_project(name=rand_gen(str),
queue_mode=QueueMode.Batch)
editor = list(
client.get_labeling_frontends(
where=LabelingFrontend.name == "editor"))[0]
project.setup(editor, ontology)

data_row_ids = []

for _ in range(len(ontology['tools']) + len(ontology['classifications'])):
data_row_ids.append(dataset.create_data_row(row_data=image_url).uid)
project.create_batch(
project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids,
sleep_interval=3)

batch = project.create_batch(
rand_gen(str),
data_row_ids, # sample of data row objects
5 # priority between 1(Highest) - 5(lowest)
)
project.data_row_ids = data_row_ids

yield project

project.delete()


@@ -556,27 +572,74 @@ def dataset_conversation_entity(client, rand_gen, conversation_entity_data_row,


@pytest.fixture
def configured_project_without_data_rows(client, ontology, rand_gen):
def configured_project_with_one_data_row(client, ontology, rand_gen,
initial_dataset, image_url):
project = client.create_project(name=rand_gen(str),
description=rand_gen(str),
queue_mode=QueueMode.Batch)
editor = list(
client.get_labeling_frontends(
where=LabelingFrontend.name == "editor"))[0]
project.setup(editor, ontology)

data_row = initial_dataset.create_data_row(row_data=image_url)
data_row_ids = [data_row.uid]
project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids,
sleep_interval=3)

batch = project.create_batch(
rand_gen(str),
data_row_ids, # sample of data row objects
5 # priority between 1(Highest) - 5(lowest)
)
project.data_row_ids = data_row_ids

yield project

batch.delete()
project.delete()


# This function converts an ontology feature into an actual annotation.
# At the moment it expects only one feature per tool type, which creates unnecessary coupling between different tests.
# For 'rectangle', as an example, we have extended it to support multiple instances of the same tool type.
# TODO: support this approach for all tools in the future.
#
"""
Please note that this fixture now offers the flexibility to configure three different strategies for generating data row ids for predictions:
Default(configured_project fixture):
configured_project that generates a data row for each member of ontology.
This makes sure each prediction has its own data row id. This is applicable to prediction upload cases when last label overwrites existing ones

Optimized Strategy (configured_project_with_one_data_row fixture):
This fixture has only one data row and all predictions will be mapped to it

Custom Data Row IDs Strategy:
Individuals can supply hard-coded data row ids when a creation of data row is not required.
This particular fixture, termed "hardcoded_datarow_id," should be defined locally within a test file.
In the future, we can use this approach to inject correct number of rows instead of using configured_project fixture
that creates a data row for each member of ontology (14 in total) for each run.
"""


@pytest.fixture
def prediction_id_mapping(configured_project):
def prediction_id_mapping(ontology, request):
# Maps tool types to feature schema ids
project = configured_project
if 'configured_project' in request.fixturenames:
data_row_id_factory = request.getfixturevalue(
'configured_project_datarow_id')
project = request.getfixturevalue('configured_project')
elif 'hardcoded_datarow_id' in request.fixturenames:
data_row_id_factory = request.getfixturevalue('hardcoded_datarow_id')
project = request.getfixturevalue('configured_project_with_ontology')
else:
data_row_id_factory = request.getfixturevalue(
'configured_project_one_datarow_id')
project = request.getfixturevalue(
'configured_project_with_one_data_row')

ontology = project.ontology().normalized

result = {}

for idx, tool in enumerate(ontology['tools'] + ontology['classifications']):
@@ -593,7 +656,7 @@ def prediction_id_mapping(configured_project):
"schemaId": tool['featureSchemaId'],
"name": tool['name'],
"dataRow": {
"id": project.data_row_ids[idx],
"id": data_row_id_factory(idx),
},
'tool': tool
}
@@ -606,7 +669,7 @@ def prediction_id_mapping(configured_project):
"schemaId": tool['featureSchemaId'],
"name": tool['name'],
"dataRow": {
"id": project.data_row_ids[idx],
"id": data_row_id_factory(idx),
},
'tool': tool
}
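
Per the strategy docstring above, a test file can opt into the custom strategy by defining a local "hardcoded_datarow_id" fixture. A minimal sketch, assuming only what prediction_id_mapping shows: the fixture must yield a callable that accepts an index, and the id below is a placeholder, not a real data row:

import pytest


@pytest.fixture
def hardcoded_datarow_id():
    data_row_id = "<existing-data-row-id>"  # placeholder; supply a real id in practice

    def get_data_row_id(indx=0):
        return data_row_id

    yield get_data_row_id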
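Which strategy applies is decided entirely by the fixtures a test requests, checked through request.fixturenames. A hedged usage sketch (test names and bodies are illustrative, not from this diff):

def test_one_data_row_per_feature(configured_project, prediction_id_mapping):
    # Requesting configured_project selects the data-row-per-ontology-feature strategy.
    ...


def test_single_shared_data_row(prediction_id_mapping):
    # No strategy fixture requested: prediction_id_mapping falls back to
    # configured_project_with_one_data_row, so all predictions share one data row.
    ...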
70 changes: 34 additions & 36 deletions tests/integration/annotation_import/test_bulk_import_request.py
@@ -25,40 +25,40 @@
"""


def test_create_from_url(configured_project):
def test_create_from_url(project):
name = str(uuid.uuid4())
url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"

bulk_import_request = configured_project.upload_annotations(name=name,
annotations=url,
validate=False)
bulk_import_request = project.upload_annotations(name=name,
annotations=url,
validate=False)

assert bulk_import_request.project() == configured_project
assert bulk_import_request.project() == project
assert bulk_import_request.name == name
assert bulk_import_request.input_file_url == url
assert bulk_import_request.error_file_url is None
assert bulk_import_request.status_file_url is None
assert bulk_import_request.state == BulkImportRequestState.RUNNING


def test_validate_file(configured_project):
def test_validate_file(project_with_empty_ontology):
name = str(uuid.uuid4())
url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
with pytest.raises(MALValidationError):
configured_project.upload_annotations(name=name,
annotations=url,
validate=True)
project_with_empty_ontology.upload_annotations(name=name,
annotations=url,
validate=True)
# Schema ids shouldn't match


def test_create_from_objects(configured_project, predictions,
def test_create_from_objects(configured_project_with_one_data_row, predictions,
annotation_import_test_helpers):
name = str(uuid.uuid4())

bulk_import_request = configured_project.upload_annotations(
bulk_import_request = configured_project_with_one_data_row.upload_annotations(
name=name, annotations=predictions)

assert bulk_import_request.project() == configured_project
assert bulk_import_request.project() == configured_project_with_one_data_row
assert bulk_import_request.name == name
assert bulk_import_request.error_file_url is None
assert bulk_import_request.status_file_url is None
@@ -105,34 +105,33 @@ def test_create_from_local_file(tmp_path, predictions, configured_project,
bulk_import_request.input_file_url, predictions)


def test_get(client, configured_project):
def test_get(client, configured_project_with_one_data_row):
name = str(uuid.uuid4())
url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
configured_project.upload_annotations(name=name,
annotations=url,
validate=False)
configured_project_with_one_data_row.upload_annotations(name=name,
annotations=url,
validate=False)

bulk_import_request = BulkImportRequest.from_name(
client, project_id=configured_project.uid, name=name)
client, project_id=configured_project_with_one_data_row.uid, name=name)

assert bulk_import_request.project() == configured_project
assert bulk_import_request.project() == configured_project_with_one_data_row
assert bulk_import_request.name == name
assert bulk_import_request.input_file_url == url
assert bulk_import_request.error_file_url is None
assert bulk_import_request.status_file_url is None
assert bulk_import_request.state == BulkImportRequestState.RUNNING


def test_validate_ndjson(tmp_path, configured_project):
def test_validate_ndjson(tmp_path, configured_project_with_one_data_row):
file_name = f"broken.ndjson"
file_path = tmp_path / file_name
with file_path.open("w") as f:
f.write("test")

with pytest.raises(ValueError):
configured_project.upload_annotations(name="name",
validate=True,
annotations=str(file_path))
configured_project_with_one_data_row.upload_annotations(
name="name", validate=True, annotations=str(file_path))


def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
@@ -158,14 +157,13 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):


@pytest.mark.slow
def test_wait_till_done(rectangle_inference, configured_project):
def test_wait_till_done(rectangle_inference,
configured_project_with_one_data_row):
name = str(uuid.uuid4())
url = configured_project.client.upload_data(content=parser.dumps(
[rectangle_inference]),
sign=True)
bulk_import_request = configured_project.upload_annotations(name=name,
annotations=url,
validate=False)
url = configured_project_with_one_data_row.client.upload_data(
content=parser.dumps([rectangle_inference]), sign=True)
bulk_import_request = configured_project_with_one_data_row.upload_annotations(
name=name, annotations=url, validate=False)

assert len(bulk_import_request.inputs) == 1
bulk_import_request.wait_until_done()
@@ -299,7 +297,7 @@ def test_pdf_mal_bbox(client, configured_project_pdf):
assert import_annotations.errors == []


def test_pdf_document_entity(client, configured_project_without_data_rows,
def test_pdf_document_entity(client, configured_project_with_one_data_row,
dataset_pdf_entity, rand_gen):
# For the content "Metal-insulator (MI) transitions have been one of the", see the OCR JSON extract at tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json
document_text_selection = DocumentTextSelection(
@@ -323,7 +321,7 @@ def test_pdf_document_entity(client, configured_project_without_data_rows,

labels = []
_, data_row_uids = dataset_pdf_entity
configured_project_without_data_rows.create_batch(
configured_project_with_one_data_row.create_batch(
rand_gen(str),
data_row_uids, # sample of data row objects
5 # priority between 1(Highest) - 5(lowest)
@@ -338,7 +336,7 @@ def test_nested_video_object_annotations(client,

import_annotations = MALPredictionImport.create_from_objects(
client=client,
project_id=configured_project_without_data_rows.uid,
project_id=configured_project_with_one_data_row.uid,
name=f"import {str(uuid.uuid4())}",
predictions=labels)
import_annotations.wait_until_done()
@@ -347,14 +345,14 @@ def test_wait_till_done(rectangle_inference, configured_project):


def test_nested_video_object_annotations(client,
configured_project_without_data_rows,
configured_project_with_one_data_row,
video_data,
bbox_video_annotation_objects,
rand_gen):
labels = []
_, data_row_uids = video_data
configured_project_without_data_rows.update(media_type=MediaType.Video)
configured_project_without_data_rows.create_batch(
configured_project_with_one_data_row.update(media_type=MediaType.Video)
configured_project_with_one_data_row.create_batch(
rand_gen(str),
data_row_uids, # sample of data row objects
5 # priority between 1(Highest) - 5(lowest)
Expand All @@ -366,7 +364,7 @@ def test_nested_video_object_annotations(client,
annotations=bbox_video_annotation_objects))
import_annotations = MALPredictionImport.create_from_objects(
client=client,
project_id=configured_project_without_data_rows.uid,
project_id=configured_project_with_one_data_row.uid,
name=f"import {str(uuid.uuid4())}",
predictions=labels)
import_annotations.wait_until_done()
@@ -7,7 +7,7 @@
from labelbox.schema.annotation_import import MALPredictionImport


def test_conversation_entity(client, configured_project_without_data_rows,
def test_conversation_entity(client, configured_project_with_one_data_row,
dataset_conversation_entity, rand_gen):

conversation_entity_annotation = ConversationEntity(start=0,
Expand All @@ -20,7 +20,7 @@ def test_conversation_entity(client, configured_project_without_data_rows,
labels = []
_, data_row_uids = dataset_conversation_entity

configured_project_without_data_rows.create_batch(
configured_project_with_one_data_row.create_batch(
rand_gen(str),
data_row_uids, # sample of data row objects
5 # priority between 1(Highest) - 5(lowest)
Expand All @@ -35,7 +35,7 @@ def test_conversation_entity(client, configured_project_without_data_rows,

import_annotations = MALPredictionImport.create_from_objects(
client=client,
project_id=configured_project_without_data_rows.uid,
project_id=configured_project_with_one_data_row.uid,
name=f"import {str(uuid.uuid4())}",
predictions=labels)
