diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 6ed378f09..83c0393af 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -39,6 +39,7 @@ jobs:
           echo "LABELBOX_TEST_ENVIRON=prod" >> $GITHUB_ENV
         else
           echo "LABELBOX_TEST_ENVIRON=staging" >> $GITHUB_ENV
+          echo "FIXTURE_PROFILE=true" >> $GITHUB_ENV
         fi

     - uses: actions/checkout@v2
diff --git a/Makefile b/Makefile
index f9f490554..b7838a7d4 100644
--- a/Makefile
+++ b/Makefile
@@ -13,6 +13,7 @@ test-local: build-image
 		-e LABELBOX_TEST_ENVIRON="local" \
 		-e DA_GCP_LABELBOX_API_KEY=${DA_GCP_LABELBOX_API_KEY} \
 		-e LABELBOX_TEST_API_KEY_LOCAL=${LABELBOX_TEST_API_KEY_LOCAL} \
+		-e FIXTURE_PROFILE=true \
 		local/labelbox-python:test pytest $(PATH_TO_TEST)

 test-staging: build-image
diff --git a/tests/conftest.py b/tests/conftest.py
index b4dd6dce0..b724426d8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -12,7 +12,7 @@
 ]


-@pytest.fixture
+@pytest.fixture(scope="session")
 def rand_gen():

     def gen(field_type):
diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py
index 6db398fe5..d50c44d0c 100644
--- a/tests/integration/annotation_import/conftest.py
+++ b/tests/integration/annotation_import/conftest.py
@@ -9,6 +9,7 @@ from typing import Type

 from labelbox.schema.labeling_frontend import LabelingFrontend
 from labelbox.schema.annotation_import import LabelImport, AnnotationImportState
+from labelbox.schema.project import Project
 from labelbox.schema.queue_mode import QueueMode

 DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS = 40
@@ -210,7 +211,7 @@ def annotations_by_data_type_v2(
     }


-@pytest.fixture
+@pytest.fixture(scope='session')
 def ontology():
     bbox_tool_with_nested_text = {
         'required':
@@ -478,34 +479,49 @@ def func(project):


 @pytest.fixture
-def initial_dataset(client, rand_gen):
-    dataset = client.create_dataset(name=rand_gen(str))
-    yield dataset
-    dataset.delete()
+def configured_project_datarow_id(configured_project):
+
+    def get_data_row_id(indx=0):
+        return configured_project.data_row_ids[indx]
+
+    yield get_data_row_id
+
+
+@pytest.fixture
+def configured_project_one_datarow_id(configured_project_with_one_data_row):
+
+    def get_data_row_id(indx=0):
+        return configured_project_with_one_data_row.data_row_ids[0]
+
+    yield get_data_row_id


 @pytest.fixture
 def configured_project(client, initial_dataset, ontology, rand_gen, image_url):
     dataset = initial_dataset
-    project = client.create_project(
-        name=rand_gen(str),
-        queue_mode=QueueMode.Batch,
-    )
+    project = client.create_project(name=rand_gen(str),
+                                    queue_mode=QueueMode.Batch)
     editor = list(
         client.get_labeling_frontends(
             where=LabelingFrontend.name == "editor"))[0]
     project.setup(editor, ontology)
+
     data_row_ids = []
     for _ in range(len(ontology['tools']) + len(ontology['classifications'])):
         data_row_ids.append(dataset.create_data_row(row_data=image_url).uid)

-    project.create_batch(
+    project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids,
+                                                sleep_interval=3)
+
+    batch = project.create_batch(
         rand_gen(str),
         data_row_ids,  # sample of data row objects
         5  # priority between 1(Highest) - 5(lowest)
     )
     project.data_row_ids = data_row_ids
+
     yield project
+
     project.delete()
@@ -556,7 +572,8 @@ def dataset_conversation_entity(client, rand_gen, conversation_entity_data_row,


 @pytest.fixture
-def configured_project_without_data_rows(client, ontology, rand_gen):
+def configured_project_with_one_data_row(client, ontology, rand_gen,
+                                         initial_dataset, image_url):
     project = client.create_project(name=rand_gen(str),
                                     description=rand_gen(str),
                                     queue_mode=QueueMode.Batch)
@@ -564,7 +581,22 @@ def configured_project_without_data_rows(client, ontology, rand_gen):
     editor = list(
         client.get_labeling_frontends(
             where=LabelingFrontend.name == "editor"))[0]
     project.setup(editor, ontology)
+
+    data_row = initial_dataset.create_data_row(row_data=image_url)
+    data_row_ids = [data_row.uid]
+    project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids,
+                                                sleep_interval=3)
+
+    batch = project.create_batch(
+        rand_gen(str),
+        data_row_ids,  # sample of data row objects
+        5  # priority between 1(Highest) - 5(lowest)
+    )
+    project.data_row_ids = data_row_ids
+
     yield project
+
+    batch.delete()
     project.delete()
@@ -572,11 +604,42 @@ def configured_project_without_data_rows(client, ontology, rand_gen):
 # At the moment it expects only one feature per tool type and this creates unnecessary coupling between different tests
 # In an example of a 'rectangle' we have extended to support multiple instances of the same tool type
 # TODO: we will support this approach in the future for all tools
+#
+"""
+Please note that this fixture now offers the flexibility to configure three
+different strategies for generating data row ids for predictions:
+
+Default (configured_project fixture):
+    configured_project generates a data row for each member of the ontology,
+    so each prediction gets its own data row id. This is applicable to
+    prediction upload cases where the last label overwrites existing ones.
+
+Optimized strategy (configured_project_with_one_data_row fixture):
+    This fixture creates only one data row, and all predictions are mapped
+    to it.
+
+Custom data row ids strategy:
+    Tests can supply hard-coded data row ids when creating a data row is not
+    required. This fixture, named "hardcoded_datarow_id", should be defined
+    locally within a test file. In the future we can use this approach to
+    inject the correct number of data rows instead of using the
+    configured_project fixture, which creates a data row for each member of
+    the ontology (14 in total) on every run.
+"""
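The `prediction_id_mapping` rework below selects one of these three strategies at collection time by inspecting the requesting test's fixtures. As a minimal, self-contained sketch of that dispatch pattern (the fixture names here are illustrative, not part of the SDK):

```python
import pytest


@pytest.fixture
def default_row_id_factory():
    # Stand-in for a default factory such as configured_project_one_datarow_id.
    return lambda indx=0: f"default-row-{indx}"


@pytest.fixture
def mapping(request):
    # request.fixturenames lists every fixture active for the current test,
    # so an opt-in fixture defined locally in a test module (here,
    # "hardcoded_id") takes precedence over the default factory.
    if "hardcoded_id" in request.fixturenames:
        factory = request.getfixturevalue("hardcoded_id")
    else:
        factory = request.getfixturevalue("default_row_id_factory")
    return {"dataRow": {"id": factory(0)}}


def test_mapping_uses_default(mapping):
    assert mapping["dataRow"]["id"] == "default-row-0"
```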
+""" + + @pytest.fixture -def prediction_id_mapping(configured_project): +def prediction_id_mapping(ontology, request): # Maps tool types to feature schema ids - project = configured_project + if 'configured_project' in request.fixturenames: + data_row_id_factory = request.getfixturevalue( + 'configured_project_datarow_id') + project = request.getfixturevalue('configured_project') + elif 'hardcoded_datarow_id' in request.fixturenames: + data_row_id_factory = request.getfixturevalue('hardcoded_datarow_id') + project = request.getfixturevalue('configured_project_with_ontology') + else: + data_row_id_factory = request.getfixturevalue( + 'configured_project_one_datarow_id') + project = request.getfixturevalue( + 'configured_project_with_one_data_row') + ontology = project.ontology().normalized + result = {} for idx, tool in enumerate(ontology['tools'] + ontology['classifications']): @@ -593,7 +656,7 @@ def prediction_id_mapping(configured_project): "schemaId": tool['featureSchemaId'], "name": tool['name'], "dataRow": { - "id": project.data_row_ids[idx], + "id": data_row_id_factory(idx), }, 'tool': tool } @@ -606,7 +669,7 @@ def prediction_id_mapping(configured_project): "schemaId": tool['featureSchemaId'], "name": tool['name'], "dataRow": { - "id": project.data_row_ids[idx], + "id": data_row_id_factory(idx), }, 'tool': tool } diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index 4f001af8d..52552f53d 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -25,15 +25,15 @@ """ -def test_create_from_url(configured_project): +def test_create_from_url(project): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - bulk_import_request = configured_project.upload_annotations(name=name, - annotations=url, - validate=False) + bulk_import_request = project.upload_annotations(name=name, + annotations=url, + validate=False) - assert bulk_import_request.project() == configured_project + assert bulk_import_request.project() == project assert bulk_import_request.name == name assert bulk_import_request.input_file_url == url assert bulk_import_request.error_file_url is None @@ -41,24 +41,24 @@ def test_create_from_url(configured_project): assert bulk_import_request.state == BulkImportRequestState.RUNNING -def test_validate_file(configured_project): +def test_validate_file(project_with_empty_ontology): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" with pytest.raises(MALValidationError): - configured_project.upload_annotations(name=name, - annotations=url, - validate=True) + project_with_empty_ontology.upload_annotations(name=name, + annotations=url, + validate=True) #Schema ids shouldn't match -def test_create_from_objects(configured_project, predictions, +def test_create_from_objects(configured_project_with_one_data_row, predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) - bulk_import_request = configured_project.upload_annotations( + bulk_import_request = configured_project_with_one_data_row.upload_annotations( name=name, annotations=predictions) - assert bulk_import_request.project() == configured_project + assert bulk_import_request.project() == configured_project_with_one_data_row assert bulk_import_request.name == name assert bulk_import_request.error_file_url is None 
     assert bulk_import_request.status_file_url is None
@@ -105,17 +105,17 @@ def test_create_from_local_file(tmp_path, predictions, configured_project,
         bulk_import_request.input_file_url, predictions)


-def test_get(client, configured_project):
+def test_get(client, configured_project_with_one_data_row):
     name = str(uuid.uuid4())
     url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
-    configured_project.upload_annotations(name=name,
-                                          annotations=url,
-                                          validate=False)
+    configured_project_with_one_data_row.upload_annotations(name=name,
+                                                            annotations=url,
+                                                            validate=False)

     bulk_import_request = BulkImportRequest.from_name(
-        client, project_id=configured_project.uid, name=name)
+        client, project_id=configured_project_with_one_data_row.uid, name=name)

-    assert bulk_import_request.project() == configured_project
+    assert bulk_import_request.project() == configured_project_with_one_data_row
     assert bulk_import_request.name == name
     assert bulk_import_request.input_file_url == url
     assert bulk_import_request.error_file_url is None
@@ -123,16 +123,15 @@ def test_get(client, configured_project):
     assert bulk_import_request.state == BulkImportRequestState.RUNNING


-def test_validate_ndjson(tmp_path, configured_project):
+def test_validate_ndjson(tmp_path, configured_project_with_one_data_row):
     file_name = f"broken.ndjson"
     file_path = tmp_path / file_name
     with file_path.open("w") as f:
         f.write("test")

     with pytest.raises(ValueError):
-        configured_project.upload_annotations(name="name",
-                                              validate=True,
-                                              annotations=str(file_path))
+        configured_project_with_one_data_row.upload_annotations(
+            name="name", validate=True, annotations=str(file_path))


 def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
@@ -158,14 +157,13 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):


 @pytest.mark.slow
-def test_wait_till_done(rectangle_inference, configured_project):
+def test_wait_till_done(rectangle_inference,
+                        configured_project_with_one_data_row):
     name = str(uuid.uuid4())
-    url = configured_project.client.upload_data(content=parser.dumps(
-        [rectangle_inference]),
-                                                sign=True)
-    bulk_import_request = configured_project.upload_annotations(name=name,
-                                                                annotations=url,
-                                                                validate=False)
+    url = configured_project_with_one_data_row.client.upload_data(
+        content=parser.dumps([rectangle_inference]), sign=True)
+    bulk_import_request = configured_project_with_one_data_row.upload_annotations(
+        name=name, annotations=url, validate=False)

     assert len(bulk_import_request.inputs) == 1
     bulk_import_request.wait_until_done()
@@ -299,7 +297,7 @@ def test_pdf_mal_bbox(client, configured_project_pdf):
     assert import_annotations.errors == []


-def test_pdf_document_entity(client, configured_project_without_data_rows,
+def test_pdf_document_entity(client, configured_project_with_one_data_row,
                              dataset_pdf_entity, rand_gen):
     # for content "Metal-insulator (MI) transitions have been one of the" in OCR JSON extract tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json
     document_text_selection = DocumentTextSelection(
@@ -323,7 +321,7 @@ def test_pdf_document_entity(client, configured_project_without_data_rows,
     labels = []
     _, data_row_uids = dataset_pdf_entity

-    configured_project_without_data_rows.create_batch(
+    configured_project_with_one_data_row.create_batch(
         rand_gen(str),
         data_row_uids,  # sample of data row objects
         5  # priority between 1(Highest) - 5(lowest)
@@ -338,7 +336,7 @@ def test_pdf_document_entity(client, configured_project_without_data_rows,

     import_annotations = MALPredictionImport.create_from_objects(
         client=client,
-        project_id=configured_project_without_data_rows.uid,
+        project_id=configured_project_with_one_data_row.uid,
         name=f"import {str(uuid.uuid4())}",
         predictions=labels)
     import_annotations.wait_until_done()
@@ -347,14 +345,14 @@ def test_pdf_document_entity(client, configured_project_without_data_rows,


 def test_nested_video_object_annotations(client,
-                                         configured_project_without_data_rows,
+                                         configured_project_with_one_data_row,
                                          video_data,
                                          bbox_video_annotation_objects,
                                          rand_gen):
     labels = []
     _, data_row_uids = video_data
-    configured_project_without_data_rows.update(media_type=MediaType.Video)
-    configured_project_without_data_rows.create_batch(
+    configured_project_with_one_data_row.update(media_type=MediaType.Video)
+    configured_project_with_one_data_row.create_batch(
         rand_gen(str),
         data_row_uids,  # sample of data row objects
         5  # priority between 1(Highest) - 5(lowest)
@@ -366,7 +364,7 @@ def test_nested_video_object_annotations(client,
                    annotations=bbox_video_annotation_objects))
     import_annotations = MALPredictionImport.create_from_objects(
         client=client,
-        project_id=configured_project_without_data_rows.uid,
+        project_id=configured_project_with_one_data_row.uid,
         name=f"import {str(uuid.uuid4())}",
         predictions=labels)
     import_annotations.wait_until_done()
diff --git a/tests/integration/annotation_import/test_conversation_import.py b/tests/integration/annotation_import/test_conversation_import.py
index ac2d5419c..9f1d26e31 100644
--- a/tests/integration/annotation_import/test_conversation_import.py
+++ b/tests/integration/annotation_import/test_conversation_import.py
@@ -7,7 +7,7 @@
 from labelbox.schema.annotation_import import MALPredictionImport


-def test_conversation_entity(client, configured_project_without_data_rows,
+def test_conversation_entity(client, configured_project_with_one_data_row,
                              dataset_conversation_entity, rand_gen):

     conversation_entity_annotation = ConversationEntity(start=0,
@@ -20,7 +20,7 @@ def test_conversation_entity(client, configured_project_without_data_rows,
     labels = []
     _, data_row_uids = dataset_conversation_entity

-    configured_project_without_data_rows.create_batch(
+    configured_project_with_one_data_row.create_batch(
         rand_gen(str),
         data_row_uids,  # sample of data row objects
         5  # priority between 1(Highest) - 5(lowest)
@@ -35,7 +35,7 @@ def test_conversation_entity(client, configured_project_without_data_rows,

     import_annotations = MALPredictionImport.create_from_objects(
         client=client,
-        project_id=configured_project_without_data_rows.uid,
+        project_id=configured_project_with_one_data_row.uid,
         name=f"import {str(uuid.uuid4())}",
         predictions=labels)
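The PDF, video, and conversation tests above all follow the same flow: batch the data rows into the project, build a list of labels, run a MAL import, and wait for it. A condensed sketch of that shared pattern, assuming an authenticated `client` and an already-configured `project` (the helper name is ours, not the SDK's):

```python
import uuid

import labelbox as lb


def import_mal_predictions(client, project, data_row_uids, labels):
    # Mirrors the tests above: the data rows are batched into the project
    # before the import references them.
    project.create_batch(
        str(uuid.uuid4()),  # batch name
        data_row_uids,
        5  # priority between 1 (highest) and 5 (lowest)
    )
    import_job = lb.MALPredictionImport.create_from_objects(
        client=client,
        project_id=project.uid,
        name=f"import {uuid.uuid4()}",
        predictions=labels)
    import_job.wait_until_done()
    return import_job.errors  # the tests above assert this is []
```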
diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py
index f8e392cf5..79e8b03cb 100644
--- a/tests/integration/annotation_import/test_data_types.py
+++ b/tests/integration/annotation_import/test_data_types.py
@@ -135,12 +135,18 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name):
     AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData,
     TextData
 ])
-def test_import_data_types(client, configured_project, initial_dataset,
-                           rand_gen, data_row_json_by_data_type,
-                           annotations_by_data_type, data_type_class):
+def test_import_data_types(
+    client,
+    configured_project,
+    initial_dataset,
+    rand_gen,
+    data_row_json_by_data_type,
+    annotations_by_data_type,
+    data_type_class,
+):

     project = configured_project
-    project_id = configured_project.uid
+    project_id = project.uid
     dataset = initial_dataset

     set_project_media_type_from_data_type(project, data_type_class)
@@ -261,11 +267,11 @@ def test_import_data_types_v2(client, configured_project, initial_dataset,


 @pytest.mark.parametrize('data_type, data_class, annotations', test_params)
-def test_import_label_annotations(client, configured_project, initial_dataset,
-                                  data_row_json_by_data_type, data_type,
-                                  data_class, annotations, rand_gen):
+def test_import_label_annotations(client, configured_project_with_one_data_row,
+                                  initial_dataset, data_row_json_by_data_type,
+                                  data_type, data_class, annotations, rand_gen):

-    project = configured_project
+    project = configured_project_with_one_data_row
     dataset = initial_dataset
     set_project_media_type_from_data_type(project, data_class)
@@ -297,24 +303,32 @@ def test_import_label_annotations(client, configured_project, initial_dataset,
     assert export_task.errors is None
     expected_annotations = get_annotation_comparison_dicts_from_labels(labels)
     actual_annotations = get_annotation_comparison_dicts_from_export(
-        export_task.result, data_row.uid, configured_project.uid)
+        export_task.result, data_row.uid,
+        configured_project_with_one_data_row.uid)
     assert actual_annotations == expected_annotations
     data_row.delete()


-@pytest.mark.parametrize('data_type, data_class, annotations', test_params)
-def test_import_mal_annotations(client, configured_project_without_data_rows,
-                                data_row_json_by_data_type, data_type,
-                                data_class, annotations, rand_gen):
-
+@pytest.fixture
+def one_datarow(client, rand_gen, data_row_json_by_data_type, data_type):
     dataset = client.create_dataset(name=rand_gen(str))
     data_row_json = data_row_json_by_data_type[data_type]
     data_row = dataset.create_data_row(data_row_json)

-    set_project_media_type_from_data_type(configured_project_without_data_rows,
+    yield data_row
+
+    dataset.delete()
+
+
+@pytest.mark.parametrize('data_type, data_class, annotations', test_params)
+def test_import_mal_annotations(client, configured_project_with_one_data_row,
+                                data_type, data_class, annotations, rand_gen,
+                                one_datarow):
+    data_row = one_datarow
+    set_project_media_type_from_data_type(configured_project_with_one_data_row,
                                           data_class)

-    configured_project_without_data_rows.create_batch(
+    configured_project_with_one_data_row.create_batch(
         rand_gen(str),
         [data_row.uid],
     )
@@ -326,7 +341,7 @@ def test_import_mal_annotations(client, configured_project_without_data_rows,

     import_annotations = lb.MALPredictionImport.create_from_objects(
         client=client,
-        project_id=configured_project_without_data_rows.uid,
+        project_id=configured_project_with_one_data_row.uid,
         name=f"import {str(uuid.uuid4())}",
         predictions=labels)
     import_annotations.wait_until_done()
diff --git a/tests/integration/annotation_import/test_label_import.py b/tests/integration/annotation_import/test_label_import.py
index 198ce2e3e..61c602c52 100644
--- a/tests/integration/annotation_import/test_label_import.py
+++ b/tests/integration/annotation_import/test_label_import.py
@@ -9,13 +9,16 @@
 """


-def test_create_from_url(client, configured_project,
+def test_create_from_url(client, configured_project_with_one_data_row,
                          annotation_import_test_helpers):
     name = str(uuid.uuid4())
     url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
     label_import = LabelImport.create_from_url(
-        client=client, project_id=configured_project.uid, name=name, url=url)
-    assert label_import.parent_id == configured_project.uid
+        client=client,
+        project_id=configured_project_with_one_data_row.uid,
+        name=name,
+        url=url)
+    assert label_import.parent_id == configured_project_with_one_data_row.uid
     annotation_import_test_helpers.check_running_state(label_import, name, url)


@@ -52,13 +55,17 @@ def test_create_from_objects(client, configured_project, object_predictions,
     # annotation_import_test_helpers.assert_file_content(label_import.input_file_url, object_predictions)


-def test_get(client, configured_project, annotation_import_test_helpers):
+def test_get(client, configured_project_with_one_data_row,
+             annotation_import_test_helpers):
     name = str(uuid.uuid4())
     url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
     label_import = LabelImport.create_from_url(
-        client=client, project_id=configured_project.uid, name=name, url=url)
+        client=client,
+        project_id=configured_project_with_one_data_row.uid,
+        name=name,
+        url=url)

-    assert label_import.parent_id == configured_project.uid
+    assert label_import.parent_id == configured_project_with_one_data_row.uid
     annotation_import_test_helpers.check_running_state(label_import, name, url)
diff --git a/tests/integration/annotation_import/test_model.py b/tests/integration/annotation_import/test_model.py
index dcfe9ef2c..131ecd9d0 100644
--- a/tests/integration/annotation_import/test_model.py
+++ b/tests/integration/annotation_import/test_model.py
@@ -4,14 +4,14 @@
 from labelbox.exceptions import ResourceNotFoundError


-def test_model(client, configured_project, rand_gen):
+def test_model(client, configured_project_with_one_data_row, rand_gen):
     # Get all
     models = list(client.get_models())
     for m in models:
         assert isinstance(m, Model)

     # Create
-    ontology = configured_project.ontology()
+    ontology = configured_project_with_one_data_row.ontology()
     data = {"name": rand_gen(str), "ontology_id": ontology.uid}
     model = client.create_model(data["name"], data["ontology_id"])
     assert model.name == data["name"]
diff --git a/tests/integration/annotation_import/test_model_run.py b/tests/integration/annotation_import/test_model_run.py
index c94c78cde..328b38ba5 100644
--- a/tests/integration/annotation_import/test_model_run.py
+++ b/tests/integration/annotation_import/test_model_run.py
@@ -87,11 +87,12 @@ def test_model_run_data_rows_delete(model_run_with_data_rows):
     assert len(before) == len(after) + 1


-def test_model_run_upsert_data_rows(dataset, model_run, configured_project):
+def test_model_run_upsert_data_rows(dataset, model_run,
+                                    configured_project_with_one_data_row):
     n_model_run_data_rows = len(list(model_run.model_run_data_rows()))
     assert n_model_run_data_rows == 0

     data_row = dataset.create_data_row(row_data="test row data")
-    configured_project._wait_until_data_rows_are_processed(
+    configured_project_with_one_data_row._wait_until_data_rows_are_processed(
         data_row_ids=[data_row.uid])
     model_run.upsert_data_rows([data_row.uid])
     n_model_run_data_rows = len(list(model_run.model_run_data_rows()))
@@ -167,15 +168,14 @@ def get_model_run_status():
                 errorMessage)


-def test_model_run_split_assignment_by_data_row_ids(model_run, dataset,
-                                                    image_url,
-                                                    configured_project):
-    n_data_rows = 10
+def test_model_run_split_assignment_by_data_row_ids(
+        model_run, dataset, image_url, configured_project_with_one_data_row):
+    n_data_rows = 2
     data_rows = dataset.create_data_rows([{
         "row_data": image_url
     } for _ in range(n_data_rows)])
     data_row_ids = [data_row['id'] for data_row in data_rows.result]
-    configured_project._wait_until_data_rows_are_processed(
+    configured_project_with_one_data_row._wait_until_data_rows_are_processed(
         data_row_ids=data_row_ids)
     model_run.upsert_data_rows(data_row_ids)
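Several of the reworked fixtures and the model-run tests above call `Project._wait_until_data_rows_are_processed(...)` before batching. The SDK's implementation is internal; below is a generic sketch of the same poll-with-timeout idea, where `fetch_statuses` is a hypothetical callable returning `{data_row_id: done}`:

```python
import time


def wait_until_processed(fetch_statuses, data_row_ids,
                         wait_processing_max_seconds=30, sleep_interval=3):
    # Poll until every data row reports done, or give up at the deadline.
    deadline = time.monotonic() + wait_processing_max_seconds
    pending = set(data_row_ids)
    while pending:
        statuses = fetch_statuses(pending)
        pending = {row_id for row_id in pending if not statuses.get(row_id)}
        if not pending:
            break
        if time.monotonic() > deadline:
            raise TimeoutError(f"data rows still processing: {sorted(pending)}")
        time.sleep(sleep_interval)
```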
diff --git a/tests/integration/annotation_import/test_ndjson_validation.py b/tests/integration/annotation_import/test_ndjson_validation.py
index 53bb85eed..123752402 100644
--- a/tests/integration/annotation_import/test_ndjson_validation.py
+++ b/tests/integration/annotation_import/test_ndjson_validation.py
@@ -9,6 +9,32 @@
                                             NDRadio, NDRectangle, NDText,
                                             NDTextEntity, NDTool,
                                             _validate_ndjson)
+from labelbox.schema.labeling_frontend import LabelingFrontend
+from labelbox.schema.queue_mode import QueueMode
+
+
+@pytest.fixture(scope="module", autouse=True)
+def hardcoded_datarow_id():
+    data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q'
+
+    def get_data_row_id(indx=0):
+        return data_row_id
+
+    yield get_data_row_id
+
+
+@pytest.fixture(scope="module", autouse=True)
+def configured_project_with_ontology(client, ontology, rand_gen):
+    project = client.create_project(name=rand_gen(str),
+                                    queue_mode=QueueMode.Batch)
+    editor = list(
+        client.get_labeling_frontends(
+            where=LabelingFrontend.name == "editor"))[0]
+    project.setup(editor, ontology)
+
+    yield project
+
+    project.delete()


 def test_classification_construction(checklist_inference, text_inference):
@@ -37,187 +63,195 @@ def test_tool_construction(inference, expected_type):


 def test_incorrect_feature_schema(rectangle_inference, polygon_inference,
-                                  configured_project):
+                                  configured_project_with_ontology):
     #Valid but incorrect feature schema
     #Prob the error message says something about the config not anything useful. We might want to fix this.
     pred = rectangle_inference.copy()
     pred['schemaId'] = polygon_inference['schemaId']
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)


-def no_tool(text_inference, configured_project):
+def no_tool(text_inference, configured_project_with_ontology):
     pred = text_inference.copy()
     #Missing key
     del pred['answer']
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)


-def test_invalid_text(text_inference, configured_project):
+def test_invalid_text(text_inference, configured_project_with_ontology):
    #and if it is not a string
     pred = text_inference.copy()
     #Extra and wrong key
     del pred['answer']
     pred['answers'] = []
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)
     del pred['answers']

     #Invalid type
     pred['answer'] = []
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)

     #Invalid type
     pred['answer'] = None
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)


-def test_invalid_checklist_item(checklist_inference, configured_project):
+def test_invalid_checklist_item(checklist_inference,
+                                configured_project_with_ontology):
     #Only two points
     pred = checklist_inference.copy()
     pred['answers'] = [pred['answers'][0], pred['answers'][0]]
     #Duplicate schema ids
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)

     pred['answers'] = [{"name": "asdfg"}]
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)

     pred['answers'] = [{"schemaId": "1232132132"}]
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)

     pred['answers'] = [{}]
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)

     pred['answers'] = []
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)

     del pred['answers']
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)


-def test_invalid_polygon(polygon_inference, configured_project):
+def test_invalid_polygon(polygon_inference, configured_project_with_ontology):
     #Only two points
     pred = polygon_inference.copy()
     pred['polygon'] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}]
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)


-def test_incorrect_entity(entity_inference, configured_project):
+def test_incorrect_entity(entity_inference, configured_project_with_ontology):
     entity = entity_inference.copy()
     #Location cannot be a list
     entity["location"] = [0, 10]
     with pytest.raises(MALValidationError):
-        _validate_ndjson([entity], configured_project)
+        _validate_ndjson([entity], configured_project_with_ontology)

     entity["location"] = {"start": -1, "end": 5}
     with pytest.raises(MALValidationError):
-        _validate_ndjson([entity], configured_project)
+        _validate_ndjson([entity], configured_project_with_ontology)

     entity["location"] = {"start": 15, "end": 5}
     with pytest.raises(MALValidationError):
-        _validate_ndjson([entity], configured_project)
+        _validate_ndjson([entity], configured_project_with_ontology)


-def test_incorrect_mask(segmentation_inference, configured_project):
+def test_incorrect_mask(segmentation_inference,
+                        configured_project_with_ontology):
     seg = segmentation_inference.copy()
     seg['mask']['colorRGB'] = [-1, 0, 10]
     with pytest.raises(MALValidationError):
-        _validate_ndjson([seg], configured_project)
+        _validate_ndjson([seg], configured_project_with_ontology)

     seg['mask']['colorRGB'] = [0, 0]
     with pytest.raises(MALValidationError):
-        _validate_ndjson([seg], configured_project)
+        _validate_ndjson([seg], configured_project_with_ontology)

     seg['mask'] = {'counts': [0], 'size': [0, 1]}
     with pytest.raises(MALValidationError):
-        _validate_ndjson([seg], configured_project)
+        _validate_ndjson([seg], configured_project_with_ontology)

     seg['mask'] = {'counts': [-1], 'size': [1, 1]}
     with pytest.raises(MALValidationError):
-        _validate_ndjson([seg], configured_project)
+        _validate_ndjson([seg], configured_project_with_ontology)


-def test_all_validate_json(configured_project, predictions):
+def test_all_validate_json(configured_project_with_ontology, predictions):
     #Predictions contains one of each type of prediction.
     #These should be properly formatted and pass.
-    _validate_ndjson(predictions, configured_project)
+    _validate_ndjson(predictions, configured_project_with_ontology)


-def test_incorrect_line(line_inference, configured_project):
+def test_incorrect_line(line_inference, configured_project_with_ontology):
     line = line_inference.copy()
     line["line"] = [line["line"][0]]  #Just one point
     with pytest.raises(MALValidationError):
-        _validate_ndjson([line], configured_project)
+        _validate_ndjson([line], configured_project_with_ontology)


-def test_incorrect_rectangle(rectangle_inference, configured_project):
+def test_incorrect_rectangle(rectangle_inference,
+                             configured_project_with_ontology):
     del rectangle_inference['bbox']['top']
     with pytest.raises(MALValidationError):
-        _validate_ndjson([rectangle_inference], configured_project)
+        _validate_ndjson([rectangle_inference],
+                         configured_project_with_ontology)


-def test_duplicate_tools(rectangle_inference, configured_project):
+def test_duplicate_tools(rectangle_inference, configured_project_with_ontology):
     #Trying to upload a polygon and rectangle at the same time
     pred = rectangle_inference.copy()
     pred['polygon'] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}]
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)


-def test_invalid_feature_schema(configured_project, rectangle_inference):
+def test_invalid_feature_schema(configured_project_with_ontology,
+                                rectangle_inference):
     #Trying to upload a polygon and rectangle at the same time
     pred = rectangle_inference.copy()
     pred['schemaId'] = "blahblah"
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)


-def test_name_only_feature_schema(configured_project, rectangle_inference):
+def test_name_only_feature_schema(configured_project_with_ontology,
+                                  rectangle_inference):
     #Trying to upload a polygon and rectangle at the same time
     pred = rectangle_inference.copy()
     del pred['schemaId']
-    _validate_ndjson([pred], configured_project)
+    _validate_ndjson([pred], configured_project_with_ontology)


-def test_schema_id_only_feature_schema(configured_project, rectangle_inference):
+def test_schema_id_only_feature_schema(configured_project_with_ontology,
+                                       rectangle_inference):
     #Trying to upload a polygon and rectangle at the same time
     pred = rectangle_inference.copy()
     del pred['name']
-    _validate_ndjson([pred], configured_project)
+    _validate_ndjson([pred], configured_project_with_ontology)


-def test_missing_feature_schema(configured_project, rectangle_inference):
+def test_missing_feature_schema(configured_project_with_ontology,
+                                rectangle_inference):
     #Trying to upload a polygon and rectangle at the same time
     pred = rectangle_inference.copy()
     del pred['schemaId']
     del pred['name']
     with pytest.raises(MALValidationError):
-        _validate_ndjson([pred], configured_project)
+        _validate_ndjson([pred], configured_project_with_ontology)


-def test_validate_ndjson(tmp_path, configured_project):
+def test_validate_ndjson(tmp_path, configured_project_with_ontology):
     file_name = f"broken.ndjson"
     file_path = tmp_path / file_name
     with file_path.open("w") as f:
         f.write("test")

     with pytest.raises(ValueError):
-        configured_project.upload_annotations(name="name",
-                                              annotations=str(file_path),
-                                              validate=True)
+        configured_project_with_ontology.upload_annotations(
+            name="name", annotations=str(file_path), validate=True)


-def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
+def test_validate_ndjson_uuid(tmp_path, configured_project_with_ontology,
+                              predictions):
     file_name = f"repeat_uuid.ndjson"
     file_path = tmp_path / file_name
     repeat_uuid = predictions.copy()
@@ -228,16 +262,15 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
         parser.dump(repeat_uuid, f)

     with pytest.raises(MALValidationError):
-        configured_project.upload_annotations(name="name",
-                                              validate=True,
-                                              annotations=str(file_path))
+        configured_project_with_ontology.upload_annotations(
+            name="name", validate=True, annotations=str(file_path))

     with pytest.raises(MALValidationError):
-        configured_project.upload_annotations(name="name",
-                                              validate=True,
-                                              annotations=repeat_uuid)
+        configured_project_with_ontology.upload_annotations(
+            name="name", validate=True, annotations=repeat_uuid)


-def test_video_upload(video_checklist_inference, configured_project):
+def test_video_upload(video_checklist_inference,
+                      configured_project_with_ontology):
     pred = video_checklist_inference.copy()
-    _validate_ndjson([pred], configured_project)
+    _validate_ndjson([pred], configured_project_with_ontology)
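`test_validate_ndjson_uuid` above exercises the duplicate-uuid rejection. As an illustration of the invariant being checked (this helper is ours; the SDK performs an equivalent check during validation):

```python
import json


def assert_unique_uuids(ndjson_lines):
    # Each annotation in an import must carry a distinct 'uuid'; a repeated
    # uuid is what test_validate_ndjson_uuid expects to be rejected.
    seen = set()
    for line in ndjson_lines:
        annotation = json.loads(line)
        if annotation["uuid"] in seen:
            raise ValueError(f"duplicate uuid: {annotation['uuid']}")
        seen.add(annotation["uuid"])
```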
diff --git a/tests/integration/annotation_import/test_upsert_prediction_import.py b/tests/integration/annotation_import/test_upsert_prediction_import.py
index 9ed045f5e..927b6526d 100644
--- a/tests/integration/annotation_import/test_upsert_prediction_import.py
+++ b/tests/integration/annotation_import/test_upsert_prediction_import.py
@@ -13,7 +13,7 @@
 @pytest.mark.skip()
 def test_create_from_url(client, tmp_path, object_predictions,
                          model_run_with_data_rows,
-                         configured_project_without_data_rows,
+                         configured_project_with_one_data_row,
                          annotation_import_test_helpers):
     name = str(uuid.uuid4())
     file_name = f"{name}.json"
@@ -41,7 +41,7 @@ def test_create_from_url(client, tmp_path, object_predictions,
     annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project(
         name=name,
         predictions=url,
-        project_id=configured_project_without_data_rows.uid,
+        project_id=configured_project_with_one_data_row.uid,
         priority=5)

     assert annotation_import.model_run_id == model_run_with_data_rows.uid
@@ -50,7 +50,7 @@ def test_create_from_url(client, tmp_path, object_predictions,
     assert annotation_import.statuses

     assert batch
-    assert batch.project().uid == configured_project_without_data_rows.uid
+    assert batch.project().uid == configured_project_with_one_data_row.uid

     assert mal_prediction_import
     mal_prediction_import.wait_until_done()
@@ -61,7 +61,7 @@

 @pytest.mark.skip()
 def test_create_from_objects(model_run_with_data_rows,
-                             configured_project_without_data_rows,
+                             configured_project_with_one_data_row,
                              object_predictions,
                              annotation_import_test_helpers):
     name = str(uuid.uuid4())
@@ -76,7 +76,7 @@ def test_create_from_objects(model_run_with_data_rows,
     annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project(
         name=name,
         predictions=predictions,
-        project_id=configured_project_without_data_rows.uid,
+        project_id=configured_project_with_one_data_row.uid,
         priority=5)

     assert annotation_import.model_run_id == model_run_with_data_rows.uid
@@ -85,7 +85,7 @@ def test_create_from_objects(model_run_with_data_rows,
     assert annotation_import.statuses

     assert batch
-    assert batch.project().uid == configured_project_without_data_rows.uid
+    assert batch.project().uid == configured_project_with_one_data_row.uid

     assert mal_prediction_import
     mal_prediction_import.wait_until_done()
@@ -96,7 +96,7 @@

 @pytest.mark.skip()
 def test_create_from_local_file(tmp_path, model_run_with_data_rows,
-                                configured_project_without_data_rows,
+                                configured_project_with_one_data_row,
                                 object_predictions,
                                 annotation_import_test_helpers):

@@ -119,7 +119,7 @@ def test_create_from_local_file(tmp_path, model_run_with_data_rows,
     annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project(
         name=name,
         predictions=str(file_path),
-        project_id=configured_project_without_data_rows.uid,
+        project_id=configured_project_with_one_data_row.uid,
         priority=5)

     assert annotation_import.model_run_id == model_run_with_data_rows.uid
@@ -128,7 +128,7 @@ def test_create_from_local_file(tmp_path, model_run_with_data_rows,
     assert annotation_import.statuses

     assert batch
-    assert batch.project().uid == configured_project_without_data_rows.uid
+    assert batch.project().uid == configured_project_with_one_data_row.uid

     assert mal_prediction_import
     mal_prediction_import.wait_until_done()
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index ed4229b4d..781fe6edb 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -1,6 +1,9 @@
+from collections import defaultdict
+from itertools import islice
 import json
 import os
 import re
+import sys
 import time
 import uuid
 from enum import Enum
@@ -18,13 +21,14 @@
 from labelbox.schema.annotation_import import LabelImport
 from labelbox.schema.enums import AnnotationImportState
 from labelbox.schema.invite import Invite
+from labelbox.schema.project import Project
 from labelbox.schema.queue_mode import QueueMode
 from labelbox.schema.user import User

 IMG_URL = "https://picsum.photos/200/300.jpg"
 SMALL_DATASET_URL = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/potato.jpeg"
 DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS = 30
-DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS = 5
+DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS = 3


 class Environ(Enum):
@@ -386,27 +390,36 @@ def initial_dataset(client, rand_gen):
     dataset = client.create_dataset(name=rand_gen(str))
     yield dataset
+
     dataset.delete()
+
+
+@pytest.fixture
+def project_with_empty_ontology(project):
+    editor = list(
+        project.client.get_labeling_frontends(
+            where=LabelingFrontend.name == "editor"))[0]
+    empty_ontology = {"tools": [], "classifications": []}
+    project.setup(editor, empty_ontology)
+    yield project


 @pytest.fixture
-def configured_project(project, initial_dataset, client, rand_gen, image_url):
+def configured_project(project_with_empty_ontology, initial_dataset, rand_gen,
+                       image_url):
     dataset = initial_dataset
     data_row_id = dataset.create_data_row(row_data=image_url).uid
+    project = project_with_empty_ontology

-    project.create_batch(
+    batch = project.create_batch(
         rand_gen(str),
         [data_row_id],  # sample of data row objects
         5  # priority between 1(Highest) - 5(lowest)
     )
     project.data_row_ids = [data_row_id]

-    editor = list(
-        project.client.get_labeling_frontends(
-            where=LabelingFrontend.name == "editor"))[0]
-    empty_ontology = {"tools": [], "classifications": []}
-    project.setup(editor, empty_ontology)
     yield project

-    dataset.delete()
-    project.delete()
+
+    batch.delete()


 @pytest.fixture
@@ -417,6 +430,10 @@ def configured_project_with_label(client, rand_gen, image_url, project, dataset,
     Additionally includes a create_label method for any needed extra labels
     One label is already created and yielded when using fixture
     """
+    project._wait_until_data_rows_are_processed(
+        data_row_ids=[data_row.uid],
+        wait_processing_max_seconds=DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS,
+        sleep_interval=DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS)

     project.create_batch(
         rand_gen(str),
@@ -426,7 +443,6 @@ def configured_project_with_label(client, rand_gen, image_url, project, dataset,
     ontology = _setup_ontology(project)
     label = _create_label(project, data_row, ontology,
                           wait_for_label_processing)
-
     yield [project, dataset, data_row, label]

     for label in project.labels():
@@ -442,10 +458,8 @@ def configured_batch_project_with_label(project, dataset, data_row,
     One label is already created and yielded when using fixture
     """
     data_rows = [dr.uid for dr in list(dataset.data_rows())]
-    project._wait_until_data_rows_are_processed(
-        data_row_ids=data_rows,
-        wait_processing_max_seconds=DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS,
-        sleep_interval=DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS)
+    project._wait_until_data_rows_are_processed(data_row_ids=data_rows,
+                                                sleep_interval=3)

     project.create_batch("test-batch", data_rows)
     project.data_row_ids = data_rows
@@ -588,7 +602,6 @@ def configured_project_with_complex_ontology(client, initial_dataset, rand_gen,
     project.setup(editor, ontology.asdict())

     yield [project, data_row]
-    dataset.delete()
     project.delete()
@@ -807,3 +820,33 @@ def upload_invalid_data_rows_for_dataset(dataset: Dataset):
         },
     ] * 2)
     task.wait_till_done()
+
+
+def pytest_configure():
+    pytest.report = defaultdict(int)
+
+
+@pytest.hookimpl(hookwrapper=True)
+def pytest_fixture_setup(fixturedef):
+    start = time.time()
+    yield
+    end = time.time()
+
+    exec_time = end - start
+    if "FIXTURE_PROFILE" in os.environ:
+        pytest.report[fixturedef.argname] += exec_time
+
+
+@pytest.fixture(scope='session', autouse=True)
+def print_perf_summary():
+    yield
+
+    if "FIXTURE_PROFILE" in os.environ:
+        sorted_dict = dict(
+            sorted(pytest.report.items(),
+                   key=lambda item: item[1],
+                   reverse=True))
+        num_of_entries = min(10, len(sorted_dict))
+        slowest_fixtures = [(name, sorted_dict[name])
+                            for name in islice(sorted_dict, num_of_entries)]
+        print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr)
diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py
index d1a31e532..de2f15820 100644
--- a/tests/integration/test_dataset.py
+++ b/tests/integration/test_dataset.py
@@ -53,8 +53,6 @@ def dataset_for_filtering(client, rand_gen):

     yield name_1, d1, name_2, d2

-    d1.delete()
-    d2.delete()


 def test_dataset_filtering(client, dataset_for_filtering):
diff --git a/tests/integration/test_filtering.py b/tests/integration/test_filtering.py
index fde7f0638..7dd687759 100644
--- a/tests/integration/test_filtering.py
+++ b/tests/integration/test_filtering.py
@@ -26,7 +26,7 @@ def project_to_test_where(client, rand_gen):
 # other builds simultaneously adding projects to test org
 def test_where(client, project_to_test_where):
     p_a, p_b, p_c = project_to_test_where
-    p_a_name, p_b_name, _ = [p.name for p in [p_a, p_b, p_c]]
+    p_a_name, p_b_name = [p.name for p in [p_a, p_b]]

     def get(where=None):
         date_where = Project.created_at >= p_a.created_at
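The profiling hooks added to tests/integration/conftest.py only record when FIXTURE_PROFILE is set (the workflow and Makefile above now set it), e.g. `FIXTURE_PROFILE=true pytest tests/integration`. A standalone illustration of the aggregation the hook performs, with made-up timings:

```python
import sys
from collections import defaultdict
from itertools import islice

# Accumulate per-fixture wall time, as pytest_fixture_setup does...
report = defaultdict(float)
for fixture_name, seconds in [("configured_project", 7.9), ("ontology", 2.1),
                              ("ontology", 1.8), ("rand_gen", 0.01)]:
    report[fixture_name] += seconds

# ...then report the slowest entries, as print_perf_summary does.
slowest = sorted(report.items(), key=lambda item: item[1], reverse=True)
print("\nTop slowest fixtures:\n", list(islice(slowest, 10)), file=sys.stderr)
```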
diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py
index b9467e0dd..94c98ee50 100644
--- a/tests/integration/test_project.py
+++ b/tests/integration/test_project.py
@@ -171,15 +171,15 @@ def test_attach_instructions(client, project):

 @pytest.mark.skipif(condition=os.environ['LABELBOX_TEST_ENVIRON'] == "onprem",
                     reason="new mutation does not work for onprem")
-def test_html_instructions(configured_project):
+def test_html_instructions(project_with_empty_ontology):
     html_file_path = '/tmp/instructions.html'
     sample_html_str = ""

     with open(html_file_path, 'w') as file:
         file.write(sample_html_str)

-    configured_project.upsert_instructions(html_file_path)
-    updated_ontology = configured_project.ontology().normalized
+    project_with_empty_ontology.upsert_instructions(html_file_path)
+    updated_ontology = project_with_empty_ontology.ontology().normalized

     instructions = updated_ontology.pop('projectInstructions')
     assert requests.get(instructions).text == sample_html_str
@@ -200,10 +200,6 @@ def test_same_ontology_after_instructions(
     assert instructions is not None


-def test_queue_mode(configured_project: Project):
-    assert configured_project.queue_mode == QueueMode.Batch
-
-
 def test_batches(project: Project, dataset: Dataset, image_url):
     task = dataset.create_data_rows([
         {
@@ -243,9 +239,9 @@ def test_create_batch_with_global_keys_async(project: Project, data_rows):
     assert batch_data_rows == set(data_rows)


-def test_media_type(client, configured_project: Project, rand_gen):
+def test_media_type(client, project: Project, rand_gen):
     # Existing project with no media_type
-    assert isinstance(configured_project.media_type, MediaType)
+    assert isinstance(project.media_type, MediaType)

     # Update test
     project = client.create_project(name=rand_gen(str))
diff --git a/tests/integration/test_user_and_org.py b/tests/integration/test_user_and_org.py
index 9f07666de..ca158527c 100644
--- a/tests/integration/test_user_and_org.py
+++ b/tests/integration/test_user_and_org.py
@@ -1,3 +1,6 @@
+from labelbox.schema.project import Project
+
+
 def test_user(client):
     user = client.get_user()
     assert user.uid is not None
@@ -10,14 +13,11 @@ def test_organization(client):
     assert client.get_user() in set(organization.users())


-def test_user_and_org_projects(project):
-    client = project.client
+def test_user_and_org_projects(client, project):
     user = client.get_user()
     org = client.get_organization()

-    user_projects = set(user.projects())
-    org_projects = set(org.projects())
+    user_project = user.projects(where=Project.uid == project.uid)
+    org_project = org.projects(where=Project.uid == project.uid)

-    assert project.created_by() == user
-    assert project.organization() == org
-    assert project in user_projects
-    assert project in org_projects
\ No newline at end of file
+    assert user_project
+    assert org_project
\ No newline at end of file
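The rewritten `test_user_and_org_projects` filters server-side by uid instead of materializing every project in the org. When consuming such filtered queries, note that a lazily-evaluated collection object may be truthy regardless of its contents, so materializing before asserting is the safer pattern; a hedged sketch (the helper name is ours):

```python
from labelbox.schema.project import Project


def user_can_see_project(client, project):
    # Filter server-side by uid, then materialize the paginated result so
    # the check reflects actual rows rather than the truthiness of a lazy
    # collection object.
    matches = list(client.get_user().projects(where=Project.uid == project.uid))
    return len(matches) == 1
```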