From 48285e46b2ae509a8c1ccdc4141459ddd5ad006a Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 2 Aug 2023 16:20:23 -0700 Subject: [PATCH 01/10] Add instrumentation for fixtures(temp) --- .../integration/annotation_import/conftest.py | 13 ++++++- tests/integration/conftest.py | 36 +++++++++++++++++++ tests/integration/test_dataset.py | 8 +++-- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 6db398fe5..6e35d4d0a 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -9,6 +9,7 @@ from typing import Type from labelbox.schema.labeling_frontend import LabelingFrontend from labelbox.schema.annotation_import import LabelImport, AnnotationImportState +from labelbox.schema.project import Project from labelbox.schema.queue_mode import QueueMode DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS = 40 @@ -486,6 +487,7 @@ def initial_dataset(client, rand_gen): @pytest.fixture def configured_project(client, initial_dataset, ontology, rand_gen, image_url): + start_time = time.time() dataset = initial_dataset project = client.create_project( name=rand_gen(str), @@ -496,14 +498,21 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): where=LabelingFrontend.name == "editor"))[0] project.setup(editor, ontology) data_row_ids = [] - + # print("Before creating data rows ", time.time() - start_time) + num_rows = 0 for _ in range(len(ontology['tools']) + len(ontology['classifications'])): data_row_ids.append(dataset.create_data_row(row_data=image_url).uid) + num_rows += 1 + # print("After creating data rows ", time.time() - start_time) + + pytest.data_row_report['times'] += time.time() - start_time + pytest.data_row_report['num_rows'] += num_rows project.create_batch( rand_gen(str), data_row_ids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) + print("After creating batch ", time.time() - start_time) project.data_row_ids = data_row_ids yield project project.delete() @@ -1006,6 +1015,7 @@ def model_run_with_training_metadata(rand_gen, model): @pytest.fixture def model_run_with_data_rows(client, configured_project, model_run_predictions, model_run, wait_for_label_processing): + start_time = time.time() configured_project.enable_model_assisted_labeling() upload_task = LabelImport.create_from_objects( @@ -1019,6 +1029,7 @@ def model_run_with_data_rows(client, configured_project, model_run_predictions, labels = wait_for_label_processing(configured_project) label_ids = [label.uid for label in labels] model_run.upsert_labels(label_ids) + print(f"model_run_with_data_rows: {time.time() - start_time}") yield model_run model_run.delete() # TODO: Delete resources when that is possible .. 
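The tests/integration/conftest.py hunk that follows wires those timers into pytest itself: a hookwrapper around fixture setup accumulates per-fixture wall-clock time, and a session-scoped autouse fixture prints the totals at teardown. Extracted from the diff, the pattern is roughly this minimal self-contained conftest.py sketch (it uses defaultdict(float) and a plain sorted slice in place of the hunk's islice bookkeeping; everything else mirrors the patch):

    import time
    from collections import defaultdict

    import pytest


    def pytest_configure():
        # Session-wide accumulator hung off the pytest module, as in the patch.
        pytest.report = defaultdict(float)


    @pytest.hookimpl(hookwrapper=True)
    def pytest_fixture_setup(fixturedef, request):
        # Wraps every fixture setup; the yield hands control back to pytest,
        # so the measured delta covers only this fixture's own setup work.
        start = time.time()
        yield
        pytest.report[fixturedef.argname] += time.time() - start


    @pytest.fixture(scope='session', autouse=True)
    def print_perf_summary():
        yield  # everything after the yield runs at session teardown
        slowest = sorted(pytest.report.items(), key=lambda kv: kv[1], reverse=True)
        print("\nTop slowest fixtures:\n", slowest[:10])
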
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index ed4229b4d..92e23a375 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,3 +1,5 @@ +from collections import defaultdict +from itertools import islice import json import os import re @@ -807,3 +809,37 @@ def upload_invalid_data_rows_for_dataset(dataset: Dataset): }, ] * 2) task.wait_till_done() + + +def pytest_configure(): + pytest.report = defaultdict(int) + pytest.data_row_report = {'times': 0, 'num_rows': 0} + + +@pytest.hookimpl(hookwrapper=True) +def pytest_fixture_setup(fixturedef, request): + start = time.time() + yield + + end = time.time() + + exec_time = end - start + pytest.report[fixturedef.argname] += exec_time + + # print('pytest_fixture_setup' + # f', request={request}' + # f', create_data_row_time={end - start}') + + +@pytest.fixture(scope='session', autouse=True) +def print_perf_summary(): + yield + + sorted_dict = dict( + sorted(pytest.report.items(), key=lambda item: item[1], reverse=True)) + num_of_entries = 10 if len(sorted_dict) >= 10 else len(sorted_dict) + slowest_fixtures = [ + (aaa, sorted_dict[aaa]) for aaa in islice(sorted_dict, num_of_entries) + ] + print("\nTop slowest fixtures:\n", slowest_fixtures) + print("Data row report:\n", pytest.data_row_report) diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py index d1a31e532..de2f15820 100644 --- a/tests/integration/test_dataset.py +++ b/tests/integration/test_dataset.py @@ -53,8 +53,12 @@ def dataset_for_filtering(client, rand_gen): yield name_1, d1, name_2, d2 - d1.delete() - d2.delete() + +def test_dataset_filtering(client, dataset_for_filtering): + name_1, d1, name_2, d2 = dataset_for_filtering + + assert list(client.get_datasets(where=Dataset.name == name_1)) == [d1] + assert list(client.get_datasets(where=Dataset.name == name_2)) == [d2] def test_dataset_filtering(client, dataset_for_filtering): From b95d1b89767c8ea6f085596315d056c8982b9f99 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 14 Aug 2023 09:16:26 -0700 Subject: [PATCH 02/10] Convert tests that do not require many prebuilt data rows to a simpler project fixture --- pytest.ini | 2 +- .../integration/annotation_import/conftest.py | 13 ++--- .../test_bulk_import_request.py | 49 +++++++++---------- .../annotation_import/test_data_types.py | 22 ++++----- tests/integration/conftest.py | 29 +++++++---- tests/integration/test_project.py | 14 ++---- 6 files changed, 64 insertions(+), 65 deletions(-) diff --git a/pytest.ini b/pytest.ini index b56afefdd..fbf64a864 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] -addopts = -s -vv --reruns 5 --reruns-delay 10 --durations=20 +addopts = -s -vv markers = slow: marks tests as slow (deselect with '-m "not slow"') diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 6e35d4d0a..1f88de47a 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -486,17 +486,12 @@ def initial_dataset(client, rand_gen): @pytest.fixture -def configured_project(client, initial_dataset, ontology, rand_gen, image_url): +def configured_project(client, configured_project_without_data_rows, + initial_dataset, ontology, rand_gen, image_url): start_time = time.time() dataset = initial_dataset - project = client.create_project( - name=rand_gen(str), - queue_mode=QueueMode.Batch, - ) - editor = list( - client.get_labeling_frontends(
where=LabelingFrontend.name == "editor"))[0] - project.setup(editor, ontology) + project = configured_project_without_data_rows + data_row_ids = [] # print("Before creating data rows ", time.time() - start_time) num_rows = 0 diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index 4f001af8d..7a66dd667 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -25,15 +25,15 @@ """ -def test_create_from_url(configured_project): +def test_create_from_url(project): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - bulk_import_request = configured_project.upload_annotations(name=name, - annotations=url, - validate=False) + bulk_import_request = project.upload_annotations(name=name, + annotations=url, + validate=False) - assert bulk_import_request.project() == configured_project + assert bulk_import_request.project() == project assert bulk_import_request.name == name assert bulk_import_request.input_file_url == url assert bulk_import_request.error_file_url is None @@ -41,24 +41,24 @@ def test_create_from_url(configured_project): assert bulk_import_request.state == BulkImportRequestState.RUNNING -def test_validate_file(configured_project): +def test_validate_file(project_with_ontology): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" with pytest.raises(MALValidationError): - configured_project.upload_annotations(name=name, - annotations=url, - validate=True) + project_with_ontology.upload_annotations(name=name, + annotations=url, + validate=True) #Schema ids shouldn't match -def test_create_from_objects(configured_project, predictions, +def test_create_from_objects(configured_project_without_data_rows, predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) - bulk_import_request = configured_project.upload_annotations( + bulk_import_request = configured_project_without_data_rows.upload_annotations( name=name, annotations=predictions) - assert bulk_import_request.project() == configured_project + assert bulk_import_request.project() == configured_project_without_data_rows assert bulk_import_request.name == name assert bulk_import_request.error_file_url is None assert bulk_import_request.status_file_url is None @@ -105,17 +105,17 @@ def test_create_from_local_file(tmp_path, predictions, configured_project, bulk_import_request.input_file_url, predictions) -def test_get(client, configured_project): +def test_get(client, configured_project_without_data_rows): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - configured_project.upload_annotations(name=name, - annotations=url, - validate=False) + configured_project_without_data_rows.upload_annotations(name=name, + annotations=url, + validate=False) bulk_import_request = BulkImportRequest.from_name( - client, project_id=configured_project.uid, name=name) + client, project_id=configured_project_without_data_rows.uid, name=name) - assert bulk_import_request.project() == configured_project + assert bulk_import_request.project() == configured_project_without_data_rows assert bulk_import_request.name == name assert bulk_import_request.input_file_url == url assert bulk_import_request.error_file_url is None @@ -158,14 +158,13 @@ def test_validate_ndjson_uuid(tmp_path, 
configured_project, predictions): @pytest.mark.slow -def test_wait_till_done(rectangle_inference, configured_project): +def test_wait_till_done(rectangle_inference, + configured_project_without_data_rows): name = str(uuid.uuid4()) - url = configured_project.client.upload_data(content=parser.dumps( - [rectangle_inference]), - sign=True) - bulk_import_request = configured_project.upload_annotations(name=name, - annotations=url, - validate=False) + url = configured_project_without_data_rows.client.upload_data( + content=parser.dumps([rectangle_inference]), sign=True) + bulk_import_request = configured_project_without_data_rows.upload_annotations( + name=name, annotations=url, validate=False) assert len(bulk_import_request.inputs) == 1 bulk_import_request.wait_until_done() diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py index f8e392cf5..30559198b 100644 --- a/tests/integration/annotation_import/test_data_types.py +++ b/tests/integration/annotation_import/test_data_types.py @@ -125,7 +125,6 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): [data_row.uid], # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) - project.data_row_ids.append(data_row.uid) return data_row @@ -135,12 +134,12 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData ]) -def test_import_data_types(client, configured_project, initial_dataset, - rand_gen, data_row_json_by_data_type, - annotations_by_data_type, data_type_class): +def test_import_data_types(client, project, initial_dataset, rand_gen, + data_row_json_by_data_type, annotations_by_data_type, + data_type_class): - project = configured_project - project_id = configured_project.uid + project = project + project_id = project.uid dataset = initial_dataset set_project_media_type_from_data_type(project, data_type_class) @@ -261,11 +260,11 @@ def test_import_data_types_v2(client, configured_project, initial_dataset, @pytest.mark.parametrize('data_type, data_class, annotations', test_params) -def test_import_label_annotations(client, configured_project, initial_dataset, - data_row_json_by_data_type, data_type, - data_class, annotations, rand_gen): +def test_import_label_annotations(client, configured_project_without_data_rows, + initial_dataset, data_row_json_by_data_type, + data_type, data_class, annotations, rand_gen): - project = configured_project + project = configured_project_without_data_rows dataset = initial_dataset set_project_media_type_from_data_type(project, data_class) @@ -297,7 +296,8 @@ def test_import_label_annotations(client, configured_project, initial_dataset, assert export_task.errors is None expected_annotations = get_annotation_comparison_dicts_from_labels(labels) actual_annotations = get_annotation_comparison_dicts_from_export( - export_task.result, data_row.uid, configured_project.uid) + export_task.result, data_row.uid, + configured_project_without_data_rows.uid) assert actual_annotations == expected_annotations data_row.delete() diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 92e23a375..c47524ed6 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -3,6 +3,7 @@ import json import os import re +import sys import time import uuid from enum import Enum @@ -390,9 +391,21 @@ def initial_dataset(client, rand_gen): @pytest.fixture -def 
configured_project(project, initial_dataset, client, rand_gen, image_url): +def project_with_ontology(project): + editor = list( + project.client.get_labeling_frontends( + where=LabelingFrontend.name == "editor"))[0] + empty_ontology = {"tools": [], "classifications": []} + project.setup(editor, empty_ontology) + yield project + + +@pytest.fixture +def configured_project(project_with_ontology, initial_dataset, rand_gen, + image_url): dataset = initial_dataset data_row_id = dataset.create_data_row(row_data=image_url).uid + project = project_with_ontology project.create_batch( rand_gen(str), @@ -401,14 +414,7 @@ def configured_project(project, initial_dataset, client, rand_gen, image_url): ) project.data_row_ids = [data_row_id] - editor = list( - project.client.get_labeling_frontends( - where=LabelingFrontend.name == "editor"))[0] - empty_ontology = {"tools": [], "classifications": []} - project.setup(editor, empty_ontology) yield project - dataset.delete() - project.delete() @pytest.fixture @@ -833,6 +839,8 @@ def pytest_fixture_setup(fixturedef, request): @pytest.fixture(scope='session', autouse=True) def print_perf_summary(): + print("Starting measurements\n", file=sys.stderr) + yield sorted_dict = dict( @@ -841,5 +849,6 @@ def print_perf_summary(): slowest_fixtures = [ (aaa, sorted_dict[aaa]) for aaa in islice(sorted_dict, num_of_entries) ] - print("\nTop slowest fixtures:\n", slowest_fixtures) - print("Data row report:\n", pytest.data_row_report) + print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) + print("Data row report:\n", pytest.data_row_report, file=sys.stderr) + # assert False diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index b9467e0dd..b3b683a3d 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ -171,15 +171,15 @@ def test_attach_instructions(client, project): @pytest.mark.skipif(condition=os.environ['LABELBOX_TEST_ENVIRON'] == "onprem", reason="new mutation does not work for onprem") -def test_html_instructions(configured_project): +def test_html_instructions(project_with_ontology): html_file_path = '/tmp/instructions.html' sample_html_str = "" with open(html_file_path, 'w') as file: file.write(sample_html_str) - configured_project.upsert_instructions(html_file_path) - updated_ontology = configured_project.ontology().normalized + project_with_ontology.upsert_instructions(html_file_path) + updated_ontology = project_with_ontology.ontology().normalized instructions = updated_ontology.pop('projectInstructions') assert requests.get(instructions).text == sample_html_str @@ -200,10 +200,6 @@ def test_same_ontology_after_instructions( assert instructions is not None -def test_queue_mode(configured_project: Project): - assert configured_project.queue_mode == QueueMode.Batch - - def test_batches(project: Project, dataset: Dataset, image_url): task = dataset.create_data_rows([ { @@ -243,9 +239,9 @@ def test_create_batch_with_global_keys_async(project: Project, data_rows): assert batch_data_rows == set(data_rows) -def test_media_type(client, configured_project: Project, rand_gen): +def test_media_type(client, project: Project, rand_gen): # Existing project with no media_type - assert isinstance(configured_project.media_type, MediaType) + assert isinstance(project.media_type, MediaType) # Update test project = client.create_project(name=rand_gen(str)) From 4976908f25ece76697cda14efe952d061f69a92c Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 15 Aug 2023 15:05:36 -0700 Subject: [PATCH 
03/10] Adding an option to configure source of data rows for predictions, also making ndjson tests use a project without data rows --- .../integration/annotation_import/conftest.py | 81 +++++++++- .../test_ndjson_validation.py | 138 +++++++++++------- tests/integration/conftest.py | 2 - 3 files changed, 158 insertions(+), 63 deletions(-) diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 1f88de47a..ca34d2dfb 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -486,8 +486,27 @@ def initial_dataset(client, rand_gen): @pytest.fixture -def configured_project(client, configured_project_without_data_rows, - initial_dataset, ontology, rand_gen, image_url): +def hardcoded_datarow_id(): + data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q' + + def get_data_row_id(indx=0): + return data_row_id + + yield get_data_row_id + + +@pytest.fixture +def configured_project_datarow_id(configured_project): + + def get_data_row_id(indx=0): + return configured_project.data_row_ids[indx] + + yield get_data_row_id + + +@pytest.fixture +def configured_project(configured_project_without_data_rows, initial_dataset, + ontology, rand_gen, image_url): start_time = time.time() dataset = initial_dataset project = configured_project_without_data_rows @@ -509,6 +528,7 @@ def configured_project(client, configured_project_without_data_rows, ) print("After creating batch ", time.time() - start_time) project.data_row_ids = data_row_ids + yield project project.delete() @@ -577,10 +597,19 @@ def configured_project_without_data_rows(client, ontology, rand_gen): # In an example of a 'rectangle' we have extended to support multiple instances of the same tool type # TODO: we will support this approach in the future for all tools @pytest.fixture -def prediction_id_mapping(configured_project): +def prediction_id_mapping(configured_project_without_data_rows, ontology, + request): # Maps tool types to feature schema ids - project = configured_project + if 'configured_project' in request.fixturenames: + data_row_id_factory = request.getfixturevalue( + 'configured_project_datarow_id') + project = configured_project + else: + data_row_id_factory = request.getfixturevalue('hardcoded_datarow_id') + project = configured_project_without_data_rows + ontology = project.ontology().normalized + result = {} for idx, tool in enumerate(ontology['tools'] + ontology['classifications']): @@ -597,7 +626,47 @@ def prediction_id_mapping(configured_project): "schemaId": tool['featureSchemaId'], "name": tool['name'], "dataRow": { - "id": project.data_row_ids[idx], + "id": data_row_id_factory(idx), }, 'tool': tool } if tool_type not in result: result[tool_type] = [] result[tool_type].append(value) else: result[tool_type] = { "uuid": str(uuid.uuid4()), "schemaId": tool['featureSchemaId'], "name": tool['name'], "dataRow": { - "id": project.data_row_ids[idx], + "id": data_row_id_factory(idx), }, 'tool': tool } return result + + +@pytest.fixture +def prediction_id_mapping_datarow_id(): + # Maps tool types to feature schema ids + data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q' + result = {} + + for _, tool in enumerate(ontology['tools'] + ontology['classifications']): + if 'tool' in tool: + tool_type = tool['tool'] + else: + tool_type = tool[ + 'type'] if 'scope' not in tool else f"{tool['type']}_{tool['scope']}" # so 'checklist' of 'checklist_index' + + # TODO: remove this once we have a better way to associate multiple tools instances with a single tool 
type + if tool_type == 'rectangle': + value = { + "uuid": str(uuid.uuid4()), + "schemaId": tool['featureSchemaId'], + "name": tool['name'], + "dataRow": { + "id": data_row_id, }, 'tool': tool } @@ -610,7 +679,7 @@ def prediction_id_mapping(configured_project): "schemaId": tool['featureSchemaId'], "name": tool['name'], "dataRow": { - "id": project.data_row_ids[idx], + "id": data_row_id, }, 'tool': tool } diff --git a/tests/integration/annotation_import/test_ndjson_validation.py b/tests/integration/annotation_import/test_ndjson_validation.py index 53bb85eed..466968e85 100644 --- a/tests/integration/annotation_import/test_ndjson_validation.py +++ b/tests/integration/annotation_import/test_ndjson_validation.py @@ -9,6 +9,24 @@ NDRadio, NDRectangle, NDText, NDTextEntity, NDTool, _validate_ndjson) +from labelbox.schema.labeling_frontend import LabelingFrontend +from labelbox.schema.queue_mode import QueueMode + + +@pytest.fixture +def configured_project_without_data_rows(client, + ontology, + rand_gen, + scope="module"): + project = client.create_project(name=rand_gen(str), + description=rand_gen(str), + queue_mode=QueueMode.Batch) + editor = list( + client.get_labeling_frontends( + where=LabelingFrontend.name == "editor"))[0] + project.setup(editor, ontology) + yield project + project.delete() def test_classification_construction(checklist_inference, text_inference): @@ -37,187 +55,198 @@ def test_tool_construction(inference, expected_type): def test_incorrect_feature_schema(rectangle_inference, polygon_inference, - configured_project): + configured_project_without_data_rows): #Valid but incorrect feature schema #Prob the error message says something about the config not anything useful. We might want to fix this. pred = rectangle_inference.copy() pred['schemaId'] = polygon_inference['schemaId'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def no_tool(text_inference, configured_project): +def no_tool(text_inference, configured_project_without_data_rows): pred = text_inference.copy() #Missing key del pred['answer'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_invalid_text(text_inference, configured_project): +def test_invalid_text(text_inference, configured_project_without_data_rows): #and if it is not a string pred = text_inference.copy() #Extra and wrong key del pred['answer'] pred['answers'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) del pred['answers'] #Invalid type pred['answer'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) #Invalid type pred['answer'] = None with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_invalid_checklist_item(checklist_inference, configured_project): +def test_invalid_checklist_item(checklist_inference, + configured_project_without_data_rows): #Only two points pred = checklist_inference.copy() pred['answers'] = [pred['answers'][0], pred['answers'][0]] #Duplicate schema ids with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], 
configured_project_without_data_rows) pred['answers'] = [{"name": "asdfg"}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) pred['answers'] = [{"schemaId": "1232132132"}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) pred['answers'] = [{}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) pred['answers'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) del pred['answers'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_invalid_polygon(polygon_inference, configured_project): +def test_invalid_polygon(polygon_inference, + configured_project_without_data_rows): #Only two points pred = polygon_inference.copy() pred['polygon'] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_incorrect_entity(entity_inference, configured_project): +def test_incorrect_entity(entity_inference, + configured_project_without_data_rows): entity = entity_inference.copy() #Location cannot be a list entity["location"] = [0, 10] with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) + _validate_ndjson([entity], configured_project_without_data_rows) entity["location"] = {"start": -1, "end": 5} with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) + _validate_ndjson([entity], configured_project_without_data_rows) entity["location"] = {"start": 15, "end": 5} with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) + _validate_ndjson([entity], configured_project_without_data_rows) -def test_incorrect_mask(segmentation_inference, configured_project): +def test_incorrect_mask(segmentation_inference, + configured_project_without_data_rows): seg = segmentation_inference.copy() seg['mask']['colorRGB'] = [-1, 0, 10] with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project) + _validate_ndjson([seg], configured_project_without_data_rows) seg['mask']['colorRGB'] = [0, 0] with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project) + _validate_ndjson([seg], configured_project_without_data_rows) seg['mask'] = {'counts': [0], 'size': [0, 1]} with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project) + _validate_ndjson([seg], configured_project_without_data_rows) seg['mask'] = {'counts': [-1], 'size': [1, 1]} with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project) + _validate_ndjson([seg], configured_project_without_data_rows) -def test_all_validate_json(configured_project, predictions): +def test_all_validate_json(configured_project_without_data_rows, predictions): #Predictions contains one of each type of prediction. #These should be properly formatted and pass. 
- _validate_ndjson(predictions, configured_project) + _validate_ndjson(predictions, configured_project_without_data_rows) -def test_incorrect_line(line_inference, configured_project): +def test_incorrect_line(line_inference, configured_project_without_data_rows): line = line_inference.copy() line["line"] = [line["line"][0]] #Just one point with pytest.raises(MALValidationError): - _validate_ndjson([line], configured_project) + _validate_ndjson([line], configured_project_without_data_rows) -def test_incorrect_rectangle(rectangle_inference, configured_project): +def test_incorrect_rectangle(rectangle_inference, + configured_project_without_data_rows): del rectangle_inference['bbox']['top'] with pytest.raises(MALValidationError): - _validate_ndjson([rectangle_inference], configured_project) + _validate_ndjson([rectangle_inference], + configured_project_without_data_rows) -def test_duplicate_tools(rectangle_inference, configured_project): +def test_duplicate_tools(rectangle_inference, + configured_project_without_data_rows): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() pred['polygon'] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_invalid_feature_schema(configured_project, rectangle_inference): +def test_invalid_feature_schema(configured_project_without_data_rows, + rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() pred['schemaId'] = "blahblah" with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_name_only_feature_schema(configured_project, rectangle_inference): +def test_name_only_feature_schema(configured_project_without_data_rows, + rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['schemaId'] - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_schema_id_only_feature_schema(configured_project, rectangle_inference): +def test_schema_id_only_feature_schema(configured_project_without_data_rows, + rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['name'] - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_missing_feature_schema(configured_project, rectangle_inference): +def test_missing_feature_schema(configured_project_without_data_rows, + rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['schemaId'] del pred['name'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_validate_ndjson(tmp_path, configured_project): +def test_validate_ndjson(tmp_path, configured_project_without_data_rows): file_name = f"broken.ndjson" file_path = tmp_path / file_name with file_path.open("w") as f: f.write("test") with pytest.raises(ValueError): - configured_project.upload_annotations(name="name", - annotations=str(file_path), - validate=True) + configured_project_without_data_rows.upload_annotations( + name="name", annotations=str(file_path), validate=True) -def 
test_validate_ndjson_uuid(tmp_path, configured_project, predictions): +def test_validate_ndjson_uuid(tmp_path, configured_project_without_data_rows, + predictions): file_name = f"repeat_uuid.ndjson" file_path = tmp_path / file_name repeat_uuid = predictions.copy() @@ -228,16 +257,15 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): parser.dump(repeat_uuid, f) with pytest.raises(MALValidationError): - configured_project.upload_annotations(name="name", - validate=True, - annotations=str(file_path)) + configured_project_without_data_rows.upload_annotations( + name="name", validate=True, annotations=str(file_path)) with pytest.raises(MALValidationError): - configured_project.upload_annotations(name="name", - validate=True, - annotations=repeat_uuid) + configured_project_without_data_rows.upload_annotations( + name="name", validate=True, annotations=repeat_uuid) -def test_video_upload(video_checklist_inference, configured_project): +def test_video_upload(video_checklist_inference, + configured_project_without_data_rows): pred = video_checklist_inference.copy() - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index c47524ed6..82c739ddb 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -839,8 +839,6 @@ def pytest_fixture_setup(fixturedef, request): @pytest.fixture(scope='session', autouse=True) def print_perf_summary(): - print("Starting measurements\n", file=sys.stderr) - yield sorted_dict = dict( From 551c1efe0cc87b60c1b2f6509f613b75cb432d4b Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 15 Aug 2023 15:24:56 -0700 Subject: [PATCH 04/10] Replacing configured_project --- pytest.ini | 2 +- tests/conftest.py | 2 +- .../integration/annotation_import/conftest.py | 120 ++++++++--------- .../test_bulk_import_request.py | 41 +++--- .../test_conversation_import.py | 6 +- .../annotation_import/test_data_types.py | 23 ++-- .../annotation_import/test_label_import.py | 19 ++- .../annotation_import/test_model.py | 4 +- .../annotation_import/test_model_run.py | 14 +- .../test_ndjson_validation.py | 121 +++++++++--------- .../test_upsert_prediction_import.py | 18 +-- tests/integration/conftest.py | 15 ++- tests/integration/export_v2/conftest.py | 2 +- .../export_v2/test_export_video.py | 4 +- 14 files changed, 194 insertions(+), 197 deletions(-) diff --git a/pytest.ini b/pytest.ini index fbf64a864..b56afefdd 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] -addopts = -s -vv +addopts = -s -vv --reruns 5 --reruns-delay 10 --durations=20 markers = slow: marks tests as slow (deselect with '-m "not slow"') diff --git a/tests/conftest.py b/tests/conftest.py index b4dd6dce0..b724426d8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,7 @@ ] -@pytest.fixture +@pytest.fixture(scope="session") def rand_gen(): def gen(field_type): diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index ca34d2dfb..3f1cd7de5 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -211,7 +211,7 @@ def annotations_by_data_type_v2( } -@pytest.fixture +@pytest.fixture(scope='session') def ontology(): bbox_tool_with_nested_text = { 'required': @@ -479,48 +479,45 @@ def func(project): @pytest.fixture -def initial_dataset(client, rand_gen): - dataset = 
client.create_dataset(name=rand_gen(str)) - yield dataset - dataset.delete() - - -@pytest.fixture -def hardcoded_datarow_id(): - data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q' +def configured_project_datarow_id(configured_project): def get_data_row_id(indx=0): - return data_row_id + return configured_project.data_row_ids[indx] yield get_data_row_id @pytest.fixture -def configured_project_datarow_id(configured_project): +def configured_project_one_datarow_id(configured_project_with_one_data_row): def get_data_row_id(indx=0): - return configured_project.data_row_ids[indx] + return configured_project_with_one_data_row.data_row_ids[0] yield get_data_row_id @pytest.fixture -def configured_project(configured_project_without_data_rows, initial_dataset, - ontology, rand_gen, image_url): +def configured_project(client, initial_dataset, ontology, rand_gen, image_url): start_time = time.time() dataset = initial_dataset - project = configured_project_without_data_rows + project = client.create_project(name=rand_gen(str), + queue_mode=QueueMode.Batch) + editor = list( + client.get_labeling_frontends( + where=LabelingFrontend.name == "editor"))[0] + project.setup(editor, ontology) + num_rows = 0 data_row_ids = [] - # print("Before creating data rows ", time.time() - start_time) - num_rows = 0 + for _ in range(len(ontology['tools']) + len(ontology['classifications'])): data_row_ids.append(dataset.create_data_row(row_data=image_url).uid) num_rows += 1 - # print("After creating data rows ", time.time() - start_time) - - pytest.data_row_report['times'] += time.time() - start_time - pytest.data_row_report['num_rows'] += num_rows + project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, + sleep_interval=3) + if pytest.data_row_report: + pytest.data_row_report['times'] += time.time() - start_time + pytest.data_row_report['num_rows'] += num_rows project.create_batch( rand_gen(str), data_row_ids, # sample of data row objects @@ -580,7 +577,10 @@ def dataset_conversation_entity(client, rand_gen, conversation_entity_data_row, @pytest.fixture -def configured_project_without_data_rows(client, ontology, rand_gen): +def configured_project_with_one_data_row(client, ontology, rand_gen, + initial_dataset, image_url): + start_time = time.time() + project = client.create_project(name=rand_gen(str), description=rand_gen(str), queue_mode=QueueMode.Batch) @@ -588,7 +588,25 @@ def configured_project_without_data_rows(client, ontology, rand_gen): client.get_labeling_frontends( where=LabelingFrontend.name == "editor"))[0] project.setup(editor, ontology) + + data_row = initial_dataset.create_data_row(row_data=image_url) + data_row_ids = [data_row.uid] + project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, + sleep_interval=3) + + if pytest.data_row_report: + pytest.data_row_report['times'] += time.time() - start_time + pytest.data_row_report['num_rows'] += 1 + batch = project.create_batch( + rand_gen(str), + data_row_ids, # sample of data row objects + 5 # priority between 1(Highest) - 5(lowest) + ) + project.data_row_ids = data_row_ids + yield project + + batch.delete() project.delete() @@ -597,16 +615,20 @@ def configured_project_without_data_rows(client, ontology, rand_gen): # In an example of a 'rectangle' we have extended to support multiple instances of the same tool type # TODO: we will support this approach in the future for all tools @pytest.fixture -def prediction_id_mapping(configured_project_without_data_rows, ontology, - request): +def prediction_id_mapping(ontology, request): # Maps tool 
types to feature schema ids if 'configured_project' in request.fixturenames: data_row_id_factory = request.getfixturevalue( 'configured_project_datarow_id') - project = configured_project - else: + project = request.getfixturevalue('configured_project') + elif 'hardcoded_datarow_id' in request.fixturenames: data_row_id_factory = request.getfixturevalue('hardcoded_datarow_id') - project = configured_project_without_data_rows + project = request.getfixturevalue('configured_project_with_ontology') + else: + data_row_id_factory = request.getfixturevalue( + 'configured_project_one_datarow_id') + project = request.getfixturevalue( + 'configured_project_with_one_data_row') ontology = project.ontology().normalized @@ -646,46 +668,6 @@ def prediction_id_mapping(configured_project_without_data_rows, ontology, return result -@pytest.fixture -def prediction_id_mapping_datarow_id(): - # Maps tool types to feature schema ids - data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q' - result = {} - - for _, tool in enumerate(ontology['tools'] + ontology['classifications']): - if 'tool' in tool: - tool_type = tool['tool'] - else: - tool_type = tool[ - 'type'] if 'scope' not in tool else f"{tool['type']}_{tool['scope']}" # so 'checklist' of 'checklist_index' - - # TODO: remove this once we have a better way to associate multiple tools instances with a single tool type - if tool_type == 'rectangle': - value = { - "uuid": str(uuid.uuid4()), - "schemaId": tool['featureSchemaId'], - "name": tool['name'], - "dataRow": { - "id": data_row_id, - }, - 'tool': tool - } - if tool_type not in result: - result[tool_type] = [] - result[tool_type].append(value) - else: - result[tool_type] = { - "uuid": str(uuid.uuid4()), - "schemaId": tool['featureSchemaId'], - "name": tool['name'], - "dataRow": { - "id": data_row_id, - }, - 'tool': tool - } - return result - - @pytest.fixture def polygon_inference(prediction_id_mapping): polygon = prediction_id_mapping['polygon'].copy() @@ -1079,7 +1061,6 @@ def model_run_with_training_metadata(rand_gen, model): @pytest.fixture def model_run_with_data_rows(client, configured_project, model_run_predictions, model_run, wait_for_label_processing): - start_time = time.time() configured_project.enable_model_assisted_labeling() upload_task = LabelImport.create_from_objects( @@ -1093,7 +1074,6 @@ def model_run_with_data_rows(client, configured_project, model_run_predictions, labels = wait_for_label_processing(configured_project) label_ids = [label.uid for label in labels] model_run.upsert_labels(label_ids) - print(f"model_run_with_data_rows: {time.time() - start_time}") yield model_run model_run.delete() # TODO: Delete resources when that is possible .. 
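The prediction_id_mapping rework above is the heart of this commit: instead of hard-depending on the expensive configured_project, it inspects request.fixturenames to see which project flavor the requesting test pulled in, then lazily builds the matching data-row-id factory via request.getfixturevalue, so a test only pays for the data rows it actually declares. A minimal sketch of that dispatch trick, with hypothetical fixture names and ids standing in for the patch's real ones:

    import pytest


    @pytest.fixture
    def expensive_row_id():
        # Stand-in for a fixture that really creates data rows in the backend.
        return lambda idx=0: f"real-row-{idx}"


    @pytest.fixture
    def hardcoded_row_id():
        # Stand-in for hardcoded_datarow_id: nothing is created at all.
        return lambda idx=0: "ck8q9q9qj00003g5z3q1q9q9q"


    @pytest.fixture
    def row_id_source(request):
        # request.fixturenames lists every fixture active for the current test;
        # getfixturevalue() instantiates one on demand, so only the branch the
        # test opted into is ever built.
        if 'expensive_row_id' in request.fixturenames:
            return request.getfixturevalue('expensive_row_id')
        return request.getfixturevalue('hardcoded_row_id')


    def test_cheap_by_default(row_id_source):
        # This test never asked for expensive_row_id, so no rows get created.
        assert row_id_source(0).startswith('ck')
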
diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index 7a66dd667..6691cc044 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -51,14 +51,14 @@ def test_validate_file(project_with_ontology): #Schema ids shouldn't match -def test_create_from_objects(configured_project_without_data_rows, predictions, +def test_create_from_objects(configured_project_with_one_data_row, predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) - bulk_import_request = configured_project_without_data_rows.upload_annotations( + bulk_import_request = configured_project_with_one_data_row.upload_annotations( name=name, annotations=predictions) - assert bulk_import_request.project() == configured_project_without_data_rows + assert bulk_import_request.project() == configured_project_with_one_data_row assert bulk_import_request.name == name assert bulk_import_request.error_file_url is None assert bulk_import_request.status_file_url is None @@ -105,17 +105,17 @@ def test_create_from_local_file(tmp_path, predictions, configured_project, bulk_import_request.input_file_url, predictions) -def test_get(client, configured_project_without_data_rows): +def test_get(client, configured_project_with_one_data_row): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - configured_project_without_data_rows.upload_annotations(name=name, + configured_project_with_one_data_row.upload_annotations(name=name, annotations=url, validate=False) bulk_import_request = BulkImportRequest.from_name( - client, project_id=configured_project_without_data_rows.uid, name=name) + client, project_id=configured_project_with_one_data_row.uid, name=name) - assert bulk_import_request.project() == configured_project_without_data_rows + assert bulk_import_request.project() == configured_project_with_one_data_row assert bulk_import_request.name == name assert bulk_import_request.input_file_url == url assert bulk_import_request.error_file_url is None @@ -123,16 +123,15 @@ def test_get(client, configured_project_without_data_rows): assert bulk_import_request.state == BulkImportRequestState.RUNNING -def test_validate_ndjson(tmp_path, configured_project): +def test_validate_ndjson(tmp_path, configured_project_with_one_data_row): file_name = f"broken.ndjson" file_path = tmp_path / file_name with file_path.open("w") as f: f.write("test") with pytest.raises(ValueError): - configured_project.upload_annotations(name="name", - validate=True, - annotations=str(file_path)) + configured_project_with_one_data_row.upload_annotations( + name="name", validate=True, annotations=str(file_path)) def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): @@ -159,11 +158,11 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): @pytest.mark.slow def test_wait_till_done(rectangle_inference, - configured_project_without_data_rows): + configured_project_with_one_data_row): name = str(uuid.uuid4()) - url = configured_project_without_data_rows.client.upload_data( + url = configured_project_with_one_data_row.client.upload_data( content=parser.dumps([rectangle_inference]), sign=True) - bulk_import_request = configured_project_without_data_rows.upload_annotations( + bulk_import_request = configured_project_with_one_data_row.upload_annotations( name=name, annotations=url, validate=False) 
assert len(bulk_import_request.inputs) == 1 @@ -298,7 +297,7 @@ def test_pdf_mal_bbox(client, configured_project_pdf): assert import_annotations.errors == [] -def test_pdf_document_entity(client, configured_project_without_data_rows, +def test_pdf_document_entity(client, configured_project_with_one_data_row, dataset_pdf_entity, rand_gen): # for content "Metal-insulator (MI) transitions have been one of the" in OCR JSON extract tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json document_text_selection = DocumentTextSelection( @@ -322,7 +321,7 @@ def test_pdf_document_entity(client, configured_project_without_data_rows, labels = [] _, data_row_uids = dataset_pdf_entity - configured_project_without_data_rows.create_batch( + configured_project_with_one_data_row.create_batch( rand_gen(str), data_row_uids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) @@ -337,7 +336,7 @@ def test_pdf_document_entity(client, configured_project_without_data_rows, import_annotations = MALPredictionImport.create_from_objects( client=client, - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, name=f"import {str(uuid.uuid4())}", predictions=labels) import_annotations.wait_until_done() @@ -346,14 +345,14 @@ def test_pdf_document_entity(client, configured_project_without_data_rows, def test_nested_video_object_annotations(client, - configured_project_without_data_rows, + configured_project_with_one_data_row, video_data, bbox_video_annotation_objects, rand_gen): labels = [] _, data_row_uids = video_data - configured_project_without_data_rows.update(media_type=MediaType.Video) - configured_project_without_data_rows.create_batch( + configured_project_with_one_data_row.update(media_type=MediaType.Video) + configured_project_with_one_data_row.create_batch( rand_gen(str), data_row_uids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) @@ -365,7 +364,7 @@ def test_nested_video_object_annotations(client, annotations=bbox_video_annotation_objects)) import_annotations = MALPredictionImport.create_from_objects( client=client, - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, name=f"import {str(uuid.uuid4())}", predictions=labels) import_annotations.wait_until_done() diff --git a/tests/integration/annotation_import/test_conversation_import.py b/tests/integration/annotation_import/test_conversation_import.py index ac2d5419c..9f1d26e31 100644 --- a/tests/integration/annotation_import/test_conversation_import.py +++ b/tests/integration/annotation_import/test_conversation_import.py @@ -7,7 +7,7 @@ from labelbox.schema.annotation_import import MALPredictionImport -def test_conversation_entity(client, configured_project_without_data_rows, +def test_conversation_entity(client, configured_project_with_one_data_row, dataset_conversation_entity, rand_gen): conversation_entity_annotation = ConversationEntity(start=0, @@ -20,7 +20,7 @@ def test_conversation_entity(client, configured_project_without_data_rows, labels = [] _, data_row_uids = dataset_conversation_entity - configured_project_without_data_rows.create_batch( + configured_project_with_one_data_row.create_batch( rand_gen(str), data_row_uids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) @@ -35,7 +35,7 @@ def test_conversation_entity(client, configured_project_without_data_rows, import_annotations = MALPredictionImport.create_from_objects( client=client, - 
project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, name=f"import {str(uuid.uuid4())}", predictions=labels) diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py index 30559198b..5de79f5cc 100644 --- a/tests/integration/annotation_import/test_data_types.py +++ b/tests/integration/annotation_import/test_data_types.py @@ -125,6 +125,7 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): [data_row.uid], # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) + project.data_row_ids.append(data_row.uid) return data_row @@ -134,11 +135,11 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData ]) -def test_import_data_types(client, project, initial_dataset, rand_gen, - data_row_json_by_data_type, annotations_by_data_type, - data_type_class): +def test_import_data_types(client, configured_project, initial_dataset, + rand_gen, data_row_json_by_data_type, + annotations_by_data_type, data_type_class): - project = project + project = configured_project project_id = project.uid dataset = initial_dataset @@ -260,11 +261,11 @@ def test_import_data_types_v2(client, configured_project, initial_dataset, @pytest.mark.parametrize('data_type, data_class, annotations', test_params) -def test_import_label_annotations(client, configured_project_without_data_rows, +def test_import_label_annotations(client, configured_project_with_one_data_row, initial_dataset, data_row_json_by_data_type, data_type, data_class, annotations, rand_gen): - project = configured_project_without_data_rows + project = configured_project_with_one_data_row dataset = initial_dataset set_project_media_type_from_data_type(project, data_class) @@ -297,13 +298,13 @@ def test_import_label_annotations(client, configured_project_without_data_rows, expected_annotations = get_annotation_comparison_dicts_from_labels(labels) actual_annotations = get_annotation_comparison_dicts_from_export( export_task.result, data_row.uid, - configured_project_without_data_rows.uid) + configured_project_with_one_data_row.uid) assert actual_annotations == expected_annotations data_row.delete() @pytest.mark.parametrize('data_type, data_class, annotations', test_params) -def test_import_mal_annotations(client, configured_project_without_data_rows, +def test_import_mal_annotations(client, configured_project_with_one_data_row, data_row_json_by_data_type, data_type, data_class, annotations, rand_gen): @@ -311,10 +312,10 @@ def test_import_mal_annotations(client, configured_project_without_data_rows, data_row_json = data_row_json_by_data_type[data_type] data_row = dataset.create_data_row(data_row_json) - set_project_media_type_from_data_type(configured_project_without_data_rows, + set_project_media_type_from_data_type(configured_project_with_one_data_row, data_class) - configured_project_without_data_rows.create_batch( + configured_project_with_one_data_row.create_batch( rand_gen(str), [data_row.uid], ) @@ -326,7 +327,7 @@ def test_import_mal_annotations(client, configured_project_without_data_rows, import_annotations = lb.MALPredictionImport.create_from_objects( client=client, - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, name=f"import {str(uuid.uuid4())}", predictions=labels) import_annotations.wait_until_done() diff --git 
a/tests/integration/annotation_import/test_label_import.py b/tests/integration/annotation_import/test_label_import.py index 198ce2e3e..61c602c52 100644 --- a/tests/integration/annotation_import/test_label_import.py +++ b/tests/integration/annotation_import/test_label_import.py @@ -9,13 +9,16 @@ """ -def test_create_from_url(client, configured_project, +def test_create_from_url(client, configured_project_with_one_data_row, annotation_import_test_helpers): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" label_import = LabelImport.create_from_url( - client=client, project_id=configured_project.uid, name=name, url=url) - assert label_import.parent_id == configured_project.uid + client=client, + project_id=configured_project_with_one_data_row.uid, + name=name, + url=url) + assert label_import.parent_id == configured_project_with_one_data_row.uid annotation_import_test_helpers.check_running_state(label_import, name, url) @@ -52,13 +55,17 @@ def test_create_from_objects(client, configured_project, object_predictions, # annotation_import_test_helpers.assert_file_content(label_import.input_file_url, object_predictions) -def test_get(client, configured_project, annotation_import_test_helpers): +def test_get(client, configured_project_with_one_data_row, + annotation_import_test_helpers): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" label_import = LabelImport.create_from_url( - client=client, project_id=configured_project.uid, name=name, url=url) + client=client, + project_id=configured_project_with_one_data_row.uid, + name=name, + url=url) - assert label_import.parent_id == configured_project.uid + assert label_import.parent_id == configured_project_with_one_data_row.uid annotation_import_test_helpers.check_running_state(label_import, name, url) diff --git a/tests/integration/annotation_import/test_model.py b/tests/integration/annotation_import/test_model.py index dcfe9ef2c..131ecd9d0 100644 --- a/tests/integration/annotation_import/test_model.py +++ b/tests/integration/annotation_import/test_model.py @@ -4,14 +4,14 @@ from labelbox.exceptions import ResourceNotFoundError -def test_model(client, configured_project, rand_gen): +def test_model(client, configured_project_with_one_data_row, rand_gen): # Get all models = list(client.get_models()) for m in models: assert isinstance(m, Model) # Create - ontology = configured_project.ontology() + ontology = configured_project_with_one_data_row.ontology() data = {"name": rand_gen(str), "ontology_id": ontology.uid} model = client.create_model(data["name"], data["ontology_id"]) assert model.name == data["name"] diff --git a/tests/integration/annotation_import/test_model_run.py b/tests/integration/annotation_import/test_model_run.py index c94c78cde..328b38ba5 100644 --- a/tests/integration/annotation_import/test_model_run.py +++ b/tests/integration/annotation_import/test_model_run.py @@ -87,11 +87,12 @@ def test_model_run_data_rows_delete(model_run_with_data_rows): assert len(before) == len(after) + 1 -def test_model_run_upsert_data_rows(dataset, model_run, configured_project): +def test_model_run_upsert_data_rows(dataset, model_run, + configured_project_with_one_data_row): n_model_run_data_rows = len(list(model_run.model_run_data_rows())) assert n_model_run_data_rows == 0 data_row = dataset.create_data_row(row_data="test row data") - configured_project._wait_until_data_rows_are_processed( + 
configured_project_with_one_data_row._wait_until_data_rows_are_processed( data_row_ids=[data_row.uid]) model_run.upsert_data_rows([data_row.uid]) n_model_run_data_rows = len(list(model_run.model_run_data_rows())) @@ -167,15 +168,14 @@ def get_model_run_status(): errorMessage) -def test_model_run_split_assignment_by_data_row_ids(model_run, dataset, - image_url, - configured_project): - n_data_rows = 10 +def test_model_run_split_assignment_by_data_row_ids( + model_run, dataset, image_url, configured_project_with_one_data_row): + n_data_rows = 2 data_rows = dataset.create_data_rows([{ "row_data": image_url } for _ in range(n_data_rows)]) data_row_ids = [data_row['id'] for data_row in data_rows.result] - configured_project._wait_until_data_rows_are_processed( + configured_project_with_one_data_row._wait_until_data_rows_are_processed( data_row_ids=data_row_ids) model_run.upsert_data_rows(data_row_ids) diff --git a/tests/integration/annotation_import/test_ndjson_validation.py b/tests/integration/annotation_import/test_ndjson_validation.py index 466968e85..123752402 100644 --- a/tests/integration/annotation_import/test_ndjson_validation.py +++ b/tests/integration/annotation_import/test_ndjson_validation.py @@ -13,19 +13,27 @@ from labelbox.schema.queue_mode import QueueMode -@pytest.fixture -def configured_project_without_data_rows(client, - ontology, - rand_gen, - scope="module"): +@pytest.fixture(scope="module", autouse=True) +def hardcoded_datarow_id(): + data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q' + + def get_data_row_id(indx=0): + return data_row_id + + yield get_data_row_id + + +@pytest.fixture(scope="module", autouse=True) +def configured_project_with_ontology(client, ontology, rand_gen): project = client.create_project(name=rand_gen(str), - description=rand_gen(str), queue_mode=QueueMode.Batch) editor = list( client.get_labeling_frontends( where=LabelingFrontend.name == "editor"))[0] project.setup(editor, ontology) + yield project + project.delete() @@ -55,197 +63,194 @@ def test_tool_construction(inference, expected_type): def test_incorrect_feature_schema(rectangle_inference, polygon_inference, - configured_project_without_data_rows): + configured_project_with_ontology): #Valid but incorrect feature schema #Prob the error message says something about the config not anything useful. We might want to fix this. 
pred = rectangle_inference.copy() pred['schemaId'] = polygon_inference['schemaId'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def no_tool(text_inference, configured_project_without_data_rows): +def no_tool(text_inference, configured_project_with_ontology): pred = text_inference.copy() #Missing key del pred['answer'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_invalid_text(text_inference, configured_project_without_data_rows): +def test_invalid_text(text_inference, configured_project_with_ontology): #and if it is not a string pred = text_inference.copy() #Extra and wrong key del pred['answer'] pred['answers'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) del pred['answers'] #Invalid type pred['answer'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) #Invalid type pred['answer'] = None with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) def test_invalid_checklist_item(checklist_inference, - configured_project_without_data_rows): + configured_project_with_ontology): #Only two points pred = checklist_inference.copy() pred['answers'] = [pred['answers'][0], pred['answers'][0]] #Duplicate schema ids with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) pred['answers'] = [{"name": "asdfg"}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) pred['answers'] = [{"schemaId": "1232132132"}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) pred['answers'] = [{}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) pred['answers'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) del pred['answers'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_invalid_polygon(polygon_inference, - configured_project_without_data_rows): +def test_invalid_polygon(polygon_inference, configured_project_with_ontology): #Only two points pred = polygon_inference.copy() pred['polygon'] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_incorrect_entity(entity_inference, - configured_project_without_data_rows): +def test_incorrect_entity(entity_inference, configured_project_with_ontology): entity = entity_inference.copy() #Location cannot be a list 
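    #(a valid location is a dict such as {"start": 0, "end": 5} with
    # 0 <= start <= end; each mutation below violates one of those constraints)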
entity["location"] = [0, 10] with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project_without_data_rows) + _validate_ndjson([entity], configured_project_with_ontology) entity["location"] = {"start": -1, "end": 5} with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project_without_data_rows) + _validate_ndjson([entity], configured_project_with_ontology) entity["location"] = {"start": 15, "end": 5} with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project_without_data_rows) + _validate_ndjson([entity], configured_project_with_ontology) def test_incorrect_mask(segmentation_inference, - configured_project_without_data_rows): + configured_project_with_ontology): seg = segmentation_inference.copy() seg['mask']['colorRGB'] = [-1, 0, 10] with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project_without_data_rows) + _validate_ndjson([seg], configured_project_with_ontology) seg['mask']['colorRGB'] = [0, 0] with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project_without_data_rows) + _validate_ndjson([seg], configured_project_with_ontology) seg['mask'] = {'counts': [0], 'size': [0, 1]} with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project_without_data_rows) + _validate_ndjson([seg], configured_project_with_ontology) seg['mask'] = {'counts': [-1], 'size': [1, 1]} with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project_without_data_rows) + _validate_ndjson([seg], configured_project_with_ontology) -def test_all_validate_json(configured_project_without_data_rows, predictions): +def test_all_validate_json(configured_project_with_ontology, predictions): #Predictions contains one of each type of prediction. #These should be properly formatted and pass. 
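    #Happy path: a single malformed entry in `predictions` would raise
    #MALValidationError, so passing here exercises every prediction type at once.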
- _validate_ndjson(predictions, configured_project_without_data_rows) + _validate_ndjson(predictions, configured_project_with_ontology) -def test_incorrect_line(line_inference, configured_project_without_data_rows): +def test_incorrect_line(line_inference, configured_project_with_ontology): line = line_inference.copy() line["line"] = [line["line"][0]] #Just one point with pytest.raises(MALValidationError): - _validate_ndjson([line], configured_project_without_data_rows) + _validate_ndjson([line], configured_project_with_ontology) def test_incorrect_rectangle(rectangle_inference, - configured_project_without_data_rows): + configured_project_with_ontology): del rectangle_inference['bbox']['top'] with pytest.raises(MALValidationError): _validate_ndjson([rectangle_inference], - configured_project_without_data_rows) + configured_project_with_ontology) -def test_duplicate_tools(rectangle_inference, - configured_project_without_data_rows): +def test_duplicate_tools(rectangle_inference, configured_project_with_ontology): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() pred['polygon'] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_invalid_feature_schema(configured_project_without_data_rows, +def test_invalid_feature_schema(configured_project_with_ontology, rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() pred['schemaId'] = "blahblah" with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_name_only_feature_schema(configured_project_without_data_rows, +def test_name_only_feature_schema(configured_project_with_ontology, rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['schemaId'] - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_schema_id_only_feature_schema(configured_project_without_data_rows, +def test_schema_id_only_feature_schema(configured_project_with_ontology, rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['name'] - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_missing_feature_schema(configured_project_without_data_rows, +def test_missing_feature_schema(configured_project_with_ontology, rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['schemaId'] del pred['name'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_validate_ndjson(tmp_path, configured_project_without_data_rows): +def test_validate_ndjson(tmp_path, configured_project_with_ontology): file_name = f"broken.ndjson" file_path = tmp_path / file_name with file_path.open("w") as f: f.write("test") with pytest.raises(ValueError): - configured_project_without_data_rows.upload_annotations( + configured_project_with_ontology.upload_annotations( name="name", annotations=str(file_path), validate=True) -def 
test_validate_ndjson_uuid(tmp_path, configured_project_without_data_rows, +def test_validate_ndjson_uuid(tmp_path, configured_project_with_ontology, predictions): file_name = f"repeat_uuid.ndjson" file_path = tmp_path / file_name @@ -257,15 +262,15 @@ def test_validate_ndjson_uuid(tmp_path, configured_project_without_data_rows, parser.dump(repeat_uuid, f) with pytest.raises(MALValidationError): - configured_project_without_data_rows.upload_annotations( + configured_project_with_ontology.upload_annotations( name="name", validate=True, annotations=str(file_path)) with pytest.raises(MALValidationError): - configured_project_without_data_rows.upload_annotations( + configured_project_with_ontology.upload_annotations( name="name", validate=True, annotations=repeat_uuid) def test_video_upload(video_checklist_inference, - configured_project_without_data_rows): + configured_project_with_ontology): pred = video_checklist_inference.copy() - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) diff --git a/tests/integration/annotation_import/test_upsert_prediction_import.py b/tests/integration/annotation_import/test_upsert_prediction_import.py index 9ed045f5e..927b6526d 100644 --- a/tests/integration/annotation_import/test_upsert_prediction_import.py +++ b/tests/integration/annotation_import/test_upsert_prediction_import.py @@ -13,7 +13,7 @@ @pytest.mark.skip() def test_create_from_url(client, tmp_path, object_predictions, model_run_with_data_rows, - configured_project_without_data_rows, + configured_project_with_one_data_row, annotation_import_test_helpers): name = str(uuid.uuid4()) file_name = f"{name}.json" @@ -41,7 +41,7 @@ def test_create_from_url(client, tmp_path, object_predictions, annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project( name=name, predictions=url, - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, priority=5) assert annotation_import.model_run_id == model_run_with_data_rows.uid @@ -50,7 +50,7 @@ def test_create_from_url(client, tmp_path, object_predictions, assert annotation_import.statuses assert batch - assert batch.project().uid == configured_project_without_data_rows.uid + assert batch.project().uid == configured_project_with_one_data_row.uid assert mal_prediction_import mal_prediction_import.wait_until_done() @@ -61,7 +61,7 @@ def test_create_from_url(client, tmp_path, object_predictions, @pytest.mark.skip() def test_create_from_objects(model_run_with_data_rows, - configured_project_without_data_rows, + configured_project_with_one_data_row, object_predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) @@ -76,7 +76,7 @@ def test_create_from_objects(model_run_with_data_rows, annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project( name=name, predictions=predictions, - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, priority=5) assert annotation_import.model_run_id == model_run_with_data_rows.uid @@ -85,7 +85,7 @@ def test_create_from_objects(model_run_with_data_rows, assert annotation_import.statuses assert batch - assert batch.project().uid == configured_project_without_data_rows.uid + assert batch.project().uid == configured_project_with_one_data_row.uid assert mal_prediction_import mal_prediction_import.wait_until_done() @@ -96,7 +96,7 @@ def 
test_create_from_objects(model_run_with_data_rows, @pytest.mark.skip() def test_create_from_local_file(tmp_path, model_run_with_data_rows, - configured_project_without_data_rows, + configured_project_with_one_data_row, object_predictions, annotation_import_test_helpers): @@ -119,7 +119,7 @@ def test_create_from_local_file(tmp_path, model_run_with_data_rows, annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project( name=name, predictions=str(file_path), - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, priority=5) assert annotation_import.model_run_id == model_run_with_data_rows.uid @@ -128,7 +128,7 @@ def test_create_from_local_file(tmp_path, model_run_with_data_rows, assert annotation_import.statuses assert batch - assert batch.project().uid == configured_project_without_data_rows.uid + assert batch.project().uid == configured_project_with_one_data_row.uid assert mal_prediction_import mal_prediction_import.wait_until_done() diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 82c739ddb..56ce6bae1 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -21,6 +21,7 @@ from labelbox.schema.annotation_import import LabelImport from labelbox.schema.enums import AnnotationImportState from labelbox.schema.invite import Invite +from labelbox.schema.project import Project from labelbox.schema.queue_mode import QueueMode from labelbox.schema.user import User @@ -425,16 +426,21 @@ def configured_project_with_label(client, rand_gen, image_url, project, dataset, Additionally includes a create_label method for any needed extra labels One label is already created and yielded when using fixture """ + start_time = time.time() + project._wait_until_data_rows_are_processed(data_row_ids=[data_row.uid], + sleep_interval=3) project.create_batch( rand_gen(str), [data_row.uid], # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) + print("create_batch took: ", time.time() - start_time) ontology = _setup_ontology(project) + print("setup ontology took: ", time.time() - start_time) label = _create_label(project, data_row, ontology, wait_for_label_processing) - + print("create_label took: ", time.time() - start_time) yield [project, dataset, data_row, label] for label in project.labels(): @@ -817,11 +823,13 @@ def upload_invalid_data_rows_for_dataset(dataset: Dataset): task.wait_till_done() +@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) def pytest_configure(): pytest.report = defaultdict(int) pytest.data_row_report = {'times': 0, 'num_rows': 0} +@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) @pytest.hookimpl(hookwrapper=True) def pytest_fixture_setup(fixturedef, request): start = time.time() @@ -832,11 +840,8 @@ def pytest_fixture_setup(fixturedef, request): exec_time = end - start pytest.report[fixturedef.argname] += exec_time - # print('pytest_fixture_setup' - # f', request={request}' - # f', create_data_row_time={end - start}') - +@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) @pytest.fixture(scope='session', autouse=True) def print_perf_summary(): yield diff --git a/tests/integration/export_v2/conftest.py b/tests/integration/export_v2/conftest.py index af8b4c66f..757bba44e 100644 --- a/tests/integration/export_v2/conftest.py +++ b/tests/integration/export_v2/conftest.py @@ -297,7 +297,7 @@ def configured_project_with_ontology(client, initial_dataset, ontology, @pytest.fixture -def 
configured_project_without_data_rows(client, ontology, rand_gen): +def configured_project_with_one_data_row(client, ontology, rand_gen): project = client.create_project(name=rand_gen(str), description=rand_gen(str), queue_mode=QueueMode.Batch) diff --git a/tests/integration/export_v2/test_export_video.py b/tests/integration/export_v2/test_export_video.py index 863f4d31e..94828e1b6 100644 --- a/tests/integration/export_v2/test_export_video.py +++ b/tests/integration/export_v2/test_export_video.py @@ -5,11 +5,11 @@ from labelbox.schema.annotation_import import AnnotationImportState -def test_export_v2_video(client, configured_project_without_data_rows, +def test_export_v2_video(client, configured_project_with_one_data_row, video_data, video_data_row, bbox_video_annotation_objects, rand_gen): - project = configured_project_without_data_rows + project = configured_project_with_one_data_row project_id = project.uid labels = [] _, data_row_uids = video_data From 65990878d68bb1c35a3c9e6f6dff27c83ba1ea40 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 18 Aug 2023 10:58:02 -0700 Subject: [PATCH 05/10] Remove more sources of data leakage --- .../annotation_import/test_data_types.py | 19 ++++++++++++++----- tests/integration/conftest.py | 2 ++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py index 5de79f5cc..a5c27eb20 100644 --- a/tests/integration/annotation_import/test_data_types.py +++ b/tests/integration/annotation_import/test_data_types.py @@ -137,7 +137,8 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): ]) def test_import_data_types(client, configured_project, initial_dataset, rand_gen, data_row_json_by_data_type, - annotations_by_data_type, data_type_class): + annotations_by_data_type, data_type_class, + one_datarow): project = configured_project project_id = project.uid @@ -304,14 +305,22 @@ def test_import_label_annotations(client, configured_project_with_one_data_row, @pytest.mark.parametrize('data_type, data_class, annotations', test_params) -def test_import_mal_annotations(client, configured_project_with_one_data_row, - data_row_json_by_data_type, data_type, - data_class, annotations, rand_gen): - +@pytest.fixture +def one_datarow(client, rand_gen, data_row_json_by_data_type, data_type): dataset = client.create_dataset(name=rand_gen(str)) data_row_json = data_row_json_by_data_type[data_type] data_row = dataset.create_data_row(data_row_json) + yield data_row + + dataset.delete() + + +@pytest.mark.parametrize('data_type, data_class, annotations', test_params) +def test_import_mal_annotations(client, configured_project_with_one_data_row, + data_type, data_class, annotations, rand_gen, + one_datarow): + data_row = one_datarow set_project_media_type_from_data_type(configured_project_with_one_data_row, data_class) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 56ce6bae1..af66a7ed4 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -390,6 +390,8 @@ def initial_dataset(client, rand_gen): dataset = client.create_dataset(name=rand_gen(str)) yield dataset + dataset.delete() + @pytest.fixture def project_with_ontology(project): From 9e41e82169da875ec0d9aee9638fa6fbca864b60 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 18 Aug 2023 11:31:08 -0700 Subject: [PATCH 06/10] Add config for fixture profiling --- Makefile | 1 + .../integration/annotation_import/conftest.py | 
11 +----- .../annotation_import/test_data_types.py | 13 +++++-- tests/integration/conftest.py | 37 +++++++------------ 4 files changed, 25 insertions(+), 37 deletions(-) diff --git a/Makefile b/Makefile index f9f490554..b7838a7d4 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ test-local: build-image -e LABELBOX_TEST_ENVIRON="local" \ -e DA_GCP_LABELBOX_API_KEY=${DA_GCP_LABELBOX_API_KEY} \ -e LABELBOX_TEST_API_KEY_LOCAL=${LABELBOX_TEST_API_KEY_LOCAL} \ + -e FIXTURE_PROFILE=true \ local/labelbox-python:test pytest $(PATH_TO_TEST) test-staging: build-image diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 3f1cd7de5..988ad7883 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -498,7 +498,6 @@ def get_data_row_id(indx=0): @pytest.fixture def configured_project(client, initial_dataset, ontology, rand_gen, image_url): - start_time = time.time() dataset = initial_dataset project = client.create_project(name=rand_gen(str), queue_mode=QueueMode.Batch) @@ -515,15 +514,12 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): num_rows += 1 project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, sleep_interval=3) - if pytest.data_row_report: - pytest.data_row_report['times'] += time.time() - start_time - pytest.data_row_report['num_rows'] += num_rows + project.create_batch( rand_gen(str), data_row_ids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) - print("After creating batch ", time.time() - start_time) project.data_row_ids = data_row_ids yield project @@ -579,8 +575,6 @@ def dataset_conversation_entity(client, rand_gen, conversation_entity_data_row, @pytest.fixture def configured_project_with_one_data_row(client, ontology, rand_gen, initial_dataset, image_url): - start_time = time.time() - project = client.create_project(name=rand_gen(str), description=rand_gen(str), queue_mode=QueueMode.Batch) @@ -594,9 +588,6 @@ def configured_project_with_one_data_row(client, ontology, rand_gen, project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, sleep_interval=3) - if pytest.data_row_report: - pytest.data_row_report['times'] += time.time() - start_time - pytest.data_row_report['num_rows'] += 1 batch = project.create_batch( rand_gen(str), data_row_ids, # sample of data row objects diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py index a5c27eb20..79e8b03cb 100644 --- a/tests/integration/annotation_import/test_data_types.py +++ b/tests/integration/annotation_import/test_data_types.py @@ -135,10 +135,15 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData ]) -def test_import_data_types(client, configured_project, initial_dataset, - rand_gen, data_row_json_by_data_type, - annotations_by_data_type, data_type_class, - one_datarow): +def test_import_data_types( + client, + configured_project, + initial_dataset, + rand_gen, + data_row_json_by_data_type, + annotations_by_data_type, + data_type_class, +): project = configured_project project_id = project.uid diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index af66a7ed4..a6651b97d 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -27,8 +27,6 @@ IMG_URL = "https://picsum.photos/200/300.jpg" 
SMALL_DATASET_URL = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/potato.jpeg" -DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS = 30 -DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS = 5 class Environ(Enum): @@ -458,10 +456,8 @@ def configured_batch_project_with_label(project, dataset, data_row, One label is already created and yielded when using fixture """ data_rows = [dr.uid for dr in list(dataset.data_rows())] - project._wait_until_data_rows_are_processed( - data_row_ids=data_rows, - wait_processing_max_seconds=DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS, - sleep_interval=DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS) + project._wait_until_data_rows_are_processed(data_row_ids=data_rows, + sleep_interval=3) project.create_batch("test-batch", data_rows) project.data_row_ids = data_rows @@ -604,7 +600,6 @@ def configured_project_with_complex_ontology(client, initial_dataset, rand_gen, project.setup(editor, ontology.asdict()) yield [project, data_row] - dataset.delete() project.delete() @@ -825,35 +820,31 @@ def upload_invalid_data_rows_for_dataset(dataset: Dataset): task.wait_till_done() -@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) def pytest_configure(): pytest.report = defaultdict(int) - pytest.data_row_report = {'times': 0, 'num_rows': 0} -@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) @pytest.hookimpl(hookwrapper=True) -def pytest_fixture_setup(fixturedef, request): +def pytest_fixture_setup(fixturedef): start = time.time() yield - end = time.time() exec_time = end - start - pytest.report[fixturedef.argname] += exec_time + if "FIXTURE_PROFILE" in os.environ: + pytest.report[fixturedef.argname] += exec_time -@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) @pytest.fixture(scope='session', autouse=True) def print_perf_summary(): yield - sorted_dict = dict( - sorted(pytest.report.items(), key=lambda item: item[1], reverse=True)) - num_of_entries = 10 if len(sorted_dict) >= 10 else len(sorted_dict) - slowest_fixtures = [ - (aaa, sorted_dict[aaa]) for aaa in islice(sorted_dict, num_of_entries) - ] - print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) - print("Data row report:\n", pytest.data_row_report, file=sys.stderr) - # assert False + if "FIXTURE_PROFILE" in os.environ: + sorted_dict = dict( + sorted(pytest.report.items(), + key=lambda item: item[1], + reverse=True)) + num_of_entries = 10 if len(sorted_dict) >= 10 else len(sorted_dict) + slowest_fixtures = [(aaa, sorted_dict[aaa]) + for aaa in islice(sorted_dict, num_of_entries)] + print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) From ba2990d68087b575a7f27ed6e366fab1bb7caf53 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 18 Aug 2023 15:02:14 -0700 Subject: [PATCH 07/10] Add explanation on how to supply data row ids to prediction_id_mapping --- .../integration/annotation_import/conftest.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 988ad7883..1980d6f26 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -505,13 +505,11 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): client.get_labeling_frontends( where=LabelingFrontend.name == "editor"))[0] project.setup(editor, ontology) - num_rows = 0 data_row_ids = [] for _ in range(len(ontology['tools']) + len(ontology['classifications'])): 
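        # one data row per ontology feature (tools + classifications), so each
        # prediction in these tests can be given its own data row id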
data_row_ids.append(dataset.create_data_row(row_data=image_url).uid) - num_rows += 1 project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, sleep_interval=3) @@ -605,6 +603,22 @@ def configured_project_with_one_data_row(client, ontology, rand_gen, # At the moment it expects only one feature per tool type and this creates unnecessary coupling between differet tests # In an example of a 'rectangle' we have extended to support multiple instances of the same tool type # TODO: we will support this approach in the future for all tools +# +""" +Please note that this fixture now offers the flexibility to configure three different strategies for generating data row ids for predictions: +Default(configured_project fixture): + configured_project that generates a data row for each member of ontology. + This makes sure each prediction has its own data row id. This is applicable to prediction upload cases when last label overwrites existing ones + +Optimized Strategy (configured_project_with_one_data_row fixture): + This fixture has only one data row and all predictions will be mapped to it + +Custom Data Row IDs Strategy: + Individuals can create their own fixture to supply data row ids. + This particular fixture, termed "hardcoded_datarow_id," should be defined locally within a test file. +""" + + @pytest.fixture def prediction_id_mapping(ontology, request): # Maps tool types to feature schema ids From 700fefe87cb2259ea78b8994f986b641ce954cd7 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 18 Aug 2023 15:50:42 -0700 Subject: [PATCH 08/10] Fix test_user_and_org.py --- tests/integration/test_user_and_org.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_user_and_org.py b/tests/integration/test_user_and_org.py index 9f07666de..ca158527c 100644 --- a/tests/integration/test_user_and_org.py +++ b/tests/integration/test_user_and_org.py @@ -1,3 +1,6 @@ +from labelbox.schema.project import Project + + def test_user(client): user = client.get_user() assert user.uid is not None @@ -10,14 +13,11 @@ def test_organization(client): assert client.get_user() in set(organization.users()) -def test_user_and_org_projects(project): - client = project.client +def test_user_and_org_projects(client, project): user = client.get_user() org = client.get_organization() - user_projects = set(user.projects()) - org_projects = set(org.projects()) + user_project = user.projects(where=Project.uid == project.uid) + org_project = org.projects(where=Project.uid == project.uid) - assert project.created_by() == user - assert project.organization() == org - assert project in user_projects - assert project in org_projects \ No newline at end of file + assert user_project + assert org_project \ No newline at end of file From 5a6e250fba457336a78bd249c1cc22efc1973842 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 22 Aug 2023 17:18:27 -0700 Subject: [PATCH 09/10] PR updates --- .../integration/annotation_import/conftest.py | 8 +++++-- .../test_bulk_import_request.py | 8 +++---- tests/integration/conftest.py | 22 ++++++++++--------- tests/integration/export_v2/conftest.py | 2 +- .../export_v2/test_export_video.py | 4 ++-- tests/integration/test_filtering.py | 4 ++-- tests/integration/test_project.py | 6 ++--- 7 files changed, 30 insertions(+), 24 deletions(-) diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 1980d6f26..ebfe74f47 100644 --- a/tests/integration/annotation_import/conftest.py +++ 
b/tests/integration/annotation_import/conftest.py @@ -513,7 +513,7 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, sleep_interval=3) - project.create_batch( + batch = project.create_batch( rand_gen(str), data_row_ids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) @@ -521,6 +521,8 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): project.data_row_ids = data_row_ids yield project + + batch.delete() project.delete() @@ -614,8 +616,10 @@ def configured_project_with_one_data_row(client, ontology, rand_gen, This fixture has only one data row and all predictions will be mapped to it Custom Data Row IDs Strategy: - Individuals can create their own fixture to supply data row ids. + Individuals can supply hard-coded data row ids when a creation of data row is not required. This particular fixture, termed "hardcoded_datarow_id," should be defined locally within a test file. + In the future, we can use this approach to inject correct number of rows instead of using configured_project fixture + that creates a data row for each member of ontology (14 in total) for each run. """ diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index 6691cc044..52552f53d 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -41,13 +41,13 @@ def test_create_from_url(project): assert bulk_import_request.state == BulkImportRequestState.RUNNING -def test_validate_file(project_with_ontology): +def test_validate_file(project_with_empty_ontology): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" with pytest.raises(MALValidationError): - project_with_ontology.upload_annotations(name=name, - annotations=url, - validate=True) + project_with_empty_ontology.upload_annotations(name=name, + annotations=url, + validate=True) #Schema ids shouldn't match diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index a6651b97d..781fe6edb 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -27,6 +27,8 @@ IMG_URL = "https://picsum.photos/200/300.jpg" SMALL_DATASET_URL = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/potato.jpeg" +DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS = 30 +DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS = 3 class Environ(Enum): @@ -392,7 +394,7 @@ def initial_dataset(client, rand_gen): @pytest.fixture -def project_with_ontology(project): +def project_with_empty_ontology(project): editor = list( project.client.get_labeling_frontends( where=LabelingFrontend.name == "editor"))[0] @@ -402,13 +404,13 @@ def project_with_ontology(project): @pytest.fixture -def configured_project(project_with_ontology, initial_dataset, rand_gen, +def configured_project(project_with_empty_ontology, initial_dataset, rand_gen, image_url): dataset = initial_dataset data_row_id = dataset.create_data_row(row_data=image_url).uid - project = project_with_ontology + project = project_with_empty_ontology - project.create_batch( + batch = project.create_batch( rand_gen(str), [data_row_id], # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) @@ -417,6 +419,8 @@ def configured_project(project_with_ontology, initial_dataset, rand_gen, yield 
project + batch.delete() + @pytest.fixture def configured_project_with_label(client, rand_gen, image_url, project, dataset, @@ -426,21 +430,19 @@ def configured_project_with_label(client, rand_gen, image_url, project, dataset, Additionally includes a create_label method for any needed extra labels One label is already created and yielded when using fixture """ - start_time = time.time() - project._wait_until_data_rows_are_processed(data_row_ids=[data_row.uid], - sleep_interval=3) + project._wait_until_data_rows_are_processed( + data_row_ids=[data_row.uid], + wait_processing_max_seconds=DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS, + sleep_interval=DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS) project.create_batch( rand_gen(str), [data_row.uid], # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) - print("create_batch took: ", time.time() - start_time) ontology = _setup_ontology(project) - print("setup ontology took: ", time.time() - start_time) label = _create_label(project, data_row, ontology, wait_for_label_processing) - print("create_label took: ", time.time() - start_time) yield [project, dataset, data_row, label] for label in project.labels(): diff --git a/tests/integration/export_v2/conftest.py b/tests/integration/export_v2/conftest.py index 757bba44e..af8b4c66f 100644 --- a/tests/integration/export_v2/conftest.py +++ b/tests/integration/export_v2/conftest.py @@ -297,7 +297,7 @@ def configured_project_with_ontology(client, initial_dataset, ontology, @pytest.fixture -def configured_project_with_one_data_row(client, ontology, rand_gen): +def configured_project_without_data_rows(client, ontology, rand_gen): project = client.create_project(name=rand_gen(str), description=rand_gen(str), queue_mode=QueueMode.Batch) diff --git a/tests/integration/export_v2/test_export_video.py b/tests/integration/export_v2/test_export_video.py index 94828e1b6..863f4d31e 100644 --- a/tests/integration/export_v2/test_export_video.py +++ b/tests/integration/export_v2/test_export_video.py @@ -5,11 +5,11 @@ from labelbox.schema.annotation_import import AnnotationImportState -def test_export_v2_video(client, configured_project_with_one_data_row, +def test_export_v2_video(client, configured_project_without_data_rows, video_data, video_data_row, bbox_video_annotation_objects, rand_gen): - project = configured_project_with_one_data_row + project = configured_project_without_data_rows project_id = project.uid labels = [] _, data_row_uids = video_data diff --git a/tests/integration/test_filtering.py b/tests/integration/test_filtering.py index fde7f0638..f44cdcdcb 100644 --- a/tests/integration/test_filtering.py +++ b/tests/integration/test_filtering.py @@ -15,7 +15,7 @@ def project_to_test_where(client, rand_gen): p_b = client.create_project(name=p_b_name, queue_mode=QueueMode.Batch) p_c = client.create_project(name=p_c_name, queue_mode=QueueMode.Batch) - yield p_a, p_b, p_c + yield p_a, p_b p_a.delete() p_b.delete() @@ -26,7 +26,7 @@ def project_to_test_where(client, rand_gen): # other builds simultaneously adding projects to test org def test_where(client, project_to_test_where): p_a, p_b, p_c = project_to_test_where - p_a_name, p_b_name, _ = [p.name for p in [p_a, p_b, p_c]] + p_a_name, p_b_name = [p.name for p in [p_a, p_b]] def get(where=None): date_where = Project.created_at >= p_a.created_at diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index b3b683a3d..94c98ee50 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ 
-171,15 +171,15 @@ def test_attach_instructions(client, project): @pytest.mark.skipif(condition=os.environ['LABELBOX_TEST_ENVIRON'] == "onprem", reason="new mutation does not work for onprem") -def test_html_instructions(project_with_ontology): +def test_html_instructions(project_with_empty_ontology): html_file_path = '/tmp/instructions.html' sample_html_str = "" with open(html_file_path, 'w') as file: file.write(sample_html_str) - project_with_ontology.upsert_instructions(html_file_path) - updated_ontology = project_with_ontology.ontology().normalized + project_with_empty_ontology.upsert_instructions(html_file_path) + updated_ontology = project_with_empty_ontology.ontology().normalized instructions = updated_ontology.pop('projectInstructions') assert requests.get(instructions).text == sample_html_str From e585e8c14c89503a99319c342f80903bc5eeeaec Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 22 Aug 2023 17:35:52 -0700 Subject: [PATCH 10/10] Turn on fixture profile for staging --- .github/workflows/python-package.yml | 1 + tests/integration/annotation_import/conftest.py | 1 - tests/integration/test_filtering.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 6ed378f09..83c0393af 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -39,6 +39,7 @@ jobs: echo "LABELBOX_TEST_ENVIRON=prod" >> $GITHUB_ENV else echo "LABELBOX_TEST_ENVIRON=staging" >> $GITHUB_ENV + echo "FIXTURE_PROFILE=true" >> $GITHUB_ENV fi - uses: actions/checkout@v2 diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index ebfe74f47..d50c44d0c 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -522,7 +522,6 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): yield project - batch.delete() project.delete() diff --git a/tests/integration/test_filtering.py b/tests/integration/test_filtering.py index f44cdcdcb..7dd687759 100644 --- a/tests/integration/test_filtering.py +++ b/tests/integration/test_filtering.py @@ -15,7 +15,7 @@ def project_to_test_where(client, rand_gen): p_b = client.create_project(name=p_b_name, queue_mode=QueueMode.Batch) p_c = client.create_project(name=p_c_name, queue_mode=QueueMode.Batch) - yield p_a, p_b + yield p_a, p_b, p_c p_a.delete() p_b.delete()
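
Two usage notes on the final state of this series:

1) Fixture profiling is opt-in: the hookwrapper around pytest_fixture_setup only
   accumulates per-fixture wall time when FIXTURE_PROFILE is set, and the
   session-scoped print_perf_summary fixture then reports the ten slowest
   fixtures to stderr. Besides the Makefile (local runs) and the GitHub workflow
   (staging), it can be enabled ad hoc:

       FIXTURE_PROFILE=true pytest tests/integration

2) A minimal sketch of the "Custom Data Row IDs Strategy" described in the
   annotation_import/conftest.py docstring, as it might look inside a test file.
   The test name and payload tweaks are illustrative; imports are assumed to
   match those already used by test_ndjson_validation.py, and the sketch relies
   on _validate_ndjson not requiring the data row to exist (the module-scoped
   configured_project_with_ontology holds no data rows at all):

       import uuid

       import pytest

       @pytest.fixture(scope="module", autouse=True)
       def hardcoded_datarow_id():
           # A fixed, plausible id: nothing is created server-side.
           data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q'

           def get_data_row_id(indx=0):
               return data_row_id

           yield get_data_row_id

       def test_rectangle_validates_with_hardcoded_id(
               rectangle_inference, hardcoded_datarow_id,
               configured_project_with_ontology):
           pred = rectangle_inference.copy()
           pred['uuid'] = str(uuid.uuid4())
           pred['dataRow'] = {'id': hardcoded_datarow_id()}
           # Should pass validation without ever creating a data row.
           _validate_ndjson([pred], configured_project_with_ontology)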