1 change: 1 addition & 0 deletions .github/workflows/python-package.yml
@@ -39,6 +39,7 @@ jobs:
echo "LABELBOX_TEST_ENVIRON=prod" >> $GITHUB_ENV
else
echo "LABELBOX_TEST_ENVIRON=staging" >> $GITHUB_ENV
echo "FIXTURE_PROFILE=true" >> $GITHUB_ENV
fi

- uses: actions/checkout@v2
1 change: 1 addition & 0 deletions Makefile
@@ -13,6 +13,7 @@ test-local: build-image
-e LABELBOX_TEST_ENVIRON="local" \
-e DA_GCP_LABELBOX_API_KEY=${DA_GCP_LABELBOX_API_KEY} \
-e LABELBOX_TEST_API_KEY_LOCAL=${LABELBOX_TEST_API_KEY_LOCAL} \
-e FIXTURE_PROFILE=true \
local/labelbox-python:test pytest $(PATH_TO_TEST)

test-staging: build-image
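Note: the workflow and Makefile hunks above only export FIXTURE_PROFILE=true into the test environment. As a hedged sketch of how a conftest.py could consume the flag (pytest_fixture_setup is a real pytest hookwrapper, but gating on this variable and the output format are assumptions, not part of this diff):

import os
import time

import pytest


@pytest.hookimpl(hookwrapper=True)
def pytest_fixture_setup(fixturedef, request):
    # Time every fixture setup; report only when FIXTURE_PROFILE is set.
    start = time.monotonic()
    yield  # the actual fixture setup runs here
    if os.environ.get("FIXTURE_PROFILE") == "true":
        elapsed = time.monotonic() - start
        print(f"[fixture-profile] {fixturedef.argname}: {elapsed:.3f}s")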
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -12,7 +12,7 @@
]


@pytest.fixture
@pytest.fixture(scope="session")
def rand_gen():

def gen(field_type):
93 changes: 78 additions & 15 deletions tests/integration/annotation_import/conftest.py
@@ -9,6 +9,7 @@
from typing import Type
from labelbox.schema.labeling_frontend import LabelingFrontend
from labelbox.schema.annotation_import import LabelImport, AnnotationImportState
from labelbox.schema.project import Project
from labelbox.schema.queue_mode import QueueMode

DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS = 40
@@ -210,7 +211,7 @@ def annotations_by_data_type_v2(
}


@pytest.fixture
@pytest.fixture(scope='session')
def ontology():
bbox_tool_with_nested_text = {
'required':
Expand Down Expand Up @@ -478,34 +479,49 @@ def func(project):


@pytest.fixture
def initial_dataset(client, rand_gen):
dataset = client.create_dataset(name=rand_gen(str))
yield dataset
dataset.delete()
def configured_project_datarow_id(configured_project):

def get_data_row_id(indx=0):
return configured_project.data_row_ids[indx]

yield get_data_row_id


@pytest.fixture
def configured_project_one_datarow_id(configured_project_with_one_data_row):

def get_data_row_id(indx=0):
return configured_project_with_one_data_row.data_row_ids[0]

yield get_data_row_id


@pytest.fixture
def configured_project(client, initial_dataset, ontology, rand_gen, image_url):
dataset = initial_dataset
project = client.create_project(
name=rand_gen(str),
queue_mode=QueueMode.Batch,
)
project = client.create_project(name=rand_gen(str),
queue_mode=QueueMode.Batch)
editor = list(
client.get_labeling_frontends(
where=LabelingFrontend.name == "editor"))[0]
project.setup(editor, ontology)

data_row_ids = []

for _ in range(len(ontology['tools']) + len(ontology['classifications'])):
data_row_ids.append(dataset.create_data_row(row_data=image_url).uid)
project.create_batch(
project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids,
sleep_interval=3)

batch = project.create_batch(
rand_gen(str),
data_row_ids, # sample of data row objects
5 # priority between 1(Highest) - 5(lowest)
)
project.data_row_ids = data_row_ids

yield project

project.delete()


@@ -556,27 +572,74 @@ def dataset_conversation_entity(client, rand_gen, conversation_entity_data_row,


@pytest.fixture
def configured_project_without_data_rows(client, ontology, rand_gen):
def configured_project_with_one_data_row(client, ontology, rand_gen,
initial_dataset, image_url):
project = client.create_project(name=rand_gen(str),
description=rand_gen(str),
queue_mode=QueueMode.Batch)
editor = list(
client.get_labeling_frontends(
where=LabelingFrontend.name == "editor"))[0]
project.setup(editor, ontology)

data_row = initial_dataset.create_data_row(row_data=image_url)
data_row_ids = [data_row.uid]
project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids,
sleep_interval=3)

batch = project.create_batch(
rand_gen(str),
data_row_ids, # sample of data row objects
5 # priority between 1(Highest) - 5(lowest)
)
project.data_row_ids = data_row_ids

yield project

batch.delete()
project.delete()


# This function converts an ontology feature into an actual annotation.
# At the moment it expects only one feature per tool type, which creates unnecessary coupling between different tests.
# For 'rectangle', as an example, we have extended it to support multiple instances of the same tool type.
# TODO: support this approach for all tools in the future.
#
"""
Please note that this fixture now offers the flexibility to configure three different strategies for generating data row ids for predictions:
Default(configured_project fixture):
configured_project that generates a data row for each member of ontology.
This makes sure each prediction has its own data row id. This is applicable to prediction upload cases when last label overwrites existing ones

Optimized Strategy (configured_project_with_one_data_row fixture):
This fixture has only one data row and all predictions will be mapped to it

Custom Data Row IDs Strategy:
Individuals can supply hard-coded data row ids when a creation of data row is not required.
This particular fixture, termed "hardcoded_datarow_id," should be defined locally within a test file.
In the future, we can use this approach to inject correct number of rows instead of using configured_project fixture
that creates a data row for each member of ontology (14 in total) for each run.
"""


@pytest.fixture
def prediction_id_mapping(configured_project):
def prediction_id_mapping(ontology, request):
# Maps tool types to feature schema ids
project = configured_project
if 'configured_project' in request.fixturenames:
data_row_id_factory = request.getfixturevalue(
'configured_project_datarow_id')
project = request.getfixturevalue('configured_project')
elif 'hardcoded_datarow_id' in request.fixturenames:
data_row_id_factory = request.getfixturevalue('hardcoded_datarow_id')
project = request.getfixturevalue('configured_project_with_ontology')
else:
data_row_id_factory = request.getfixturevalue(
'configured_project_one_datarow_id')
project = request.getfixturevalue(
'configured_project_with_one_data_row')

ontology = project.ontology().normalized

result = {}

for idx, tool in enumerate(ontology['tools'] + ontology['classifications']):
@@ -593,7 +656,7 @@ def prediction_id_mapping(configured_project):
"schemaId": tool['featureSchemaId'],
"name": tool['name'],
"dataRow": {
"id": project.data_row_ids[idx],
"id": data_row_id_factory(idx),
},
'tool': tool
}
@@ -606,7 +669,7 @@ def prediction_id_mapping(configured_project):
"schemaId": tool['featureSchemaId'],
"name": tool['name'],
"dataRow": {
"id": project.data_row_ids[idx],
"id": data_row_id_factory(idx),
},
'tool': tool
}
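
Per the strategy docstring above, a test file can opt into the custom strategy by defining a local "hardcoded_datarow_id" fixture. A minimal sketch, assuming only what prediction_id_mapping shows: the fixture must yield a callable that accepts an index, and the id below is a placeholder, not a real data row:

import pytest


@pytest.fixture
def hardcoded_datarow_id():
    data_row_id = "<existing-data-row-id>"  # placeholder; supply a real id in practice

    def get_data_row_id(indx=0):
        return data_row_id

    yield get_data_row_id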
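Which strategy applies is decided entirely by the fixtures a test requests, checked through request.fixturenames. A hedged usage sketch (test names and bodies are illustrative, not from this diff):

def test_one_data_row_per_feature(configured_project, prediction_id_mapping):
    # Requesting configured_project selects the data-row-per-ontology-feature strategy.
    ...


def test_single_shared_data_row(prediction_id_mapping):
    # No strategy fixture requested: prediction_id_mapping falls back to
    # configured_project_with_one_data_row, so all predictions share one data row.
    ...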
70 changes: 34 additions & 36 deletions tests/integration/annotation_import/test_bulk_import_request.py
@@ -25,40 +25,40 @@
"""


def test_create_from_url(configured_project):
def test_create_from_url(project):
name = str(uuid.uuid4())
url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"

bulk_import_request = configured_project.upload_annotations(name=name,
annotations=url,
validate=False)
bulk_import_request = project.upload_annotations(name=name,
annotations=url,
validate=False)

assert bulk_import_request.project() == configured_project
assert bulk_import_request.project() == project
assert bulk_import_request.name == name
assert bulk_import_request.input_file_url == url
assert bulk_import_request.error_file_url is None
assert bulk_import_request.status_file_url is None
assert bulk_import_request.state == BulkImportRequestState.RUNNING


def test_validate_file(configured_project):
def test_validate_file(project_with_empty_ontology):
name = str(uuid.uuid4())
url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
with pytest.raises(MALValidationError):
configured_project.upload_annotations(name=name,
annotations=url,
validate=True)
project_with_empty_ontology.upload_annotations(name=name,
annotations=url,
validate=True)
# Schema ids shouldn't match


def test_create_from_objects(configured_project, predictions,
def test_create_from_objects(configured_project_with_one_data_row, predictions,
annotation_import_test_helpers):
name = str(uuid.uuid4())

bulk_import_request = configured_project.upload_annotations(
bulk_import_request = configured_project_with_one_data_row.upload_annotations(
name=name, annotations=predictions)

assert bulk_import_request.project() == configured_project
assert bulk_import_request.project() == configured_project_with_one_data_row
assert bulk_import_request.name == name
assert bulk_import_request.error_file_url is None
assert bulk_import_request.status_file_url is None
@@ -105,34 +105,33 @@ def test_create_from_local_file(tmp_path, predictions, configured_project,
bulk_import_request.input_file_url, predictions)


def test_get(client, configured_project):
def test_get(client, configured_project_with_one_data_row):
name = str(uuid.uuid4())
url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson"
configured_project.upload_annotations(name=name,
annotations=url,
validate=False)
configured_project_with_one_data_row.upload_annotations(name=name,
annotations=url,
validate=False)

bulk_import_request = BulkImportRequest.from_name(
client, project_id=configured_project.uid, name=name)
client, project_id=configured_project_with_one_data_row.uid, name=name)

assert bulk_import_request.project() == configured_project
assert bulk_import_request.project() == configured_project_with_one_data_row
assert bulk_import_request.name == name
assert bulk_import_request.input_file_url == url
assert bulk_import_request.error_file_url is None
assert bulk_import_request.status_file_url is None
assert bulk_import_request.state == BulkImportRequestState.RUNNING


def test_validate_ndjson(tmp_path, configured_project):
def test_validate_ndjson(tmp_path, configured_project_with_one_data_row):
file_name = f"broken.ndjson"
file_path = tmp_path / file_name
with file_path.open("w") as f:
f.write("test")

with pytest.raises(ValueError):
configured_project.upload_annotations(name="name",
validate=True,
annotations=str(file_path))
configured_project_with_one_data_row.upload_annotations(
name="name", validate=True, annotations=str(file_path))


def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):
@@ -158,14 +157,13 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions):


@pytest.mark.slow
def test_wait_till_done(rectangle_inference, configured_project):
def test_wait_till_done(rectangle_inference,
configured_project_with_one_data_row):
name = str(uuid.uuid4())
url = configured_project.client.upload_data(content=parser.dumps(
[rectangle_inference]),
sign=True)
bulk_import_request = configured_project.upload_annotations(name=name,
annotations=url,
validate=False)
url = configured_project_with_one_data_row.client.upload_data(
content=parser.dumps([rectangle_inference]), sign=True)
bulk_import_request = configured_project_with_one_data_row.upload_annotations(
name=name, annotations=url, validate=False)

assert len(bulk_import_request.inputs) == 1
bulk_import_request.wait_until_done()
@@ -299,7 +297,7 @@ def test_pdf_mal_bbox(client, configured_project_pdf):
assert import_annotations.errors == []


def test_pdf_document_entity(client, configured_project_without_data_rows,
def test_pdf_document_entity(client, configured_project_with_one_data_row,
dataset_pdf_entity, rand_gen):
# For the content "Metal-insulator (MI) transitions have been one of the", see the OCR JSON extract at tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json
document_text_selection = DocumentTextSelection(
@@ -323,7 +321,7 @@ def test_pdf_document_entity(client, configured_project_without_data_rows,

labels = []
_, data_row_uids = dataset_pdf_entity
configured_project_without_data_rows.create_batch(
configured_project_with_one_data_row.create_batch(
rand_gen(str),
data_row_uids, # sample of data row objects
5 # priority between 1(Highest) - 5(lowest)
@@ -338,7 +336,7 @@ def test_nested_video_object_annotations(client,

import_annotations = MALPredictionImport.create_from_objects(
client=client,
project_id=configured_project_without_data_rows.uid,
project_id=configured_project_with_one_data_row.uid,
name=f"import {str(uuid.uuid4())}",
predictions=labels)
import_annotations.wait_until_done()
@@ -347,14 +345,14 @@ def test_wait_till_done(rectangle_inference, configured_project):


def test_nested_video_object_annotations(client,
configured_project_without_data_rows,
configured_project_with_one_data_row,
video_data,
bbox_video_annotation_objects,
rand_gen):
labels = []
_, data_row_uids = video_data
configured_project_without_data_rows.update(media_type=MediaType.Video)
configured_project_without_data_rows.create_batch(
configured_project_with_one_data_row.update(media_type=MediaType.Video)
configured_project_with_one_data_row.create_batch(
rand_gen(str),
data_row_uids, # sample of data row objects
5 # priority between 1(Highest) - 5(lowest)
Expand All @@ -366,7 +364,7 @@ def test_nested_video_object_annotations(client,
annotations=bbox_video_annotation_objects))
import_annotations = MALPredictionImport.create_from_objects(
client=client,
project_id=configured_project_without_data_rows.uid,
project_id=configured_project_with_one_data_row.uid,
name=f"import {str(uuid.uuid4())}",
predictions=labels)
import_annotations.wait_until_done()
@@ -7,7 +7,7 @@
from labelbox.schema.annotation_import import MALPredictionImport


def test_conversation_entity(client, configured_project_without_data_rows,
def test_conversation_entity(client, configured_project_with_one_data_row,
dataset_conversation_entity, rand_gen):

conversation_entity_annotation = ConversationEntity(start=0,
Expand All @@ -20,7 +20,7 @@ def test_conversation_entity(client, configured_project_without_data_rows,
labels = []
_, data_row_uids = dataset_conversation_entity

configured_project_without_data_rows.create_batch(
configured_project_with_one_data_row.create_batch(
rand_gen(str),
data_row_uids, # sample of data row objects
5 # priority between 1(Highest) - 5(lowest)
Expand All @@ -35,7 +35,7 @@ def test_conversation_entity(client, configured_project_without_data_rows,

import_annotations = MALPredictionImport.create_from_objects(
client=client,
project_id=configured_project_without_data_rows.uid,
project_id=configured_project_with_one_data_row.uid,
name=f"import {str(uuid.uuid4())}",
predictions=labels)
