From 48285e46b2ae509a8c1ccdc4141459ddd5ad006a Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Wed, 2 Aug 2023 16:20:23 -0700 Subject: [PATCH 01/10] Add instrumentation for fixtures(temp) --- .../integration/annotation_import/conftest.py | 13 ++++++- tests/integration/conftest.py | 36 +++++++++++++++++++ tests/integration/test_dataset.py | 8 +++-- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 6db398fe5..6e35d4d0a 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -9,6 +9,7 @@ from typing import Type from labelbox.schema.labeling_frontend import LabelingFrontend from labelbox.schema.annotation_import import LabelImport, AnnotationImportState +from labelbox.schema.project import Project from labelbox.schema.queue_mode import QueueMode DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS = 40 @@ -486,6 +487,7 @@ def initial_dataset(client, rand_gen): @pytest.fixture def configured_project(client, initial_dataset, ontology, rand_gen, image_url): + start_time = time.time() dataset = initial_dataset project = client.create_project( name=rand_gen(str), @@ -496,14 +498,21 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): where=LabelingFrontend.name == "editor"))[0] project.setup(editor, ontology) data_row_ids = [] - + # print("Before creating data rows ", time.time() - start_time) + num_rows = 0 for _ in range(len(ontology['tools']) + len(ontology['classifications'])): data_row_ids.append(dataset.create_data_row(row_data=image_url).uid) + num_rows += 1 + # print("After creating data rows ", time.time() - start_time) + + pytest.data_row_report['times'] += time.time() - start_time + pytest.data_row_report['num_rows'] += num_rows project.create_batch( rand_gen(str), data_row_ids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) + print("After creating batch ", time.time() - start_time) project.data_row_ids = data_row_ids yield project project.delete() @@ -1006,6 +1015,7 @@ def model_run_with_training_metadata(rand_gen, model): @pytest.fixture def model_run_with_data_rows(client, configured_project, model_run_predictions, model_run, wait_for_label_processing): + start_time = time.time() configured_project.enable_model_assisted_labeling() upload_task = LabelImport.create_from_objects( @@ -1019,6 +1029,7 @@ def model_run_with_data_rows(client, configured_project, model_run_predictions, labels = wait_for_label_processing(configured_project) label_ids = [label.uid for label in labels] model_run.upsert_labels(label_ids) + print(f"model_run_with_data_rows: {time.time() - start_time}") yield model_run model_run.delete() # TODO: Delete resources when that is possible .. 
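The tests/integration/conftest.py hunk that follows wires those timers into pytest itself: a hookwrapper around fixture setup accumulates per-fixture wall-clock time, and a session-scoped autouse fixture prints the totals at teardown. Extracted from the diff, the pattern is roughly this minimal self-contained conftest.py sketch (it uses defaultdict(float) and a plain sorted slice in place of the hunk's islice bookkeeping; everything else mirrors the patch):

    import time
    from collections import defaultdict

    import pytest


    def pytest_configure():
        # Session-wide accumulator hung off the pytest module, as in the patch.
        pytest.report = defaultdict(float)


    @pytest.hookimpl(hookwrapper=True)
    def pytest_fixture_setup(fixturedef, request):
        # Wraps every fixture setup; the yield hands control back to pytest,
        # so the measured delta covers only this fixture's own setup work.
        start = time.time()
        yield
        pytest.report[fixturedef.argname] += time.time() - start


    @pytest.fixture(scope='session', autouse=True)
    def print_perf_summary():
        yield  # everything after the yield runs at session teardown
        slowest = sorted(pytest.report.items(), key=lambda kv: kv[1], reverse=True)
        print("\nTop slowest fixtures:\n", slowest[:10])
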
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index ed4229b4d..92e23a375 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,3 +1,5 @@ +from collections import defaultdict +from itertools import islice import json import os import re @@ -807,3 +809,37 @@ def upload_invalid_data_rows_for_dataset(dataset: Dataset): }, ] * 2) task.wait_till_done() + + +def pytest_configure(): + pytest.report = defaultdict(int) + pytest.data_row_report = {'times': 0, 'num_rows': 0} + + +@pytest.hookimpl(hookwrapper=True) +def pytest_fixture_setup(fixturedef, request): + start = time.time() + yield + + end = time.time() + + exec_time = end - start + pytest.report[fixturedef.argname] += exec_time + + # print('pytest_fixture_setup' + # f', request={request}' + # f', create_data_row_time={end - start}') + + +@pytest.fixture(scope='session', autouse=True) +def print_perf_summary(): + yield + + sorted_dict = dict( + sorted(pytest.report.items(), key=lambda item: item[1], reverse=True)) + num_of_entries = 10 if len(sorted_dict) >= 10 else len(sorted_dict) + slowest_fixtures = [ + (aaa, sorted_dict[aaa]) for aaa in islice(sorted_dict, num_of_entries) + ] + print("\nTop slowest fixtures:\n", slowest_fixtures) + print("Data row report:\n", pytest.data_row_report) diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py index d1a31e532..de2f15820 100644 --- a/tests/integration/test_dataset.py +++ b/tests/integration/test_dataset.py @@ -53,8 +53,12 @@ def dataset_for_filtering(client, rand_gen): yield name_1, d1, name_2, d2 - d1.delete() - d2.delete() + +def test_dataset_filtering(client, dataset_for_filtering): + name_1, d1, name_2, d2 = dataset_for_filtering + + assert list(client.get_datasets(where=Dataset.name == name_1)) == [d1] + assert list(client.get_datasets(where=Dataset.name == name_2)) == [d2] def test_dataset_filtering(client, dataset_for_filtering): From b95d1b89767c8ea6f085596315d056c8982b9f99 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Mon, 14 Aug 2023 09:16:26 -0700 Subject: [PATCH 02/10] Convert tests that do not require many prebuilt data rows to a simpler project fixture --- pytest.ini | 2 +- .../integration/annotation_import/conftest.py | 13 ++--- .../test_bulk_import_request.py | 49 +++++++++---------- .../annotation_import/test_data_types.py | 22 ++++----- tests/integration/conftest.py | 29 +++++++---- tests/integration/test_project.py | 14 ++---- 6 files changed, 64 insertions(+), 65 deletions(-) diff --git a/pytest.ini b/pytest.ini index b56afefdd..fbf64a864 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] -addopts = -s -vv --reruns 5 --reruns-delay 10 --durations=20 +addopts = -s -vv markers = slow: marks tests as slow (deselect with '-m "not slow"') diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 6e35d4d0a..1f88de47a 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -486,17 +486,12 @@ def initial_dataset(client, rand_gen): @pytest.fixture -def configured_project(client, initial_dataset, ontology, rand_gen, image_url): +def configured_project(client, configured_project_without_data_rows, + initial_dataset, ontology, rand_gen, image_url): start_time = time.time() dataset = initial_dataset - project = client.create_project( - name=rand_gen(str), - queue_mode=QueueMode.Batch, - ) - editor = list( - client.get_labeling_frontends(
where=LabelingFrontend.name == "editor"))[0] - project.setup(editor, ontology) + project = configured_project_without_data_rows + data_row_ids = [] # print("Before creating data rows ", time.time() - start_time) num_rows = 0 diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index 4f001af8d..7a66dd667 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -25,15 +25,15 @@ """ -def test_create_from_url(configured_project): +def test_create_from_url(project): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - bulk_import_request = configured_project.upload_annotations(name=name, - annotations=url, - validate=False) + bulk_import_request = project.upload_annotations(name=name, + annotations=url, + validate=False) - assert bulk_import_request.project() == configured_project + assert bulk_import_request.project() == project assert bulk_import_request.name == name assert bulk_import_request.input_file_url == url assert bulk_import_request.error_file_url is None @@ -41,24 +41,24 @@ def test_create_from_url(configured_project): assert bulk_import_request.state == BulkImportRequestState.RUNNING -def test_validate_file(configured_project): +def test_validate_file(project_with_ontology): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" with pytest.raises(MALValidationError): - configured_project.upload_annotations(name=name, - annotations=url, - validate=True) + project_with_ontology.upload_annotations(name=name, + annotations=url, + validate=True) #Schema ids shouldn't match -def test_create_from_objects(configured_project, predictions, +def test_create_from_objects(configured_project_without_data_rows, predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) - bulk_import_request = configured_project.upload_annotations( + bulk_import_request = configured_project_without_data_rows.upload_annotations( name=name, annotations=predictions) - assert bulk_import_request.project() == configured_project + assert bulk_import_request.project() == configured_project_without_data_rows assert bulk_import_request.name == name assert bulk_import_request.error_file_url is None assert bulk_import_request.status_file_url is None @@ -105,17 +105,17 @@ def test_create_from_local_file(tmp_path, predictions, configured_project, bulk_import_request.input_file_url, predictions) -def test_get(client, configured_project): +def test_get(client, configured_project_without_data_rows): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - configured_project.upload_annotations(name=name, - annotations=url, - validate=False) + configured_project_without_data_rows.upload_annotations(name=name, + annotations=url, + validate=False) bulk_import_request = BulkImportRequest.from_name( - client, project_id=configured_project.uid, name=name) + client, project_id=configured_project_without_data_rows.uid, name=name) - assert bulk_import_request.project() == configured_project + assert bulk_import_request.project() == configured_project_without_data_rows assert bulk_import_request.name == name assert bulk_import_request.input_file_url == url assert bulk_import_request.error_file_url is None @@ -158,14 +158,13 @@ def test_validate_ndjson_uuid(tmp_path, 
configured_project, predictions): @pytest.mark.slow -def test_wait_till_done(rectangle_inference, configured_project): +def test_wait_till_done(rectangle_inference, + configured_project_without_data_rows): name = str(uuid.uuid4()) - url = configured_project.client.upload_data(content=parser.dumps( - [rectangle_inference]), - sign=True) - bulk_import_request = configured_project.upload_annotations(name=name, - annotations=url, - validate=False) + url = configured_project_without_data_rows.client.upload_data( + content=parser.dumps([rectangle_inference]), sign=True) + bulk_import_request = configured_project_without_data_rows.upload_annotations( + name=name, annotations=url, validate=False) assert len(bulk_import_request.inputs) == 1 bulk_import_request.wait_until_done() diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py index f8e392cf5..30559198b 100644 --- a/tests/integration/annotation_import/test_data_types.py +++ b/tests/integration/annotation_import/test_data_types.py @@ -125,7 +125,6 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): [data_row.uid], # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) - project.data_row_ids.append(data_row.uid) return data_row @@ -135,12 +134,12 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData ]) -def test_import_data_types(client, configured_project, initial_dataset, - rand_gen, data_row_json_by_data_type, - annotations_by_data_type, data_type_class): +def test_import_data_types(client, project, initial_dataset, rand_gen, + data_row_json_by_data_type, annotations_by_data_type, + data_type_class): - project = configured_project - project_id = configured_project.uid + project = project + project_id = project.uid dataset = initial_dataset set_project_media_type_from_data_type(project, data_type_class) @@ -261,11 +260,11 @@ def test_import_data_types_v2(client, configured_project, initial_dataset, @pytest.mark.parametrize('data_type, data_class, annotations', test_params) -def test_import_label_annotations(client, configured_project, initial_dataset, - data_row_json_by_data_type, data_type, - data_class, annotations, rand_gen): +def test_import_label_annotations(client, configured_project_without_data_rows, + initial_dataset, data_row_json_by_data_type, + data_type, data_class, annotations, rand_gen): - project = configured_project + project = configured_project_without_data_rows dataset = initial_dataset set_project_media_type_from_data_type(project, data_class) @@ -297,7 +296,8 @@ def test_import_label_annotations(client, configured_project, initial_dataset, assert export_task.errors is None expected_annotations = get_annotation_comparison_dicts_from_labels(labels) actual_annotations = get_annotation_comparison_dicts_from_export( - export_task.result, data_row.uid, configured_project.uid) + export_task.result, data_row.uid, + configured_project_without_data_rows.uid) assert actual_annotations == expected_annotations data_row.delete() diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 92e23a375..c47524ed6 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -3,6 +3,7 @@ import json import os import re +import sys import time import uuid from enum import Enum @@ -390,9 +391,21 @@ def initial_dataset(client, rand_gen): @pytest.fixture -def 
configured_project(project, initial_dataset, client, rand_gen, image_url): +def project_with_ontology(project): + editor = list( + project.client.get_labeling_frontends( + where=LabelingFrontend.name == "editor"))[0] + empty_ontology = {"tools": [], "classifications": []} + project.setup(editor, empty_ontology) + yield project + + +@pytest.fixture +def configured_project(project_with_ontology, initial_dataset, rand_gen, + image_url): dataset = initial_dataset data_row_id = dataset.create_data_row(row_data=image_url).uid + project = project_with_ontology project.create_batch( rand_gen(str), @@ -401,14 +414,7 @@ def configured_project(project, initial_dataset, client, rand_gen, image_url): ) project.data_row_ids = [data_row_id] - editor = list( - project.client.get_labeling_frontends( - where=LabelingFrontend.name == "editor"))[0] - empty_ontology = {"tools": [], "classifications": []} - project.setup(editor, empty_ontology) yield project - dataset.delete() - project.delete() @pytest.fixture @@ -833,6 +839,8 @@ def pytest_fixture_setup(fixturedef, request): @pytest.fixture(scope='session', autouse=True) def print_perf_summary(): + print("Starting measurements\n", file=sys.stderr) + yield sorted_dict = dict( @@ -841,5 +849,6 @@ def print_perf_summary(): slowest_fixtures = [ (aaa, sorted_dict[aaa]) for aaa in islice(sorted_dict, num_of_entries) ] - print("\nTop slowest fixtures:\n", slowest_fixtures) - print("Data row report:\n", pytest.data_row_report) + print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) + print("Data row report:\n", pytest.data_row_report, file=sys.stderr) + # assert False diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index b9467e0dd..b3b683a3d 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ -171,15 +171,15 @@ def test_attach_instructions(client, project): @pytest.mark.skipif(condition=os.environ['LABELBOX_TEST_ENVIRON'] == "onprem", reason="new mutation does not work for onprem") -def test_html_instructions(configured_project): +def test_html_instructions(project_with_ontology): html_file_path = '/tmp/instructions.html' sample_html_str = "" with open(html_file_path, 'w') as file: file.write(sample_html_str) - configured_project.upsert_instructions(html_file_path) - updated_ontology = configured_project.ontology().normalized + project_with_ontology.upsert_instructions(html_file_path) + updated_ontology = project_with_ontology.ontology().normalized instructions = updated_ontology.pop('projectInstructions') assert requests.get(instructions).text == sample_html_str @@ -200,10 +200,6 @@ def test_same_ontology_after_instructions( assert instructions is not None -def test_queue_mode(configured_project: Project): - assert configured_project.queue_mode == QueueMode.Batch - - def test_batches(project: Project, dataset: Dataset, image_url): task = dataset.create_data_rows([ { @@ -243,9 +239,9 @@ def test_create_batch_with_global_keys_async(project: Project, data_rows): assert batch_data_rows == set(data_rows) -def test_media_type(client, configured_project: Project, rand_gen): +def test_media_type(client, project: Project, rand_gen): # Existing project with no media_type - assert isinstance(configured_project.media_type, MediaType) + assert isinstance(project.media_type, MediaType) # Update test project = client.create_project(name=rand_gen(str)) From 4976908f25ece76697cda14efe952d061f69a92c Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 15 Aug 2023 15:05:36 -0700 Subject: [PATCH 
03/10] Adding an option to configure source of data rows for predictions, also making ndjson tests use a project without data rows --- .../integration/annotation_import/conftest.py | 81 +++++++++- .../test_ndjson_validation.py | 138 +++++++++++------- tests/integration/conftest.py | 2 - 3 files changed, 158 insertions(+), 63 deletions(-) diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 1f88de47a..ca34d2dfb 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -486,8 +486,27 @@ def initial_dataset(client, rand_gen): @pytest.fixture -def configured_project(client, configured_project_without_data_rows, - initial_dataset, ontology, rand_gen, image_url): +def hardcoded_datarow_id(): + data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q' + + def get_data_row_id(indx=0): + return data_row_id + + yield get_data_row_id + + +@pytest.fixture +def configured_project_datarow_id(configured_project): + + def get_data_row_id(indx=0): + return configured_project.data_row_ids[indx] + + yield get_data_row_id + + +@pytest.fixture +def configured_project(configured_project_without_data_rows, initial_dataset, + ontology, rand_gen, image_url): start_time = time.time() dataset = initial_dataset project = configured_project_without_data_rows @@ -509,6 +528,7 @@ def configured_project(client, configured_project_without_data_rows, ) print("After creating batch ", time.time() - start_time) project.data_row_ids = data_row_ids + yield project project.delete() @@ -577,10 +597,19 @@ def configured_project_without_data_rows(client, ontology, rand_gen): # In an example of a 'rectangle' we have extended to support multiple instances of the same tool type # TODO: we will support this approach in the future for all tools @pytest.fixture -def prediction_id_mapping(configured_project): +def prediction_id_mapping(configured_project_without_data_rows, ontology, + request): # Maps tool types to feature schema ids - project = configured_project + if 'configured_project' in request.fixturenames: + data_row_id_factory = request.getfixturevalue( + 'configured_project_datarow_id') + project = configured_project + else: + data_row_id_factory = request.getfixturevalue('hardcoded_datarow_id') + project = configured_project_without_data_rows + ontology = project.ontology().normalized + result = {} for idx, tool in enumerate(ontology['tools'] + ontology['classifications']): @@ -597,7 +626,47 @@ def prediction_id_mapping(configured_project): "schemaId": tool['featureSchemaId'], "name": tool['name'], "dataRow": { - "id": project.data_row_ids[idx], + "id": data_row_id_factory(idx), }, 'tool': tool } if tool_type not in result: result[tool_type] = [] result[tool_type].append(value) else: result[tool_type] = { "uuid": str(uuid.uuid4()), "schemaId": tool['featureSchemaId'], "name": tool['name'], "dataRow": { - "id": project.data_row_ids[idx], + "id": data_row_id_factory(idx), }, 'tool': tool } return result + + +@pytest.fixture +def prediction_id_mapping_datarow_id(): + # Maps tool types to feature schema ids + data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q' + result = {} + + for _, tool in enumerate(ontology['tools'] + ontology['classifications']): + if 'tool' in tool: + tool_type = tool['tool'] + else: + tool_type = tool[ + 'type'] if 'scope' not in tool else f"{tool['type']}_{tool['scope']}" # so 'checklist' of 'checklist_index' + + # TODO: remove this once we have a better way to associate multiple tools instances with a single tool 
type + if tool_type == 'rectangle': + value = { + "uuid": str(uuid.uuid4()), + "schemaId": tool['featureSchemaId'], + "name": tool['name'], + "dataRow": { + "id": data_row_id, }, 'tool': tool } @@ -610,7 +679,7 @@ def prediction_id_mapping(configured_project): "schemaId": tool['featureSchemaId'], "name": tool['name'], "dataRow": { - "id": project.data_row_ids[idx], + "id": data_row_id, }, 'tool': tool } diff --git a/tests/integration/annotation_import/test_ndjson_validation.py b/tests/integration/annotation_import/test_ndjson_validation.py index 53bb85eed..466968e85 100644 --- a/tests/integration/annotation_import/test_ndjson_validation.py +++ b/tests/integration/annotation_import/test_ndjson_validation.py @@ -9,6 +9,24 @@ NDRadio, NDRectangle, NDText, NDTextEntity, NDTool, _validate_ndjson) +from labelbox.schema.labeling_frontend import LabelingFrontend +from labelbox.schema.queue_mode import QueueMode + + +@pytest.fixture +def configured_project_without_data_rows(client, + ontology, + rand_gen, + scope="module"): + project = client.create_project(name=rand_gen(str), + description=rand_gen(str), + queue_mode=QueueMode.Batch) + editor = list( + client.get_labeling_frontends( + where=LabelingFrontend.name == "editor"))[0] + project.setup(editor, ontology) + yield project + project.delete() def test_classification_construction(checklist_inference, text_inference): @@ -37,187 +55,198 @@ def test_tool_construction(inference, expected_type): def test_incorrect_feature_schema(rectangle_inference, polygon_inference, - configured_project): + configured_project_without_data_rows): #Valid but incorrect feature schema #Prob the error message says something about the config not anything useful. We might want to fix this. pred = rectangle_inference.copy() pred['schemaId'] = polygon_inference['schemaId'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def no_tool(text_inference, configured_project): +def no_tool(text_inference, configured_project_without_data_rows): pred = text_inference.copy() #Missing key del pred['answer'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_invalid_text(text_inference, configured_project): +def test_invalid_text(text_inference, configured_project_without_data_rows): #and if it is not a string pred = text_inference.copy() #Extra and wrong key del pred['answer'] pred['answers'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) del pred['answers'] #Invalid type pred['answer'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) #Invalid type pred['answer'] = None with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_invalid_checklist_item(checklist_inference, configured_project): +def test_invalid_checklist_item(checklist_inference, + configured_project_without_data_rows): #Only two points pred = checklist_inference.copy() pred['answers'] = [pred['answers'][0], pred['answers'][0]] #Duplicate schema ids with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], 
configured_project_without_data_rows) pred['answers'] = [{"name": "asdfg"}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) pred['answers'] = [{"schemaId": "1232132132"}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) pred['answers'] = [{}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) pred['answers'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) del pred['answers'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_invalid_polygon(polygon_inference, configured_project): +def test_invalid_polygon(polygon_inference, + configured_project_without_data_rows): #Only two points pred = polygon_inference.copy() pred['polygon'] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_incorrect_entity(entity_inference, configured_project): +def test_incorrect_entity(entity_inference, + configured_project_without_data_rows): entity = entity_inference.copy() #Location cannot be a list entity["location"] = [0, 10] with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) + _validate_ndjson([entity], configured_project_without_data_rows) entity["location"] = {"start": -1, "end": 5} with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) + _validate_ndjson([entity], configured_project_without_data_rows) entity["location"] = {"start": 15, "end": 5} with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project) + _validate_ndjson([entity], configured_project_without_data_rows) -def test_incorrect_mask(segmentation_inference, configured_project): +def test_incorrect_mask(segmentation_inference, + configured_project_without_data_rows): seg = segmentation_inference.copy() seg['mask']['colorRGB'] = [-1, 0, 10] with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project) + _validate_ndjson([seg], configured_project_without_data_rows) seg['mask']['colorRGB'] = [0, 0] with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project) + _validate_ndjson([seg], configured_project_without_data_rows) seg['mask'] = {'counts': [0], 'size': [0, 1]} with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project) + _validate_ndjson([seg], configured_project_without_data_rows) seg['mask'] = {'counts': [-1], 'size': [1, 1]} with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project) + _validate_ndjson([seg], configured_project_without_data_rows) -def test_all_validate_json(configured_project, predictions): +def test_all_validate_json(configured_project_without_data_rows, predictions): #Predictions contains one of each type of prediction. #These should be properly formatted and pass. 
- _validate_ndjson(predictions, configured_project) + _validate_ndjson(predictions, configured_project_without_data_rows) -def test_incorrect_line(line_inference, configured_project): +def test_incorrect_line(line_inference, configured_project_without_data_rows): line = line_inference.copy() line["line"] = [line["line"][0]] #Just one point with pytest.raises(MALValidationError): - _validate_ndjson([line], configured_project) + _validate_ndjson([line], configured_project_without_data_rows) -def test_incorrect_rectangle(rectangle_inference, configured_project): +def test_incorrect_rectangle(rectangle_inference, + configured_project_without_data_rows): del rectangle_inference['bbox']['top'] with pytest.raises(MALValidationError): - _validate_ndjson([rectangle_inference], configured_project) + _validate_ndjson([rectangle_inference], + configured_project_without_data_rows) -def test_duplicate_tools(rectangle_inference, configured_project): +def test_duplicate_tools(rectangle_inference, + configured_project_without_data_rows): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() pred['polygon'] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_invalid_feature_schema(configured_project, rectangle_inference): +def test_invalid_feature_schema(configured_project_without_data_rows, + rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() pred['schemaId'] = "blahblah" with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_name_only_feature_schema(configured_project, rectangle_inference): +def test_name_only_feature_schema(configured_project_without_data_rows, + rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['schemaId'] - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_schema_id_only_feature_schema(configured_project, rectangle_inference): +def test_schema_id_only_feature_schema(configured_project_without_data_rows, + rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['name'] - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_missing_feature_schema(configured_project, rectangle_inference): +def test_missing_feature_schema(configured_project_without_data_rows, + rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['schemaId'] del pred['name'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) -def test_validate_ndjson(tmp_path, configured_project): +def test_validate_ndjson(tmp_path, configured_project_without_data_rows): file_name = f"broken.ndjson" file_path = tmp_path / file_name with file_path.open("w") as f: f.write("test") with pytest.raises(ValueError): - configured_project.upload_annotations(name="name", - annotations=str(file_path), - validate=True) + configured_project_without_data_rows.upload_annotations( + name="name", annotations=str(file_path), validate=True) -def 
test_validate_ndjson_uuid(tmp_path, configured_project, predictions): +def test_validate_ndjson_uuid(tmp_path, configured_project_without_data_rows, + predictions): file_name = f"repeat_uuid.ndjson" file_path = tmp_path / file_name repeat_uuid = predictions.copy() @@ -228,16 +257,15 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): parser.dump(repeat_uuid, f) with pytest.raises(MALValidationError): - configured_project.upload_annotations(name="name", - validate=True, - annotations=str(file_path)) + configured_project_without_data_rows.upload_annotations( + name="name", validate=True, annotations=str(file_path)) with pytest.raises(MALValidationError): - configured_project.upload_annotations(name="name", - validate=True, - annotations=repeat_uuid) + configured_project_without_data_rows.upload_annotations( + name="name", validate=True, annotations=repeat_uuid) -def test_video_upload(video_checklist_inference, configured_project): +def test_video_upload(video_checklist_inference, + configured_project_without_data_rows): pred = video_checklist_inference.copy() - _validate_ndjson([pred], configured_project) + _validate_ndjson([pred], configured_project_without_data_rows) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index c47524ed6..82c739ddb 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -839,8 +839,6 @@ def pytest_fixture_setup(fixturedef, request): @pytest.fixture(scope='session', autouse=True) def print_perf_summary(): - print("Starting measurements\n", file=sys.stderr) - yield sorted_dict = dict( From 551c1efe0cc87b60c1b2f6509f613b75cb432d4b Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 15 Aug 2023 15:24:56 -0700 Subject: [PATCH 04/10] Replacing configured_project --- pytest.ini | 2 +- tests/conftest.py | 2 +- .../integration/annotation_import/conftest.py | 120 ++++++++--------- .../test_bulk_import_request.py | 41 +++--- .../test_conversation_import.py | 6 +- .../annotation_import/test_data_types.py | 23 ++-- .../annotation_import/test_label_import.py | 19 ++- .../annotation_import/test_model.py | 4 +- .../annotation_import/test_model_run.py | 14 +- .../test_ndjson_validation.py | 121 +++++++++--------- .../test_upsert_prediction_import.py | 18 +-- tests/integration/conftest.py | 15 ++- tests/integration/export_v2/conftest.py | 2 +- .../export_v2/test_export_video.py | 4 +- 14 files changed, 194 insertions(+), 197 deletions(-) diff --git a/pytest.ini b/pytest.ini index fbf64a864..b56afefdd 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] -addopts = -s -vv +addopts = -s -vv --reruns 5 --reruns-delay 10 --durations=20 markers = slow: marks tests as slow (deselect with '-m "not slow"') diff --git a/tests/conftest.py b/tests/conftest.py index b4dd6dce0..b724426d8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,7 @@ ] -@pytest.fixture +@pytest.fixture(scope="session") def rand_gen(): def gen(field_type): diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index ca34d2dfb..3f1cd7de5 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -211,7 +211,7 @@ def annotations_by_data_type_v2( } -@pytest.fixture +@pytest.fixture(scope='session') def ontology(): bbox_tool_with_nested_text = { 'required': @@ -479,48 +479,45 @@ def func(project): @pytest.fixture -def initial_dataset(client, rand_gen): - dataset = 
client.create_dataset(name=rand_gen(str)) - yield dataset - dataset.delete() - - -@pytest.fixture -def hardcoded_datarow_id(): - data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q' +def configured_project_datarow_id(configured_project): def get_data_row_id(indx=0): - return data_row_id + return configured_project.data_row_ids[indx] yield get_data_row_id @pytest.fixture -def configured_project_datarow_id(configured_project): +def configured_project_one_datarow_id(configured_project_with_one_data_row): def get_data_row_id(indx=0): - return configured_project.data_row_ids[indx] + return configured_project_with_one_data_row.data_row_ids[0] yield get_data_row_id @pytest.fixture -def configured_project(configured_project_without_data_rows, initial_dataset, - ontology, rand_gen, image_url): +def configured_project(client, initial_dataset, ontology, rand_gen, image_url): start_time = time.time() dataset = initial_dataset - project = configured_project_without_data_rows + project = client.create_project(name=rand_gen(str), + queue_mode=QueueMode.Batch) + editor = list( + client.get_labeling_frontends( + where=LabelingFrontend.name == "editor"))[0] + project.setup(editor, ontology) + num_rows = 0 data_row_ids = [] - # print("Before creating data rows ", time.time() - start_time) - num_rows = 0 + for _ in range(len(ontology['tools']) + len(ontology['classifications'])): data_row_ids.append(dataset.create_data_row(row_data=image_url).uid) num_rows += 1 - # print("After creating data rows ", time.time() - start_time) - - pytest.data_row_report['times'] += time.time() - start_time - pytest.data_row_report['num_rows'] += num_rows + project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, + sleep_interval=3) + if pytest.data_row_report: + pytest.data_row_report['times'] += time.time() - start_time + pytest.data_row_report['num_rows'] += num_rows project.create_batch( rand_gen(str), data_row_ids, # sample of data row objects @@ -580,7 +577,10 @@ def dataset_conversation_entity(client, rand_gen, conversation_entity_data_row, @pytest.fixture -def configured_project_without_data_rows(client, ontology, rand_gen): +def configured_project_with_one_data_row(client, ontology, rand_gen, + initial_dataset, image_url): + start_time = time.time() + project = client.create_project(name=rand_gen(str), description=rand_gen(str), queue_mode=QueueMode.Batch) @@ -588,7 +588,25 @@ def configured_project_without_data_rows(client, ontology, rand_gen): client.get_labeling_frontends( where=LabelingFrontend.name == "editor"))[0] project.setup(editor, ontology) + + data_row = initial_dataset.create_data_row(row_data=image_url) + data_row_ids = [data_row.uid] + project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, + sleep_interval=3) + + if pytest.data_row_report: + pytest.data_row_report['times'] += time.time() - start_time + pytest.data_row_report['num_rows'] += 1 + batch = project.create_batch( + rand_gen(str), + data_row_ids, # sample of data row objects + 5 # priority between 1(Highest) - 5(lowest) + ) + project.data_row_ids = data_row_ids + yield project + + batch.delete() project.delete() @@ -597,16 +615,20 @@ def configured_project_without_data_rows(client, ontology, rand_gen): # In an example of a 'rectangle' we have extended to support multiple instances of the same tool type # TODO: we will support this approach in the future for all tools @pytest.fixture -def prediction_id_mapping(configured_project_without_data_rows, ontology, - request): +def prediction_id_mapping(ontology, request): # Maps tool 
types to feature schema ids if 'configured_project' in request.fixturenames: data_row_id_factory = request.getfixturevalue( 'configured_project_datarow_id') - project = configured_project - else: + project = request.getfixturevalue('configured_project') + elif 'hardcoded_datarow_id' in request.fixturenames: data_row_id_factory = request.getfixturevalue('hardcoded_datarow_id') - project = configured_project_without_data_rows + project = request.getfixturevalue('configured_project_with_ontology') + else: + data_row_id_factory = request.getfixturevalue( + 'configured_project_one_datarow_id') + project = request.getfixturevalue( + 'configured_project_with_one_data_row') ontology = project.ontology().normalized @@ -646,46 +668,6 @@ def prediction_id_mapping(configured_project_without_data_rows, ontology, return result -@pytest.fixture -def prediction_id_mapping_datarow_id(): - # Maps tool types to feature schema ids - data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q' - result = {} - - for _, tool in enumerate(ontology['tools'] + ontology['classifications']): - if 'tool' in tool: - tool_type = tool['tool'] - else: - tool_type = tool[ - 'type'] if 'scope' not in tool else f"{tool['type']}_{tool['scope']}" # so 'checklist' of 'checklist_index' - - # TODO: remove this once we have a better way to associate multiple tools instances with a single tool type - if tool_type == 'rectangle': - value = { - "uuid": str(uuid.uuid4()), - "schemaId": tool['featureSchemaId'], - "name": tool['name'], - "dataRow": { - "id": data_row_id, - }, - 'tool': tool - } - if tool_type not in result: - result[tool_type] = [] - result[tool_type].append(value) - else: - result[tool_type] = { - "uuid": str(uuid.uuid4()), - "schemaId": tool['featureSchemaId'], - "name": tool['name'], - "dataRow": { - "id": data_row_id, - }, - 'tool': tool - } - return result - - @pytest.fixture def polygon_inference(prediction_id_mapping): polygon = prediction_id_mapping['polygon'].copy() @@ -1079,7 +1061,6 @@ def model_run_with_training_metadata(rand_gen, model): @pytest.fixture def model_run_with_data_rows(client, configured_project, model_run_predictions, model_run, wait_for_label_processing): - start_time = time.time() configured_project.enable_model_assisted_labeling() upload_task = LabelImport.create_from_objects( @@ -1093,7 +1074,6 @@ def model_run_with_data_rows(client, configured_project, model_run_predictions, labels = wait_for_label_processing(configured_project) label_ids = [label.uid for label in labels] model_run.upsert_labels(label_ids) - print(f"model_run_with_data_rows: {time.time() - start_time}") yield model_run model_run.delete() # TODO: Delete resources when that is possible .. 
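The prediction_id_mapping rework above is the heart of this commit: instead of hard-depending on the expensive configured_project, it inspects request.fixturenames to see which project flavor the requesting test pulled in, then lazily builds the matching data-row-id factory via request.getfixturevalue, so a test only pays for the data rows it actually declares. A minimal sketch of that dispatch trick, with hypothetical fixture names and ids standing in for the patch's real ones:

    import pytest


    @pytest.fixture
    def expensive_row_id():
        # Stand-in for a fixture that really creates data rows in the backend.
        return lambda idx=0: f"real-row-{idx}"


    @pytest.fixture
    def hardcoded_row_id():
        # Stand-in for hardcoded_datarow_id: nothing is created at all.
        return lambda idx=0: "ck8q9q9qj00003g5z3q1q9q9q"


    @pytest.fixture
    def row_id_source(request):
        # request.fixturenames lists every fixture active for the current test;
        # getfixturevalue() instantiates one on demand, so only the branch the
        # test opted into is ever built.
        if 'expensive_row_id' in request.fixturenames:
            return request.getfixturevalue('expensive_row_id')
        return request.getfixturevalue('hardcoded_row_id')


    def test_cheap_by_default(row_id_source):
        # This test never asked for expensive_row_id, so no rows get created.
        assert row_id_source(0).startswith('ck')
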
diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index 7a66dd667..6691cc044 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -51,14 +51,14 @@ def test_validate_file(project_with_ontology): #Schema ids shouldn't match -def test_create_from_objects(configured_project_without_data_rows, predictions, +def test_create_from_objects(configured_project_with_one_data_row, predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) - bulk_import_request = configured_project_without_data_rows.upload_annotations( + bulk_import_request = configured_project_with_one_data_row.upload_annotations( name=name, annotations=predictions) - assert bulk_import_request.project() == configured_project_without_data_rows + assert bulk_import_request.project() == configured_project_with_one_data_row assert bulk_import_request.name == name assert bulk_import_request.error_file_url is None assert bulk_import_request.status_file_url is None @@ -105,17 +105,17 @@ def test_create_from_local_file(tmp_path, predictions, configured_project, bulk_import_request.input_file_url, predictions) -def test_get(client, configured_project_without_data_rows): +def test_get(client, configured_project_with_one_data_row): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" - configured_project_without_data_rows.upload_annotations(name=name, + configured_project_with_one_data_row.upload_annotations(name=name, annotations=url, validate=False) bulk_import_request = BulkImportRequest.from_name( - client, project_id=configured_project_without_data_rows.uid, name=name) + client, project_id=configured_project_with_one_data_row.uid, name=name) - assert bulk_import_request.project() == configured_project_without_data_rows + assert bulk_import_request.project() == configured_project_with_one_data_row assert bulk_import_request.name == name assert bulk_import_request.input_file_url == url assert bulk_import_request.error_file_url is None @@ -123,16 +123,15 @@ def test_get(client, configured_project_without_data_rows): assert bulk_import_request.state == BulkImportRequestState.RUNNING -def test_validate_ndjson(tmp_path, configured_project): +def test_validate_ndjson(tmp_path, configured_project_with_one_data_row): file_name = f"broken.ndjson" file_path = tmp_path / file_name with file_path.open("w") as f: f.write("test") with pytest.raises(ValueError): - configured_project.upload_annotations(name="name", - validate=True, - annotations=str(file_path)) + configured_project_with_one_data_row.upload_annotations( + name="name", validate=True, annotations=str(file_path)) def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): @@ -159,11 +158,11 @@ def test_validate_ndjson_uuid(tmp_path, configured_project, predictions): @pytest.mark.slow def test_wait_till_done(rectangle_inference, - configured_project_without_data_rows): + configured_project_with_one_data_row): name = str(uuid.uuid4()) - url = configured_project_without_data_rows.client.upload_data( + url = configured_project_with_one_data_row.client.upload_data( content=parser.dumps([rectangle_inference]), sign=True) - bulk_import_request = configured_project_without_data_rows.upload_annotations( + bulk_import_request = configured_project_with_one_data_row.upload_annotations( name=name, annotations=url, validate=False) 
assert len(bulk_import_request.inputs) == 1 @@ -298,7 +297,7 @@ def test_pdf_mal_bbox(client, configured_project_pdf): assert import_annotations.errors == [] -def test_pdf_document_entity(client, configured_project_without_data_rows, +def test_pdf_document_entity(client, configured_project_with_one_data_row, dataset_pdf_entity, rand_gen): # for content "Metal-insulator (MI) transitions have been one of the" in OCR JSON extract tests/assets/arxiv-pdf_data_99-word-token-pdfs_0801.3483-lb-textlayer.json document_text_selection = DocumentTextSelection( @@ -322,7 +321,7 @@ def test_pdf_document_entity(client, configured_project_without_data_rows, labels = [] _, data_row_uids = dataset_pdf_entity - configured_project_without_data_rows.create_batch( + configured_project_with_one_data_row.create_batch( rand_gen(str), data_row_uids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) @@ -337,7 +336,7 @@ def test_pdf_document_entity(client, configured_project_without_data_rows, import_annotations = MALPredictionImport.create_from_objects( client=client, - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, name=f"import {str(uuid.uuid4())}", predictions=labels) import_annotations.wait_until_done() @@ -346,14 +345,14 @@ def test_pdf_document_entity(client, configured_project_without_data_rows, def test_nested_video_object_annotations(client, - configured_project_without_data_rows, + configured_project_with_one_data_row, video_data, bbox_video_annotation_objects, rand_gen): labels = [] _, data_row_uids = video_data - configured_project_without_data_rows.update(media_type=MediaType.Video) - configured_project_without_data_rows.create_batch( + configured_project_with_one_data_row.update(media_type=MediaType.Video) + configured_project_with_one_data_row.create_batch( rand_gen(str), data_row_uids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) @@ -365,7 +364,7 @@ def test_nested_video_object_annotations(client, annotations=bbox_video_annotation_objects)) import_annotations = MALPredictionImport.create_from_objects( client=client, - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, name=f"import {str(uuid.uuid4())}", predictions=labels) import_annotations.wait_until_done() diff --git a/tests/integration/annotation_import/test_conversation_import.py b/tests/integration/annotation_import/test_conversation_import.py index ac2d5419c..9f1d26e31 100644 --- a/tests/integration/annotation_import/test_conversation_import.py +++ b/tests/integration/annotation_import/test_conversation_import.py @@ -7,7 +7,7 @@ from labelbox.schema.annotation_import import MALPredictionImport -def test_conversation_entity(client, configured_project_without_data_rows, +def test_conversation_entity(client, configured_project_with_one_data_row, dataset_conversation_entity, rand_gen): conversation_entity_annotation = ConversationEntity(start=0, @@ -20,7 +20,7 @@ def test_conversation_entity(client, configured_project_without_data_rows, labels = [] _, data_row_uids = dataset_conversation_entity - configured_project_without_data_rows.create_batch( + configured_project_with_one_data_row.create_batch( rand_gen(str), data_row_uids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) @@ -35,7 +35,7 @@ def test_conversation_entity(client, configured_project_without_data_rows, import_annotations = MALPredictionImport.create_from_objects( client=client, - 
project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, name=f"import {str(uuid.uuid4())}", predictions=labels) diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py index 30559198b..5de79f5cc 100644 --- a/tests/integration/annotation_import/test_data_types.py +++ b/tests/integration/annotation_import/test_data_types.py @@ -125,6 +125,7 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): [data_row.uid], # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) + project.data_row_ids.append(data_row.uid) return data_row @@ -134,11 +135,11 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData ]) -def test_import_data_types(client, project, initial_dataset, rand_gen, - data_row_json_by_data_type, annotations_by_data_type, - data_type_class): +def test_import_data_types(client, configured_project, initial_dataset, + rand_gen, data_row_json_by_data_type, + annotations_by_data_type, data_type_class): - project = project + project = configured_project project_id = project.uid dataset = initial_dataset @@ -260,11 +261,11 @@ def test_import_data_types_v2(client, configured_project, initial_dataset, @pytest.mark.parametrize('data_type, data_class, annotations', test_params) -def test_import_label_annotations(client, configured_project_without_data_rows, +def test_import_label_annotations(client, configured_project_with_one_data_row, initial_dataset, data_row_json_by_data_type, data_type, data_class, annotations, rand_gen): - project = configured_project_without_data_rows + project = configured_project_with_one_data_row dataset = initial_dataset set_project_media_type_from_data_type(project, data_class) @@ -297,13 +298,13 @@ def test_import_label_annotations(client, configured_project_without_data_rows, expected_annotations = get_annotation_comparison_dicts_from_labels(labels) actual_annotations = get_annotation_comparison_dicts_from_export( export_task.result, data_row.uid, - configured_project_without_data_rows.uid) + configured_project_with_one_data_row.uid) assert actual_annotations == expected_annotations data_row.delete() @pytest.mark.parametrize('data_type, data_class, annotations', test_params) -def test_import_mal_annotations(client, configured_project_without_data_rows, +def test_import_mal_annotations(client, configured_project_with_one_data_row, data_row_json_by_data_type, data_type, data_class, annotations, rand_gen): @@ -311,10 +312,10 @@ def test_import_mal_annotations(client, configured_project_without_data_rows, data_row_json = data_row_json_by_data_type[data_type] data_row = dataset.create_data_row(data_row_json) - set_project_media_type_from_data_type(configured_project_without_data_rows, + set_project_media_type_from_data_type(configured_project_with_one_data_row, data_class) - configured_project_without_data_rows.create_batch( + configured_project_with_one_data_row.create_batch( rand_gen(str), [data_row.uid], ) @@ -326,7 +327,7 @@ def test_import_mal_annotations(client, configured_project_without_data_rows, import_annotations = lb.MALPredictionImport.create_from_objects( client=client, - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, name=f"import {str(uuid.uuid4())}", predictions=labels) import_annotations.wait_until_done() diff --git 
a/tests/integration/annotation_import/test_label_import.py b/tests/integration/annotation_import/test_label_import.py index 198ce2e3e..61c602c52 100644 --- a/tests/integration/annotation_import/test_label_import.py +++ b/tests/integration/annotation_import/test_label_import.py @@ -9,13 +9,16 @@ """ -def test_create_from_url(client, configured_project, +def test_create_from_url(client, configured_project_with_one_data_row, annotation_import_test_helpers): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" label_import = LabelImport.create_from_url( - client=client, project_id=configured_project.uid, name=name, url=url) - assert label_import.parent_id == configured_project.uid + client=client, + project_id=configured_project_with_one_data_row.uid, + name=name, + url=url) + assert label_import.parent_id == configured_project_with_one_data_row.uid annotation_import_test_helpers.check_running_state(label_import, name, url) @@ -52,13 +55,17 @@ def test_create_from_objects(client, configured_project, object_predictions, # annotation_import_test_helpers.assert_file_content(label_import.input_file_url, object_predictions) -def test_get(client, configured_project, annotation_import_test_helpers): +def test_get(client, configured_project_with_one_data_row, + annotation_import_test_helpers): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" label_import = LabelImport.create_from_url( - client=client, project_id=configured_project.uid, name=name, url=url) + client=client, + project_id=configured_project_with_one_data_row.uid, + name=name, + url=url) - assert label_import.parent_id == configured_project.uid + assert label_import.parent_id == configured_project_with_one_data_row.uid annotation_import_test_helpers.check_running_state(label_import, name, url) diff --git a/tests/integration/annotation_import/test_model.py b/tests/integration/annotation_import/test_model.py index dcfe9ef2c..131ecd9d0 100644 --- a/tests/integration/annotation_import/test_model.py +++ b/tests/integration/annotation_import/test_model.py @@ -4,14 +4,14 @@ from labelbox.exceptions import ResourceNotFoundError -def test_model(client, configured_project, rand_gen): +def test_model(client, configured_project_with_one_data_row, rand_gen): # Get all models = list(client.get_models()) for m in models: assert isinstance(m, Model) # Create - ontology = configured_project.ontology() + ontology = configured_project_with_one_data_row.ontology() data = {"name": rand_gen(str), "ontology_id": ontology.uid} model = client.create_model(data["name"], data["ontology_id"]) assert model.name == data["name"] diff --git a/tests/integration/annotation_import/test_model_run.py b/tests/integration/annotation_import/test_model_run.py index c94c78cde..328b38ba5 100644 --- a/tests/integration/annotation_import/test_model_run.py +++ b/tests/integration/annotation_import/test_model_run.py @@ -87,11 +87,12 @@ def test_model_run_data_rows_delete(model_run_with_data_rows): assert len(before) == len(after) + 1 -def test_model_run_upsert_data_rows(dataset, model_run, configured_project): +def test_model_run_upsert_data_rows(dataset, model_run, + configured_project_with_one_data_row): n_model_run_data_rows = len(list(model_run.model_run_data_rows())) assert n_model_run_data_rows == 0 data_row = dataset.create_data_row(row_data="test row data") - configured_project._wait_until_data_rows_are_processed( + 
configured_project_with_one_data_row._wait_until_data_rows_are_processed( data_row_ids=[data_row.uid]) model_run.upsert_data_rows([data_row.uid]) n_model_run_data_rows = len(list(model_run.model_run_data_rows())) @@ -167,15 +168,14 @@ def get_model_run_status(): errorMessage) -def test_model_run_split_assignment_by_data_row_ids(model_run, dataset, - image_url, - configured_project): - n_data_rows = 10 +def test_model_run_split_assignment_by_data_row_ids( + model_run, dataset, image_url, configured_project_with_one_data_row): + n_data_rows = 2 data_rows = dataset.create_data_rows([{ "row_data": image_url } for _ in range(n_data_rows)]) data_row_ids = [data_row['id'] for data_row in data_rows.result] - configured_project._wait_until_data_rows_are_processed( + configured_project_with_one_data_row._wait_until_data_rows_are_processed( data_row_ids=data_row_ids) model_run.upsert_data_rows(data_row_ids) diff --git a/tests/integration/annotation_import/test_ndjson_validation.py b/tests/integration/annotation_import/test_ndjson_validation.py index 466968e85..123752402 100644 --- a/tests/integration/annotation_import/test_ndjson_validation.py +++ b/tests/integration/annotation_import/test_ndjson_validation.py @@ -13,19 +13,27 @@ from labelbox.schema.queue_mode import QueueMode -@pytest.fixture -def configured_project_without_data_rows(client, - ontology, - rand_gen, - scope="module"): +@pytest.fixture(scope="module", autouse=True) +def hardcoded_datarow_id(): + data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q' + + def get_data_row_id(indx=0): + return data_row_id + + yield get_data_row_id + + +@pytest.fixture(scope="module", autouse=True) +def configured_project_with_ontology(client, ontology, rand_gen): project = client.create_project(name=rand_gen(str), - description=rand_gen(str), queue_mode=QueueMode.Batch) editor = list( client.get_labeling_frontends( where=LabelingFrontend.name == "editor"))[0] project.setup(editor, ontology) + yield project + project.delete() @@ -55,197 +63,194 @@ def test_tool_construction(inference, expected_type): def test_incorrect_feature_schema(rectangle_inference, polygon_inference, - configured_project_without_data_rows): + configured_project_with_ontology): #Valid but incorrect feature schema #Prob the error message says something about the config not anything useful. We might want to fix this. 
pred = rectangle_inference.copy() pred['schemaId'] = polygon_inference['schemaId'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def no_tool(text_inference, configured_project_without_data_rows): +def no_tool(text_inference, configured_project_with_ontology): pred = text_inference.copy() #Missing key del pred['answer'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_invalid_text(text_inference, configured_project_without_data_rows): +def test_invalid_text(text_inference, configured_project_with_ontology): #and if it is not a string pred = text_inference.copy() #Extra and wrong key del pred['answer'] pred['answers'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) del pred['answers'] #Invalid type pred['answer'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) #Invalid type pred['answer'] = None with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) def test_invalid_checklist_item(checklist_inference, - configured_project_without_data_rows): + configured_project_with_ontology): #Only two points pred = checklist_inference.copy() pred['answers'] = [pred['answers'][0], pred['answers'][0]] #Duplicate schema ids with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) pred['answers'] = [{"name": "asdfg"}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) pred['answers'] = [{"schemaId": "1232132132"}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) pred['answers'] = [{}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) pred['answers'] = [] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) del pred['answers'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_invalid_polygon(polygon_inference, - configured_project_without_data_rows): +def test_invalid_polygon(polygon_inference, configured_project_with_ontology): #Only two points pred = polygon_inference.copy() pred['polygon'] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_incorrect_entity(entity_inference, - configured_project_without_data_rows): +def test_incorrect_entity(entity_inference, configured_project_with_ontology): entity = entity_inference.copy() #Location cannot be a list 
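    #(a valid location is a dict such as {"start": 0, "end": 5} with
    # 0 <= start <= end; each mutation below violates one of those constraints)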
entity["location"] = [0, 10] with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project_without_data_rows) + _validate_ndjson([entity], configured_project_with_ontology) entity["location"] = {"start": -1, "end": 5} with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project_without_data_rows) + _validate_ndjson([entity], configured_project_with_ontology) entity["location"] = {"start": 15, "end": 5} with pytest.raises(MALValidationError): - _validate_ndjson([entity], configured_project_without_data_rows) + _validate_ndjson([entity], configured_project_with_ontology) def test_incorrect_mask(segmentation_inference, - configured_project_without_data_rows): + configured_project_with_ontology): seg = segmentation_inference.copy() seg['mask']['colorRGB'] = [-1, 0, 10] with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project_without_data_rows) + _validate_ndjson([seg], configured_project_with_ontology) seg['mask']['colorRGB'] = [0, 0] with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project_without_data_rows) + _validate_ndjson([seg], configured_project_with_ontology) seg['mask'] = {'counts': [0], 'size': [0, 1]} with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project_without_data_rows) + _validate_ndjson([seg], configured_project_with_ontology) seg['mask'] = {'counts': [-1], 'size': [1, 1]} with pytest.raises(MALValidationError): - _validate_ndjson([seg], configured_project_without_data_rows) + _validate_ndjson([seg], configured_project_with_ontology) -def test_all_validate_json(configured_project_without_data_rows, predictions): +def test_all_validate_json(configured_project_with_ontology, predictions): #Predictions contains one of each type of prediction. #These should be properly formatted and pass. 
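    #Happy path: a single malformed entry in `predictions` would raise
    #MALValidationError, so passing here exercises every prediction type at once.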
- _validate_ndjson(predictions, configured_project_without_data_rows) + _validate_ndjson(predictions, configured_project_with_ontology) -def test_incorrect_line(line_inference, configured_project_without_data_rows): +def test_incorrect_line(line_inference, configured_project_with_ontology): line = line_inference.copy() line["line"] = [line["line"][0]] #Just one point with pytest.raises(MALValidationError): - _validate_ndjson([line], configured_project_without_data_rows) + _validate_ndjson([line], configured_project_with_ontology) def test_incorrect_rectangle(rectangle_inference, - configured_project_without_data_rows): + configured_project_with_ontology): del rectangle_inference['bbox']['top'] with pytest.raises(MALValidationError): _validate_ndjson([rectangle_inference], - configured_project_without_data_rows) + configured_project_with_ontology) -def test_duplicate_tools(rectangle_inference, - configured_project_without_data_rows): +def test_duplicate_tools(rectangle_inference, configured_project_with_ontology): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() pred['polygon'] = [{"x": 100, "y": 100}, {"x": 200, "y": 200}] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_invalid_feature_schema(configured_project_without_data_rows, +def test_invalid_feature_schema(configured_project_with_ontology, rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() pred['schemaId'] = "blahblah" with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_name_only_feature_schema(configured_project_without_data_rows, +def test_name_only_feature_schema(configured_project_with_ontology, rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['schemaId'] - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_schema_id_only_feature_schema(configured_project_without_data_rows, +def test_schema_id_only_feature_schema(configured_project_with_ontology, rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['name'] - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_missing_feature_schema(configured_project_without_data_rows, +def test_missing_feature_schema(configured_project_with_ontology, rectangle_inference): #Trying to upload a polygon and rectangle at the same time pred = rectangle_inference.copy() del pred['schemaId'] del pred['name'] with pytest.raises(MALValidationError): - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) -def test_validate_ndjson(tmp_path, configured_project_without_data_rows): +def test_validate_ndjson(tmp_path, configured_project_with_ontology): file_name = f"broken.ndjson" file_path = tmp_path / file_name with file_path.open("w") as f: f.write("test") with pytest.raises(ValueError): - configured_project_without_data_rows.upload_annotations( + configured_project_with_ontology.upload_annotations( name="name", annotations=str(file_path), validate=True) -def 
test_validate_ndjson_uuid(tmp_path, configured_project_without_data_rows, +def test_validate_ndjson_uuid(tmp_path, configured_project_with_ontology, predictions): file_name = f"repeat_uuid.ndjson" file_path = tmp_path / file_name @@ -257,15 +262,15 @@ def test_validate_ndjson_uuid(tmp_path, configured_project_without_data_rows, parser.dump(repeat_uuid, f) with pytest.raises(MALValidationError): - configured_project_without_data_rows.upload_annotations( + configured_project_with_ontology.upload_annotations( name="name", validate=True, annotations=str(file_path)) with pytest.raises(MALValidationError): - configured_project_without_data_rows.upload_annotations( + configured_project_with_ontology.upload_annotations( name="name", validate=True, annotations=repeat_uuid) def test_video_upload(video_checklist_inference, - configured_project_without_data_rows): + configured_project_with_ontology): pred = video_checklist_inference.copy() - _validate_ndjson([pred], configured_project_without_data_rows) + _validate_ndjson([pred], configured_project_with_ontology) diff --git a/tests/integration/annotation_import/test_upsert_prediction_import.py b/tests/integration/annotation_import/test_upsert_prediction_import.py index 9ed045f5e..927b6526d 100644 --- a/tests/integration/annotation_import/test_upsert_prediction_import.py +++ b/tests/integration/annotation_import/test_upsert_prediction_import.py @@ -13,7 +13,7 @@ @pytest.mark.skip() def test_create_from_url(client, tmp_path, object_predictions, model_run_with_data_rows, - configured_project_without_data_rows, + configured_project_with_one_data_row, annotation_import_test_helpers): name = str(uuid.uuid4()) file_name = f"{name}.json" @@ -41,7 +41,7 @@ def test_create_from_url(client, tmp_path, object_predictions, annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project( name=name, predictions=url, - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, priority=5) assert annotation_import.model_run_id == model_run_with_data_rows.uid @@ -50,7 +50,7 @@ def test_create_from_url(client, tmp_path, object_predictions, assert annotation_import.statuses assert batch - assert batch.project().uid == configured_project_without_data_rows.uid + assert batch.project().uid == configured_project_with_one_data_row.uid assert mal_prediction_import mal_prediction_import.wait_until_done() @@ -61,7 +61,7 @@ def test_create_from_url(client, tmp_path, object_predictions, @pytest.mark.skip() def test_create_from_objects(model_run_with_data_rows, - configured_project_without_data_rows, + configured_project_with_one_data_row, object_predictions, annotation_import_test_helpers): name = str(uuid.uuid4()) @@ -76,7 +76,7 @@ def test_create_from_objects(model_run_with_data_rows, annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project( name=name, predictions=predictions, - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, priority=5) assert annotation_import.model_run_id == model_run_with_data_rows.uid @@ -85,7 +85,7 @@ def test_create_from_objects(model_run_with_data_rows, assert annotation_import.statuses assert batch - assert batch.project().uid == configured_project_without_data_rows.uid + assert batch.project().uid == configured_project_with_one_data_row.uid assert mal_prediction_import mal_prediction_import.wait_until_done() @@ -96,7 +96,7 @@ def 
test_create_from_objects(model_run_with_data_rows, @pytest.mark.skip() def test_create_from_local_file(tmp_path, model_run_with_data_rows, - configured_project_without_data_rows, + configured_project_with_one_data_row, object_predictions, annotation_import_test_helpers): @@ -119,7 +119,7 @@ def test_create_from_local_file(tmp_path, model_run_with_data_rows, annotation_import, batch, mal_prediction_import = model_run_with_data_rows.upsert_predictions_and_send_to_project( name=name, predictions=str(file_path), - project_id=configured_project_without_data_rows.uid, + project_id=configured_project_with_one_data_row.uid, priority=5) assert annotation_import.model_run_id == model_run_with_data_rows.uid @@ -128,7 +128,7 @@ def test_create_from_local_file(tmp_path, model_run_with_data_rows, assert annotation_import.statuses assert batch - assert batch.project().uid == configured_project_without_data_rows.uid + assert batch.project().uid == configured_project_with_one_data_row.uid assert mal_prediction_import mal_prediction_import.wait_until_done() diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 82c739ddb..56ce6bae1 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -21,6 +21,7 @@ from labelbox.schema.annotation_import import LabelImport from labelbox.schema.enums import AnnotationImportState from labelbox.schema.invite import Invite +from labelbox.schema.project import Project from labelbox.schema.queue_mode import QueueMode from labelbox.schema.user import User @@ -425,16 +426,21 @@ def configured_project_with_label(client, rand_gen, image_url, project, dataset, Additionally includes a create_label method for any needed extra labels One label is already created and yielded when using fixture """ + start_time = time.time() + project._wait_until_data_rows_are_processed(data_row_ids=[data_row.uid], + sleep_interval=3) project.create_batch( rand_gen(str), [data_row.uid], # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) + print("create_batch took: ", time.time() - start_time) ontology = _setup_ontology(project) + print("setup ontology took: ", time.time() - start_time) label = _create_label(project, data_row, ontology, wait_for_label_processing) - + print("create_label took: ", time.time() - start_time) yield [project, dataset, data_row, label] for label in project.labels(): @@ -817,11 +823,13 @@ def upload_invalid_data_rows_for_dataset(dataset: Dataset): task.wait_till_done() +@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) def pytest_configure(): pytest.report = defaultdict(int) pytest.data_row_report = {'times': 0, 'num_rows': 0} +@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) @pytest.hookimpl(hookwrapper=True) def pytest_fixture_setup(fixturedef, request): start = time.time() @@ -832,11 +840,8 @@ def pytest_fixture_setup(fixturedef, request): exec_time = end - start pytest.report[fixturedef.argname] += exec_time - # print('pytest_fixture_setup' - # f', request={request}' - # f', create_data_row_time={end - start}') - +@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) @pytest.fixture(scope='session', autouse=True) def print_perf_summary(): yield diff --git a/tests/integration/export_v2/conftest.py b/tests/integration/export_v2/conftest.py index af8b4c66f..757bba44e 100644 --- a/tests/integration/export_v2/conftest.py +++ b/tests/integration/export_v2/conftest.py @@ -297,7 +297,7 @@ def configured_project_with_ontology(client, initial_dataset, ontology, @pytest.fixture -def 
configured_project_without_data_rows(client, ontology, rand_gen): +def configured_project_with_one_data_row(client, ontology, rand_gen): project = client.create_project(name=rand_gen(str), description=rand_gen(str), queue_mode=QueueMode.Batch) diff --git a/tests/integration/export_v2/test_export_video.py b/tests/integration/export_v2/test_export_video.py index 863f4d31e..94828e1b6 100644 --- a/tests/integration/export_v2/test_export_video.py +++ b/tests/integration/export_v2/test_export_video.py @@ -5,11 +5,11 @@ from labelbox.schema.annotation_import import AnnotationImportState -def test_export_v2_video(client, configured_project_without_data_rows, +def test_export_v2_video(client, configured_project_with_one_data_row, video_data, video_data_row, bbox_video_annotation_objects, rand_gen): - project = configured_project_without_data_rows + project = configured_project_with_one_data_row project_id = project.uid labels = [] _, data_row_uids = video_data From 65990878d68bb1c35a3c9e6f6dff27c83ba1ea40 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 18 Aug 2023 10:58:02 -0700 Subject: [PATCH 05/10] Remove more sources of data leakage --- .../annotation_import/test_data_types.py | 19 ++++++++++++++----- tests/integration/conftest.py | 2 ++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py index 5de79f5cc..a5c27eb20 100644 --- a/tests/integration/annotation_import/test_data_types.py +++ b/tests/integration/annotation_import/test_data_types.py @@ -137,7 +137,8 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): ]) def test_import_data_types(client, configured_project, initial_dataset, rand_gen, data_row_json_by_data_type, - annotations_by_data_type, data_type_class): + annotations_by_data_type, data_type_class, + one_datarow): project = configured_project project_id = project.uid @@ -304,14 +305,22 @@ def test_import_label_annotations(client, configured_project_with_one_data_row, @pytest.mark.parametrize('data_type, data_class, annotations', test_params) -def test_import_mal_annotations(client, configured_project_with_one_data_row, - data_row_json_by_data_type, data_type, - data_class, annotations, rand_gen): - +@pytest.fixture +def one_datarow(client, rand_gen, data_row_json_by_data_type, data_type): dataset = client.create_dataset(name=rand_gen(str)) data_row_json = data_row_json_by_data_type[data_type] data_row = dataset.create_data_row(data_row_json) + yield data_row + + dataset.delete() + + +@pytest.mark.parametrize('data_type, data_class, annotations', test_params) +def test_import_mal_annotations(client, configured_project_with_one_data_row, + data_type, data_class, annotations, rand_gen, + one_datarow): + data_row = one_datarow set_project_media_type_from_data_type(configured_project_with_one_data_row, data_class) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 56ce6bae1..af66a7ed4 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -390,6 +390,8 @@ def initial_dataset(client, rand_gen): dataset = client.create_dataset(name=rand_gen(str)) yield dataset + dataset.delete() + @pytest.fixture def project_with_ontology(project): From 9e41e82169da875ec0d9aee9638fa6fbca864b60 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 18 Aug 2023 11:31:08 -0700 Subject: [PATCH 06/10] Add config for fixture profiling --- Makefile | 1 + .../integration/annotation_import/conftest.py | 
11 +----- .../annotation_import/test_data_types.py | 13 +++++-- tests/integration/conftest.py | 37 +++++++------------ 4 files changed, 25 insertions(+), 37 deletions(-) diff --git a/Makefile b/Makefile index f9f490554..b7838a7d4 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ test-local: build-image -e LABELBOX_TEST_ENVIRON="local" \ -e DA_GCP_LABELBOX_API_KEY=${DA_GCP_LABELBOX_API_KEY} \ -e LABELBOX_TEST_API_KEY_LOCAL=${LABELBOX_TEST_API_KEY_LOCAL} \ + -e FIXTURE_PROFILE=true \ local/labelbox-python:test pytest $(PATH_TO_TEST) test-staging: build-image diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 3f1cd7de5..988ad7883 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -498,7 +498,6 @@ def get_data_row_id(indx=0): @pytest.fixture def configured_project(client, initial_dataset, ontology, rand_gen, image_url): - start_time = time.time() dataset = initial_dataset project = client.create_project(name=rand_gen(str), queue_mode=QueueMode.Batch) @@ -515,15 +514,12 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): num_rows += 1 project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, sleep_interval=3) - if pytest.data_row_report: - pytest.data_row_report['times'] += time.time() - start_time - pytest.data_row_report['num_rows'] += num_rows + project.create_batch( rand_gen(str), data_row_ids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) - print("After creating batch ", time.time() - start_time) project.data_row_ids = data_row_ids yield project @@ -579,8 +575,6 @@ def dataset_conversation_entity(client, rand_gen, conversation_entity_data_row, @pytest.fixture def configured_project_with_one_data_row(client, ontology, rand_gen, initial_dataset, image_url): - start_time = time.time() - project = client.create_project(name=rand_gen(str), description=rand_gen(str), queue_mode=QueueMode.Batch) @@ -594,9 +588,6 @@ def configured_project_with_one_data_row(client, ontology, rand_gen, project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, sleep_interval=3) - if pytest.data_row_report: - pytest.data_row_report['times'] += time.time() - start_time - pytest.data_row_report['num_rows'] += 1 batch = project.create_batch( rand_gen(str), data_row_ids, # sample of data row objects diff --git a/tests/integration/annotation_import/test_data_types.py b/tests/integration/annotation_import/test_data_types.py index a5c27eb20..79e8b03cb 100644 --- a/tests/integration/annotation_import/test_data_types.py +++ b/tests/integration/annotation_import/test_data_types.py @@ -135,10 +135,15 @@ def create_data_row_for_project(project, dataset, data_row_ndjson, batch_name): AudioData, ConversationData, DicomData, DocumentData, HTMLData, ImageData, TextData ]) -def test_import_data_types(client, configured_project, initial_dataset, - rand_gen, data_row_json_by_data_type, - annotations_by_data_type, data_type_class, - one_datarow): +def test_import_data_types( + client, + configured_project, + initial_dataset, + rand_gen, + data_row_json_by_data_type, + annotations_by_data_type, + data_type_class, +): project = configured_project project_id = project.uid diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index af66a7ed4..a6651b97d 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -27,8 +27,6 @@ IMG_URL = "https://picsum.photos/200/300.jpg" 
SMALL_DATASET_URL = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/potato.jpeg" -DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS = 30 -DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS = 5 class Environ(Enum): @@ -458,10 +456,8 @@ def configured_batch_project_with_label(project, dataset, data_row, One label is already created and yielded when using fixture """ data_rows = [dr.uid for dr in list(dataset.data_rows())] - project._wait_until_data_rows_are_processed( - data_row_ids=data_rows, - wait_processing_max_seconds=DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS, - sleep_interval=DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS) + project._wait_until_data_rows_are_processed(data_row_ids=data_rows, + sleep_interval=3) project.create_batch("test-batch", data_rows) project.data_row_ids = data_rows @@ -604,7 +600,6 @@ def configured_project_with_complex_ontology(client, initial_dataset, rand_gen, project.setup(editor, ontology.asdict()) yield [project, data_row] - dataset.delete() project.delete() @@ -825,35 +820,31 @@ def upload_invalid_data_rows_for_dataset(dataset: Dataset): task.wait_till_done() -@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) def pytest_configure(): pytest.report = defaultdict(int) - pytest.data_row_report = {'times': 0, 'num_rows': 0} -@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) @pytest.hookimpl(hookwrapper=True) -def pytest_fixture_setup(fixturedef, request): +def pytest_fixture_setup(fixturedef): start = time.time() yield - end = time.time() exec_time = end - start - pytest.report[fixturedef.argname] += exec_time + if "FIXTURE_PROFILE" in os.environ: + pytest.report[fixturedef.argname] += exec_time -@pytest.mark.skipif("FIXTURE_PROFILE" not in os.environ) @pytest.fixture(scope='session', autouse=True) def print_perf_summary(): yield - sorted_dict = dict( - sorted(pytest.report.items(), key=lambda item: item[1], reverse=True)) - num_of_entries = 10 if len(sorted_dict) >= 10 else len(sorted_dict) - slowest_fixtures = [ - (aaa, sorted_dict[aaa]) for aaa in islice(sorted_dict, num_of_entries) - ] - print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) - print("Data row report:\n", pytest.data_row_report, file=sys.stderr) - # assert False + if "FIXTURE_PROFILE" in os.environ: + sorted_dict = dict( + sorted(pytest.report.items(), + key=lambda item: item[1], + reverse=True)) + num_of_entries = 10 if len(sorted_dict) >= 10 else len(sorted_dict) + slowest_fixtures = [(aaa, sorted_dict[aaa]) + for aaa in islice(sorted_dict, num_of_entries)] + print("\nTop slowest fixtures:\n", slowest_fixtures, file=sys.stderr) From ba2990d68087b575a7f27ed6e366fab1bb7caf53 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 18 Aug 2023 15:02:14 -0700 Subject: [PATCH 07/10] Add explanation on how to supply data row ids to prediction_id_mapping --- .../integration/annotation_import/conftest.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 988ad7883..1980d6f26 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -505,13 +505,11 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): client.get_labeling_frontends( where=LabelingFrontend.name == "editor"))[0] project.setup(editor, ontology) - num_rows = 0 data_row_ids = [] for _ in range(len(ontology['tools']) + len(ontology['classifications'])): 
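        # one data row per ontology feature (tools + classifications), so each
        # prediction in these tests can be given its own data row id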
data_row_ids.append(dataset.create_data_row(row_data=image_url).uid) - num_rows += 1 project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, sleep_interval=3) @@ -605,6 +603,22 @@ def configured_project_with_one_data_row(client, ontology, rand_gen, # At the moment it expects only one feature per tool type and this creates unnecessary coupling between differet tests # In an example of a 'rectangle' we have extended to support multiple instances of the same tool type # TODO: we will support this approach in the future for all tools +# +""" +Please note that this fixture now offers the flexibility to configure three different strategies for generating data row ids for predictions: +Default(configured_project fixture): + configured_project that generates a data row for each member of ontology. + This makes sure each prediction has its own data row id. This is applicable to prediction upload cases when last label overwrites existing ones + +Optimized Strategy (configured_project_with_one_data_row fixture): + This fixture has only one data row and all predictions will be mapped to it + +Custom Data Row IDs Strategy: + Individuals can create their own fixture to supply data row ids. + This particular fixture, termed "hardcoded_datarow_id," should be defined locally within a test file. +""" + + @pytest.fixture def prediction_id_mapping(ontology, request): # Maps tool types to feature schema ids From 700fefe87cb2259ea78b8994f986b641ce954cd7 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Fri, 18 Aug 2023 15:50:42 -0700 Subject: [PATCH 08/10] Fix test_user_and_org.py --- tests/integration/test_user_and_org.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_user_and_org.py b/tests/integration/test_user_and_org.py index 9f07666de..ca158527c 100644 --- a/tests/integration/test_user_and_org.py +++ b/tests/integration/test_user_and_org.py @@ -1,3 +1,6 @@ +from labelbox.schema.project import Project + + def test_user(client): user = client.get_user() assert user.uid is not None @@ -10,14 +13,11 @@ def test_organization(client): assert client.get_user() in set(organization.users()) -def test_user_and_org_projects(project): - client = project.client +def test_user_and_org_projects(client, project): user = client.get_user() org = client.get_organization() - user_projects = set(user.projects()) - org_projects = set(org.projects()) + user_project = user.projects(where=Project.uid == project.uid) + org_project = org.projects(where=Project.uid == project.uid) - assert project.created_by() == user - assert project.organization() == org - assert project in user_projects - assert project in org_projects \ No newline at end of file + assert user_project + assert org_project \ No newline at end of file From 5a6e250fba457336a78bd249c1cc22efc1973842 Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 22 Aug 2023 17:18:27 -0700 Subject: [PATCH 09/10] PR updates --- .../integration/annotation_import/conftest.py | 8 +++++-- .../test_bulk_import_request.py | 8 +++---- tests/integration/conftest.py | 22 ++++++++++--------- tests/integration/export_v2/conftest.py | 2 +- .../export_v2/test_export_video.py | 4 ++-- tests/integration/test_filtering.py | 4 ++-- tests/integration/test_project.py | 6 ++--- 7 files changed, 30 insertions(+), 24 deletions(-) diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index 1980d6f26..ebfe74f47 100644 --- a/tests/integration/annotation_import/conftest.py +++ 
b/tests/integration/annotation_import/conftest.py @@ -513,7 +513,7 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): project._wait_until_data_rows_are_processed(data_row_ids=data_row_ids, sleep_interval=3) - project.create_batch( + batch = project.create_batch( rand_gen(str), data_row_ids, # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) @@ -521,6 +521,8 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): project.data_row_ids = data_row_ids yield project + + batch.delete() project.delete() @@ -614,8 +616,10 @@ def configured_project_with_one_data_row(client, ontology, rand_gen, This fixture has only one data row and all predictions will be mapped to it Custom Data Row IDs Strategy: - Individuals can create their own fixture to supply data row ids. + Individuals can supply hard-coded data row ids when a creation of data row is not required. This particular fixture, termed "hardcoded_datarow_id," should be defined locally within a test file. + In the future, we can use this approach to inject correct number of rows instead of using configured_project fixture + that creates a data row for each member of ontology (14 in total) for each run. """ diff --git a/tests/integration/annotation_import/test_bulk_import_request.py b/tests/integration/annotation_import/test_bulk_import_request.py index 6691cc044..52552f53d 100644 --- a/tests/integration/annotation_import/test_bulk_import_request.py +++ b/tests/integration/annotation_import/test_bulk_import_request.py @@ -41,13 +41,13 @@ def test_create_from_url(project): assert bulk_import_request.state == BulkImportRequestState.RUNNING -def test_validate_file(project_with_ontology): +def test_validate_file(project_with_empty_ontology): name = str(uuid.uuid4()) url = "https://storage.googleapis.com/labelbox-public-bucket/predictions_test_v2.ndjson" with pytest.raises(MALValidationError): - project_with_ontology.upload_annotations(name=name, - annotations=url, - validate=True) + project_with_empty_ontology.upload_annotations(name=name, + annotations=url, + validate=True) #Schema ids shouldn't match diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index a6651b97d..781fe6edb 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -27,6 +27,8 @@ IMG_URL = "https://picsum.photos/200/300.jpg" SMALL_DATASET_URL = "https://storage.googleapis.com/lb-artifacts-testing-public/sdk_integration_test/potato.jpeg" +DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS = 30 +DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS = 3 class Environ(Enum): @@ -392,7 +394,7 @@ def initial_dataset(client, rand_gen): @pytest.fixture -def project_with_ontology(project): +def project_with_empty_ontology(project): editor = list( project.client.get_labeling_frontends( where=LabelingFrontend.name == "editor"))[0] @@ -402,13 +404,13 @@ def project_with_ontology(project): @pytest.fixture -def configured_project(project_with_ontology, initial_dataset, rand_gen, +def configured_project(project_with_empty_ontology, initial_dataset, rand_gen, image_url): dataset = initial_dataset data_row_id = dataset.create_data_row(row_data=image_url).uid - project = project_with_ontology + project = project_with_empty_ontology - project.create_batch( + batch = project.create_batch( rand_gen(str), [data_row_id], # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) @@ -417,6 +419,8 @@ def configured_project(project_with_ontology, initial_dataset, rand_gen, yield 
project + batch.delete() + @pytest.fixture def configured_project_with_label(client, rand_gen, image_url, project, dataset, @@ -426,21 +430,19 @@ def configured_project_with_label(client, rand_gen, image_url, project, dataset, Additionally includes a create_label method for any needed extra labels One label is already created and yielded when using fixture """ - start_time = time.time() - project._wait_until_data_rows_are_processed(data_row_ids=[data_row.uid], - sleep_interval=3) + project._wait_until_data_rows_are_processed( + data_row_ids=[data_row.uid], + wait_processing_max_seconds=DATA_ROW_PROCESSING_WAIT_TIMEOUT_SECONDS, + sleep_interval=DATA_ROW_PROCESSING_WAIT_SLEEP_INTERNAL_SECONDS) project.create_batch( rand_gen(str), [data_row.uid], # sample of data row objects 5 # priority between 1(Highest) - 5(lowest) ) - print("create_batch took: ", time.time() - start_time) ontology = _setup_ontology(project) - print("setup ontology took: ", time.time() - start_time) label = _create_label(project, data_row, ontology, wait_for_label_processing) - print("create_label took: ", time.time() - start_time) yield [project, dataset, data_row, label] for label in project.labels(): diff --git a/tests/integration/export_v2/conftest.py b/tests/integration/export_v2/conftest.py index 757bba44e..af8b4c66f 100644 --- a/tests/integration/export_v2/conftest.py +++ b/tests/integration/export_v2/conftest.py @@ -297,7 +297,7 @@ def configured_project_with_ontology(client, initial_dataset, ontology, @pytest.fixture -def configured_project_with_one_data_row(client, ontology, rand_gen): +def configured_project_without_data_rows(client, ontology, rand_gen): project = client.create_project(name=rand_gen(str), description=rand_gen(str), queue_mode=QueueMode.Batch) diff --git a/tests/integration/export_v2/test_export_video.py b/tests/integration/export_v2/test_export_video.py index 94828e1b6..863f4d31e 100644 --- a/tests/integration/export_v2/test_export_video.py +++ b/tests/integration/export_v2/test_export_video.py @@ -5,11 +5,11 @@ from labelbox.schema.annotation_import import AnnotationImportState -def test_export_v2_video(client, configured_project_with_one_data_row, +def test_export_v2_video(client, configured_project_without_data_rows, video_data, video_data_row, bbox_video_annotation_objects, rand_gen): - project = configured_project_with_one_data_row + project = configured_project_without_data_rows project_id = project.uid labels = [] _, data_row_uids = video_data diff --git a/tests/integration/test_filtering.py b/tests/integration/test_filtering.py index fde7f0638..f44cdcdcb 100644 --- a/tests/integration/test_filtering.py +++ b/tests/integration/test_filtering.py @@ -15,7 +15,7 @@ def project_to_test_where(client, rand_gen): p_b = client.create_project(name=p_b_name, queue_mode=QueueMode.Batch) p_c = client.create_project(name=p_c_name, queue_mode=QueueMode.Batch) - yield p_a, p_b, p_c + yield p_a, p_b p_a.delete() p_b.delete() @@ -26,7 +26,7 @@ def project_to_test_where(client, rand_gen): # other builds simultaneously adding projects to test org def test_where(client, project_to_test_where): p_a, p_b, p_c = project_to_test_where - p_a_name, p_b_name, _ = [p.name for p in [p_a, p_b, p_c]] + p_a_name, p_b_name = [p.name for p in [p_a, p_b]] def get(where=None): date_where = Project.created_at >= p_a.created_at diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index b3b683a3d..94c98ee50 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ 
-171,15 +171,15 @@ def test_attach_instructions(client, project): @pytest.mark.skipif(condition=os.environ['LABELBOX_TEST_ENVIRON'] == "onprem", reason="new mutation does not work for onprem") -def test_html_instructions(project_with_ontology): +def test_html_instructions(project_with_empty_ontology): html_file_path = '/tmp/instructions.html' sample_html_str = "" with open(html_file_path, 'w') as file: file.write(sample_html_str) - project_with_ontology.upsert_instructions(html_file_path) - updated_ontology = project_with_ontology.ontology().normalized + project_with_empty_ontology.upsert_instructions(html_file_path) + updated_ontology = project_with_empty_ontology.ontology().normalized instructions = updated_ontology.pop('projectInstructions') assert requests.get(instructions).text == sample_html_str From e585e8c14c89503a99319c342f80903bc5eeeaec Mon Sep 17 00:00:00 2001 From: Val Brodsky Date: Tue, 22 Aug 2023 17:35:52 -0700 Subject: [PATCH 10/10] Turn on fixture profile for staging --- .github/workflows/python-package.yml | 1 + tests/integration/annotation_import/conftest.py | 1 - tests/integration/test_filtering.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 6ed378f09..83c0393af 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -39,6 +39,7 @@ jobs: echo "LABELBOX_TEST_ENVIRON=prod" >> $GITHUB_ENV else echo "LABELBOX_TEST_ENVIRON=staging" >> $GITHUB_ENV + echo "FIXTURE_PROFILE=true" >> $GITHUB_ENV fi - uses: actions/checkout@v2 diff --git a/tests/integration/annotation_import/conftest.py b/tests/integration/annotation_import/conftest.py index ebfe74f47..d50c44d0c 100644 --- a/tests/integration/annotation_import/conftest.py +++ b/tests/integration/annotation_import/conftest.py @@ -522,7 +522,6 @@ def configured_project(client, initial_dataset, ontology, rand_gen, image_url): yield project - batch.delete() project.delete() diff --git a/tests/integration/test_filtering.py b/tests/integration/test_filtering.py index f44cdcdcb..7dd687759 100644 --- a/tests/integration/test_filtering.py +++ b/tests/integration/test_filtering.py @@ -15,7 +15,7 @@ def project_to_test_where(client, rand_gen): p_b = client.create_project(name=p_b_name, queue_mode=QueueMode.Batch) p_c = client.create_project(name=p_c_name, queue_mode=QueueMode.Batch) - yield p_a, p_b + yield p_a, p_b, p_c p_a.delete() p_b.delete()
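
Two usage notes on the final state of this series:

1) Fixture profiling is opt-in: the hookwrapper around pytest_fixture_setup only
   accumulates per-fixture wall time when FIXTURE_PROFILE is set, and the
   session-scoped print_perf_summary fixture then reports the ten slowest
   fixtures to stderr. Besides the Makefile (local runs) and the GitHub workflow
   (staging), it can be enabled ad hoc:

       FIXTURE_PROFILE=true pytest tests/integration

2) A minimal sketch of the "Custom Data Row IDs Strategy" described in the
   annotation_import/conftest.py docstring, as it might look inside a test file.
   The test name and payload tweaks are illustrative; imports are assumed to
   match those already used by test_ndjson_validation.py, and the sketch relies
   on _validate_ndjson not requiring the data row to exist (the module-scoped
   configured_project_with_ontology holds no data rows at all):

       import uuid

       import pytest

       @pytest.fixture(scope="module", autouse=True)
       def hardcoded_datarow_id():
           # A fixed, plausible id: nothing is created server-side.
           data_row_id = 'ck8q9q9qj00003g5z3q1q9q9q'

           def get_data_row_id(indx=0):
               return data_row_id

           yield get_data_row_id

       def test_rectangle_validates_with_hardcoded_id(
               rectangle_inference, hardcoded_datarow_id,
               configured_project_with_ontology):
           pred = rectangle_inference.copy()
           pred['uuid'] = str(uuid.uuid4())
           pred['dataRow'] = {'id': hardcoded_datarow_id()}
           # Should pass validation without ever creating a data row.
           _validate_ndjson([pred], configured_project_with_ontology)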