From 184d169db4567a2170ae9b84f82fba8a883a5c1e Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Fri, 7 Jul 2023 13:57:18 +0200 Subject: [PATCH 1/4] Improve task tests --- tests/integration/test_task.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_task.py b/tests/integration/test_task.py index 9b57c54af..fe1e31d3a 100644 --- a/tests/integration/test_task.py +++ b/tests/integration/test_task.py @@ -1,5 +1,5 @@ import pytest - +import collections.abc from labelbox import DataRow from labelbox.schema.data_row_metadata import DataRowMetadataField @@ -25,6 +25,11 @@ def test_task_errors(dataset, image_url): task.wait_till_done() assert task.status == "FAILED" assert len(task.failed_data_rows) > 0 + + failedDataRows = task.failed_data_rows[0]['failedDataRows'] + assert len(failedDataRows) == 1 + # Both metadata fields should be present in error as duplicates are not allowed + assert len(failedDataRows[0]['metadataFields']) == 2 assert task.errors is not None @@ -40,6 +45,15 @@ def test_task_success_json(dataset, image_url): assert task.status == "COMPLETE" assert task.errors is None assert task.result is not None + assert isinstance(task.result, collections.abc.Sequence) + assert task.result_url is not None + assert isinstance(task.result_url, str) + task_result = task.result[0] + assert 'id' in task_result and isinstance(task_result['id'], str) + assert 'row_data' in task_result and isinstance(task_result['row_data'], + str) + assert 'global_key' in task_result and task_result['global_key'] is None + assert 'external_id' in task_result and task_result['external_id'] is None assert len(task.result) From b1abcc86d5a5adf4d167346dabbb0e00a6325778 Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Mon, 10 Jul 2023 22:54:23 +0200 Subject: [PATCH 2/4] Add snapshot testing --- Dockerfile | 2 +- .../test_task.test_task_errors.errors.json | 1 + ...ask.test_task_errors.failed_data_rows.json | 1 + 
.../test_task.test_task_success_json.json | 1 + tests/integration/test_task.py | 25 ++++++++++++------- tests/utils.py | 3 +++ 6 files changed, 23 insertions(+), 10 deletions(-) create mode 100644 tests/integration/snapshots/test_task.test_task_errors.errors.json create mode 100644 tests/integration/snapshots/test_task.test_task_errors.failed_data_rows.json create mode 100644 tests/integration/snapshots/test_task.test_task_success_json.json diff --git a/Dockerfile b/Dockerfile index f56630479..1b4b38f80 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ FROM python:3.9-slim-bullseye -RUN pip install pytest pytest-cases pytest-rerunfailures +RUN pip install pytest pytest-cases pytest-rerunfailures pytest-snapshot RUN apt-get -y update RUN apt install -y libsm6 \ libxext6 \ diff --git a/tests/integration/snapshots/test_task.test_task_errors.errors.json b/tests/integration/snapshots/test_task.test_task_errors.errors.json new file mode 100644 index 000000000..4805b6970 --- /dev/null +++ b/tests/integration/snapshots/test_task.test_task_errors.errors.json @@ -0,0 +1 @@ +"Failed to validate the metadata '[{\"schemaId\":\"cko8s9r5v0001h2dk9elqdidh\",\"value\":\"some msg\"},{\"schemaId\":\"cko8s9r5v0001h2dk9elqdidh\",\"value\":\"some msg 2\"}]': A schemaId can only be specified once per DataRow for CustomMetadataString" \ No newline at end of file diff --git a/tests/integration/snapshots/test_task.test_task_errors.failed_data_rows.json b/tests/integration/snapshots/test_task.test_task_errors.failed_data_rows.json new file mode 100644 index 000000000..bc931ae99 --- /dev/null +++ b/tests/integration/snapshots/test_task.test_task_errors.failed_data_rows.json @@ -0,0 +1 @@ +[{"message": "Failed to validate the metadata '[{\"schemaId\":\"cko8s9r5v0001h2dk9elqdidh\",\"value\":\"some msg\"},{\"schemaId\":\"cko8s9r5v0001h2dk9elqdidh\",\"value\":\"some msg 2\"}]': A schemaId can only be specified once per DataRow for CustomMetadataString", "failedDataRows": [{"rowData": "", 
"attachmentInputs": [], "metadataFields": [{"schemaId": "cko8s9r5v0001h2dk9elqdidh", "value": "some msg"}, {"schemaId": "cko8s9r5v0001h2dk9elqdidh", "value": "some msg 2"}]}]}] \ No newline at end of file diff --git a/tests/integration/snapshots/test_task.test_task_success_json.json b/tests/integration/snapshots/test_task.test_task_success_json.json new file mode 100644 index 000000000..7f5ca459e --- /dev/null +++ b/tests/integration/snapshots/test_task.test_task_success_json.json @@ -0,0 +1 @@ +{"id": "DUMMY_ID", "external_id": null, "row_data": "https://dummy.url", "global_key": null} \ No newline at end of file diff --git a/tests/integration/test_task.py b/tests/integration/test_task.py index fe1e31d3a..8daa4dd1c 100644 --- a/tests/integration/test_task.py +++ b/tests/integration/test_task.py @@ -1,12 +1,14 @@ +import json import pytest import collections.abc from labelbox import DataRow from labelbox.schema.data_row_metadata import DataRowMetadataField +from tests.utils import INTEGRATION_SNAPSHOT_DIRECTORY TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh" -def test_task_errors(dataset, image_url): +def test_task_errors(dataset, image_url, snapshot): client = dataset.client task = dataset.create_data_rows([ { @@ -25,15 +27,17 @@ def test_task_errors(dataset, image_url): task.wait_till_done() assert task.status == "FAILED" assert len(task.failed_data_rows) > 0 - - failedDataRows = task.failed_data_rows[0]['failedDataRows'] - assert len(failedDataRows) == 1 - # Both metadata fields should be present in error as duplicates are not allowed - assert len(failedDataRows[0]['metadataFields']) == 2 + snapshot.snapshot_dir = INTEGRATION_SNAPSHOT_DIRECTORY + # RowData is dynamic, so we need to remove it from the snapshot + task.failed_data_rows[0]['failedDataRows'][0]['rowData'] = '' + snapshot.assert_match(json.dumps(task.failed_data_rows), + 'test_task.test_task_errors.failed_data_rows.json') assert task.errors is not None + snapshot.assert_match(json.dumps(task.errors), + 
'test_task.test_task_errors.errors.json') -def test_task_success_json(dataset, image_url): +def test_task_success_json(dataset, image_url, snapshot): client = dataset.client task = dataset.create_data_rows([ { @@ -52,8 +56,11 @@ def test_task_success_json(dataset, image_url): assert 'id' in task_result and isinstance(task_result['id'], str) assert 'row_data' in task_result and isinstance(task_result['row_data'], str) - assert 'global_key' in task_result and task_result['global_key'] is None - assert 'external_id' in task_result and task_result['external_id'] is None + snapshot.snapshot_dir = INTEGRATION_SNAPSHOT_DIRECTORY + task_result['id'] = 'DUMMY_ID' + task_result['row_data'] = 'https://dummy.url' + snapshot.assert_match(json.dumps(task_result), + 'test_task.test_task_success_json.json') assert len(task.result) diff --git a/tests/utils.py b/tests/utils.py index bbece9d91..6fa2a8d8d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -25,3 +25,6 @@ def rename_cuid_key_recursive(d): for i in v: if isinstance(i, dict): rename_cuid_key_recursive(i) + + +INTEGRATION_SNAPSHOT_DIRECTORY = 'tests/integration/snapshots' From 87500e37796e116fe6949e638d067f012fcdc42b Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Tue, 11 Jul 2023 00:10:14 +0200 Subject: [PATCH 3/4] Add global key snapshot testing --- ...on_with_same_global_keys.failed_data_rows.json | 1 + tests/integration/test_data_rows.py | 15 ++++++++++++++- tests/integration/test_task.py | 6 +++--- 3 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 tests/integration/snapshots/test_data_rows.test_data_row_bulk_creation_with_same_global_keys.failed_data_rows.json diff --git a/tests/integration/snapshots/test_data_rows.test_data_row_bulk_creation_with_same_global_keys.failed_data_rows.json b/tests/integration/snapshots/test_data_rows.test_data_row_bulk_creation_with_same_global_keys.failed_data_rows.json new file mode 100644 index 000000000..df8478126 --- /dev/null +++ 
b/tests/integration/snapshots/test_data_rows.test_data_row_bulk_creation_with_same_global_keys.failed_data_rows.json @@ -0,0 +1 @@ +[{"message": "Data rows contain duplicate global keys", "failedDataRows": [{"externalId": "tests/integration/media/sample_image.jpg", "globalKey": "", "rowData": "", "attachmentInputs": []}, {"externalId": "tests/integration/media/sample_image.jpg", "globalKey": "", "rowData": "", "attachmentInputs": []}]}] \ No newline at end of file diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py index 83bc3a1e9..297b9cd38 100644 --- a/tests/integration/test_data_rows.py +++ b/tests/integration/test_data_rows.py @@ -12,6 +12,7 @@ from labelbox.schema.task import Task from labelbox.schema.data_row_metadata import DataRowMetadataField, DataRowMetadataKind import labelbox.exceptions +from utils import INTEGRATION_SNAPSHOT_DIRECTORY SPLIT_SCHEMA_ID = "cko8sbczn0002h2dkdaxb5kal" TEST_SPLIT_ID = "cko8scbz70005h2dkastwhgqt" @@ -781,7 +782,8 @@ def test_data_row_bulk_creation_with_unique_global_keys(dataset, sample_image): } == {global_key_1, global_key_2, global_key_3} -def test_data_row_bulk_creation_with_same_global_keys(dataset, sample_image): +def test_data_row_bulk_creation_with_same_global_keys(dataset, sample_image, + snapshot): global_key_1 = str(uuid.uuid4()) task = dataset.create_data_rows([{ DataRow.row_data: sample_image, @@ -797,6 +799,17 @@ def test_data_row_bulk_creation_with_same_global_keys(dataset, sample_image): assert len(list(dataset.data_rows())) == 0 assert task.errors == "Data rows contain duplicate global keys" + # Dynamic values, resetting to make snapshot + task.failed_data_rows[0]['failedDataRows'][0]['rowData'] = '' + task.failed_data_rows[0]['failedDataRows'][1]['rowData'] = '' + task.failed_data_rows[0]['failedDataRows'][0]['globalKey'] = '' + task.failed_data_rows[0]['failedDataRows'][1]['globalKey'] = '' + snapshot.snapshot_dir = INTEGRATION_SNAPSHOT_DIRECTORY + snapshot.assert_match( + 
json.dumps(task.failed_data_rows), 'test_data_rows.test_data_row_bulk_creation_with_same_global_keys.failed_data_rows.json' ) + task = dataset.create_data_rows([{ DataRow.row_data: sample_image, DataRow.global_key: global_key_1 diff --git a/tests/integration/test_task.py b/tests/integration/test_task.py index 8daa4dd1c..a6c9d0b26 100644 --- a/tests/integration/test_task.py +++ b/tests/integration/test_task.py @@ -3,7 +3,7 @@ import collections.abc from labelbox import DataRow from labelbox.schema.data_row_metadata import DataRowMetadataField -from tests.utils import INTEGRATION_SNAPSHOT_DIRECTORY +from utils import INTEGRATION_SNAPSHOT_DIRECTORY TEXT_SCHEMA_ID = "cko8s9r5v0001h2dk9elqdidh" From d40138d1bf3aa6a72f7d1e222b81e6ef9fd78a39 Mon Sep 17 00:00:00 2001 From: mnoszczak Date: Tue, 11 Jul 2023 10:46:48 +0200 Subject: [PATCH 4/4] Install pytest-snapshot --- .github/workflows/python-package.yml | 2 +- tox.ini | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 429032c04..6ed378f09 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -75,7 +75,7 @@ jobs: mypy -p labelbox --pretty --show-error-codes - name: Install package and test dependencies run: | - pip install tox==3.18.1 + pip install tox==3.18.1 pytest-snapshot # TODO: replace tox.ini with what the Makefile does # to make sure local testing is diff --git a/tox.ini b/tox.ini index 8dd46d939..40c94cb48
100644 --- a/tox.ini +++ b/tox.ini @@ -9,6 +9,7 @@ deps = pytest < 7.0.0 pytest-cases pytest-rerunfailures + pytest-snapshot nbconvert nbformat passenv =