From 22b97bdbdbb6a76bd0469156d7175c7c4640d706 Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Mon, 24 Oct 2022 15:02:04 -0400
Subject: [PATCH 01/14] data row content under row_data

---
 labelbox/schema/dataset.py          |  57 +++++++--------
 tests/integration/test_data_rows.py | 109 ++++++++++++++++++++++++++++
 2 files changed, 135 insertions(+), 31 deletions(-)

diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py
index a2deb62ab..8afb983f1 100644
--- a/labelbox/schema/dataset.py
+++ b/labelbox/schema/dataset.py
@@ -229,8 +229,8 @@ def _create_descriptor_file(self, items, max_attachments_per_data_row=None):
         >>>     {DataRow.row_data:"http://my_site.com/photos/img_01.jpg"},
         >>>     {DataRow.row_data:"/path/to/file1.jpg"},
         >>>     "path/to/file2.jpg",
-        >>>     {"tileLayerUrl" : "http://", ...}
-        >>>     {"conversationalData" : [...], ...}
+        >>>     {DataRow.row_data: {"tileLayerUrl" : "http://", ...}}
+        >>>     {DataRow.row_data: {"type" : ..., 'version' : ..., 'messages' : [...]}}
         >>>     ])
 
         For an example showing how to upload tiled data_rows see the following notebook:
@@ -258,7 +258,7 @@ def _create_descriptor_file(self, items, max_attachments_per_data_row=None):
 
         def upload_if_necessary(item):
             row_data = item['row_data']
-            if os.path.exists(row_data):
+            if isinstance(row_data, str) and os.path.exists(row_data):
                 item_url = self.client.upload_file(row_data)
                 item['row_data'] = item_url
                 if 'external_id' not in item:
@@ -341,40 +341,39 @@ def validate_keys(item):
                     "`row_data` missing when creating DataRow.")
 
             invalid_keys = set(item) - {
-                *{f.name for f in DataRow.fields()}, 'attachments'
+                *{f.name for f in DataRow.fields()}, 'attachments', 'media_type'
             }
             if invalid_keys:
                 raise InvalidAttributeError(DataRow, invalid_keys)
             return item
 
+        def formatLegacyConversationalData(item):
+            messages = item.pop("conversationalData")
+            version = item.pop("version")
+            type = item.pop("type")
+            if "externalId" in item:
+                external_id = item.pop("externalId")
+                item["external_id"] = external_id
+            if "globalKey" in item:
+                global_key = item.pop("globalKey")
+                item["globalKey"] = global_key
+            validate_conversational_data(messages)
+            one_conversation = \
+                {
+                    "type": type,
+                    "version": version,
+                    "messages": messages
+                }
+            item["row_data"] = one_conversation
+            return item
+
         def convert_item(item):
-            # Don't make any changes to tms data
             if "tileLayerUrl" in item:
                 validate_attachments(item)
                 return item
 
             if "conversationalData" in item:
-                messages = item.pop("conversationalData")
-                version = item.pop("version")
-                type = item.pop("type")
-                if "externalId" in item:
-                    external_id = item.pop("externalId")
-                    item["external_id"] = external_id
-                if "globalKey" in item:
-                    global_key = item.pop("globalKey")
-                    item["globalKey"] = global_key
-                validate_conversational_data(messages)
-                one_conversation = \
-                    {
-                        "type": type,
-                        "version": version,
-                        "messages": messages
-                    }
-                conversationUrl = self.client.upload_data(
-                    json.dumps(one_conversation),
-                    content_type="application/json",
-                    filename="conversational_data.json")
-                item["row_data"] = conversationUrl
+                formatLegacyConversationalData(item)
 
             # Convert all payload variations into the same dict format
             item = format_row(item)
@@ -386,11 +385,7 @@ def convert_item(item):
             parse_metadata_fields(item)
             # Upload any local file paths
             item = upload_if_necessary(item)
-
-            return {
-                "data" if key == "row_data" else utils.camel_case(key): value
-                for key, value in item.items()
-            }
+            return item
 
         if not isinstance(items, Iterable):
             raise ValueError(
diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py
index 563358058..d98e859d5 100644
--- a/tests/integration/test_data_rows.py
+++ b/tests/integration/test_data_rows.py
@@ -1,6 +1,7 @@
 from tempfile import NamedTemporaryFile
 import uuid
 from datetime import datetime
+import json
 
 import pytest
 import requests
@@ -695,3 +696,111 @@ def test_data_row_rulk_creation_sync_with_same_global_keys(
 
     assert len(list(dataset.data_rows())) == 1
     assert list(dataset.data_rows())[0].global_key == global_key_1
+
+
+def test_create_conversational_text(dataset):
+    content = {
+        'row_data': {
+            "messages": [{
+                "messageId": "message-0",
+                "timestampUsec": 1530718491,
+                "content": "I love iphone! i just bought new iphone! 🥰 📲",
+                "user": {
+                    "userId": "Bot 002",
+                    "name": "Bot"
+                },
+                "align": "left",
+                "canLabel": False
+            }],
+            "version": 1,
+            "type": "application/vnd.labelbox.conversational"
+        }
+    }
+    examples = [
+        {
+            **content, 'media_type': 'CONVERSATIONAL_TEXT'
+        },
+        content,
+        content['row_data']  # Old way to check for backwards compatibility
+    ]
+    dataset.create_data_rows_sync(examples)
+    data_rows = list(dataset.data_rows())
+    assert len(data_rows) == len(examples)
+    for data_row in data_rows:
+        assert requests.get(data_row.row_data).json() == content['row_data']
+
+
+def test_invalid_media_type(dataset):
+    content = {
+        'row_data': {
+            "messages": [{
+                "messageId": "message-0",
+                "timestampUsec": 1530718491,
+                "content": "I love iphone! i just bought new iphone! 🥰 📲",
+                "user": {
+                    "userId": "Bot 002",
+                    "name": "Bot"
+                },
+                "align": "left",
+                "canLabel": False
+            }],
+            "version": 1,
+            "type": "application/vnd.labelbox.conversational"
+        }
+    }
+
+    for error_message, invalid_media_type in [[
+            "Found invalid contents for media type: 'IMAGE'", 'IMAGE'
+    ], ["Found invalid media type: 'totallyinvalid'", 'totallyinvalid']]:
+        # TODO: What error kind should this be? It looks like for global key we are
+        # using malformed query. But for FileUploads we use InvalidQueryError
+        with pytest.raises(labelbox.exceptions.InvalidQueryError):
+            dataset.create_data_rows_sync([{
+                **content, 'media_type': invalid_media_type
+            }])
+
+        task = dataset.create_data_rows([{
+            **content, 'media_type': invalid_media_type
+        }])
+        task.wait_till_done()
+        assert task.errors == {'message': error_message}
+
+
+def test_create_tiled_layer(dataset):
+    content = {
+        "row_data": {
+            "tileLayerUrl":
+                "https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png",
+            "bounds": [[19.405662413477728, -99.21052827588443],
+                       [19.400498983095076, -99.20534818927473]],
+            "minZoom":
+                12,
+            "maxZoom":
+                20,
+            "epsg":
+                "EPSG4326",
+            "alternativeLayers": [{
+                "tileLayerUrl":
+                    "https://api.mapbox.com/styles/v1/mapbox/satellite-streets-v11/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw",
+                "name":
+                    "Satellite"
+            }, {
+                "tileLayerUrl":
+                    "https://api.mapbox.com/styles/v1/mapbox/navigation-guidance-night-v4/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw",
+                "name":
+                    "Guidance"
+            }]
+        }
+    }
+    examples = [
+        {
+            **content, 'media_type': 'TMS_SIMPLE'
+        },
+        content,
+        content['row_data']  # Old way to check for backwards compatibility
+    ]
+    dataset.create_data_rows_sync(examples)
+    data_rows = list(dataset.data_rows())
+    assert len(data_rows) == len(examples)
+    for data_row in data_rows:
+        assert json.loads(data_row.row_data) == content['row_data']

From 272e149b3e6ebe679712dafc815588e30574b4aa Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Mon, 24 Oct 2022 18:19:21 -0400
Subject: [PATCH 02/14] create data row from objects

---
 labelbox/orm/model.py         | 4 ++--
 labelbox/schema/data_row.py   | 7 +++++--
 labelbox/schema/dataset.py    | 8 ++++++--
 labelbox/schema/media_type.py | 4 ++--
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/labelbox/orm/model.py b/labelbox/orm/model.py
index 9427489ff..a964a0607 100644
--- a/labelbox/orm/model.py
+++ b/labelbox/orm/model.py
@@ -104,8 +104,8 @@ def DateTime(*args):
         return Field(Field.Type.DateTime, *args)
 
     @staticmethod
-    def Enum(enum_cls: type, *args):
-        return Field(Field.EnumType(enum_cls), *args)
+    def Enum(enum_cls: type, *args, **kwargs):
+        return Field(Field.EnumType(enum_cls), *args, **kwargs)
 
     @staticmethod
     def Json(*args):
diff --git a/labelbox/schema/data_row.py b/labelbox/schema/data_row.py
index 4c7bb8287..e33364ad7 100644
--- a/labelbox/schema/data_row.py
+++ b/labelbox/schema/data_row.py
@@ -5,6 +5,7 @@
 from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable
 from labelbox.orm.model import Entity, Field, Relationship
 from labelbox.schema.data_row_metadata import DataRowMetadataField  # type: ignore
+from labelbox.schema.media_type import MediaType
 
 if TYPE_CHECKING:
     from labelbox import AssetAttachment
@@ -48,6 +49,7 @@ class DataRow(DbObject, Updateable, BulkDeletable):
                           name="metadata",
                           graphql_name="customMetadata",
                           result_subquery="customMetadata { schemaId value }")
+    media_type = Field.Enum(MediaType, "media_type", result_subquery="")
 
     # Relationships
     dataset = Relationship.ToOne("Dataset")
@@ -59,8 +61,9 @@ class DataRow(DbObject, Updateable, BulkDeletable):
     supported_meta_types = supported_attachment_types = set(
         Entity.AssetAttachment.AttachmentType.__members__)
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
+    def __init__(self, client, field_values, **kwargs):
+        field_values.update({'mediaType': MediaType.Unknown})
+        super().__init__(client, field_values, **kwargs)
         self.attachments.supports_filtering = False
         self.attachments.supports_sorting = False
 
diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py
index 8afb983f1..b820d3c19 100644
--- a/labelbox/schema/dataset.py
+++ b/labelbox/schema/dataset.py
@@ -95,9 +95,12 @@ def convert_field_keys(items):
             raise InvalidQueryError(
                 "DataRow.row_data missing when creating DataRow.")
 
-        # If row data is a local file path, upload it to server.
         row_data = args[DataRow.row_data.name]
-        if os.path.exists(row_data):
+        if not isinstance(row_data, str):
+            # If the row data is an object, upload as a string
+            args[DataRow.row_data.name] = json.dumps(row_data)
+        elif os.path.exists(row_data):
+            # If row data is a local file path, upload it to server.
             args[DataRow.row_data.name] = self.client.upload_file(row_data)
         args[DataRow.dataset.name] = self
 
@@ -106,6 +109,7 @@ def convert_field_keys(items):
             mdo = self.client.get_data_row_metadata_ontology()
             args[DataRow.metadata_fields.name] = mdo.parse_upsert_metadata(
                 args[DataRow.metadata_fields.name])
+
         return self.client._create(DataRow, args)
 
     def create_data_rows_sync(self, items) -> None:
diff --git a/labelbox/schema/media_type.py b/labelbox/schema/media_type.py
index c4e139a67..aaddb83be 100644
--- a/labelbox/schema/media_type.py
+++ b/labelbox/schema/media_type.py
@@ -21,9 +21,9 @@ class MediaType(Enum):
 
     @classmethod
     def _missing_(cls, name):
-        """Handle missing null data types for projects 
+        """Handle missing null data types for projects
             created without setting allowedMediaType
-            Handle upper case names for compatibility with 
+            Handle upper case names for compatibility with
             the GraphQL"""
 
         if name is None:

From 6b9f0e378b94e050344435c7e92d7a69be92863a Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Tue, 25 Oct 2022 06:28:54 -0400
Subject: [PATCH 03/14] add more tests

---
 tests/integration/test_dataset.py | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py
index 8237f77bd..9cd40a07d 100644
--- a/tests/integration/test_dataset.py
+++ b/tests/integration/test_dataset.py
@@ -2,7 +2,7 @@
 import pytest
 import requests
 from labelbox import Dataset
-from labelbox.exceptions import ResourceNotFoundError, MalformedQueryException
+from labelbox.exceptions import ResourceNotFoundError, MalformedQueryException, InvalidQueryError
 from labelbox.schema.dataset import MAX_DATAROW_PER_API_OPERATION
 
 
@@ -103,6 +103,33 @@ def test_upload_video_file(dataset, sample_video: str) -> None:
         assert response.headers['Content-Type'] == 'video/mp4'
 
 
+def test_create_pdf(dataset):
+    dataset.create_data_row(
+        row_data={
+            "pdfUrl":
+                "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-1.pdf",
+            "textLayerUrl":
+                "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-custom-text-layer.json"
+        })
+    dataset.create_data_row(row_data={
+        "pdfUrl":
+            "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-1.pdf",
+        "textLayerUrl":
+            "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-custom-text-layer.json"
+    },
+                            media_type="PDF")
+
+    with pytest.raises(InvalidQueryError):
+        # Wrong media type
+        dataset.create_data_row(row_data={
+            "pdfUrl":
+                "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-1.pdf",
+            "textLayerUrl":
+                "https://lb-test-data.s3.us-west-1.amazonaws.com/document-samples/sample-document-custom-text-layer.json"
+        },
+                                media_type="TEXT")
+
+
 def test_bulk_conversation(dataset, sample_bulk_conversation: list) -> None:
     """
     Tests that bulk conversations can be uploaded.

From 8e35a26f8fc19de946f0c0924b8e4913898a101e Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Tue, 25 Oct 2022 07:44:21 -0400
Subject: [PATCH 04/14] fix tests

---
 labelbox/schema/data_row.py                    |  9 ++++++---
 labelbox/schema/dataset.py                     |  4 ++--
 labelbox/test.py                               | 18 ++++++++++++++++++
 .../test_data_row_media_attributes.py          |  2 +-
 tests/integration/test_data_rows.py            |  8 +++++---
 5 files changed, 32 insertions(+), 9 deletions(-)
 create mode 100644 labelbox/test.py

diff --git a/labelbox/schema/data_row.py b/labelbox/schema/data_row.py
index e33364ad7..1cfe41a28 100644
--- a/labelbox/schema/data_row.py
+++ b/labelbox/schema/data_row.py
@@ -61,12 +61,15 @@ class DataRow(DbObject, Updateable, BulkDeletable):
     supported_meta_types = supported_attachment_types = set(
         Entity.AssetAttachment.AttachmentType.__members__)
 
-    def __init__(self, client, field_values, **kwargs):
-        field_values.update({'mediaType': MediaType.Unknown})
-        super().__init__(client, field_values, **kwargs)
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
         self.attachments.supports_filtering = False
         self.attachments.supports_sorting = False
 
+    def _set_field_values(self, field_values):
+        field_values.update({'mediaType': MediaType.Unknown})
+        super()._set_field_values(field_values)
+
     @staticmethod
     def bulk_delete(data_rows) -> None:
         """ Deletes all the given DataRows.
diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py
index b820d3c19..7833d8286 100644
--- a/labelbox/schema/dataset.py
+++ b/labelbox/schema/dataset.py
@@ -353,8 +353,8 @@ def validate_keys(item):
 
         def formatLegacyConversationalData(item):
             messages = item.pop("conversationalData")
-            version = item.pop("version")
-            type = item.pop("type")
+            version = item.pop("version", 1)
+            type = item.pop("type", "application/vnd.labelbox.conversational")
             if "externalId" in item:
                 external_id = item.pop("externalId")
                 item["external_id"] = external_id
diff --git a/labelbox/test.py b/labelbox/test.py
new file mode 100644
index 000000000..8fd8365a6
--- /dev/null
+++ b/labelbox/test.py
@@ -0,0 +1,18 @@
+def run(fn, model_run):
+    try:
+        fn()
+    except Exception as e:
+        model_run.update_status(error_message=error)
+        pipelines[pipeline].update_status(PipelineState.FAILED,
+                                          json_data['model_run_id'],
+                                          error_message=str(e))
+    else:
+        status
+
+
+def model_run(payload):
+
+    def etl():
+        payload
+
+    run(etl)
diff --git a/tests/integration/test_data_row_media_attributes.py b/tests/integration/test_data_row_media_attributes.py
index d2e1c10b0..e2a594627 100644
--- a/tests/integration/test_data_row_media_attributes.py
+++ b/tests/integration/test_data_row_media_attributes.py
@@ -7,4 +7,4 @@ def test_export_empty_media_attributes(configured_project_with_label):
     sleep(10)
     labels = project.label_generator()
     label = next(labels)
-    assert label.data.media_attributes == {}
\ No newline at end of file
+    assert label.data.media_attributes == {}
diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py
index 4519572d7..5fcdc8874 100644
--- a/tests/integration/test_data_rows.py
+++ b/tests/integration/test_data_rows.py
@@ -719,10 +719,12 @@ def test_create_conversational_text(dataset):
     }
     examples = [
         {
-            **content, 'media_type': 'CONVERSATIONAL_TEXT'
+            **content, 'media_type': 'CONVERSATIONAL'
         },
         content,
-        content['row_data']  # Old way to check for backwards compatibility
+        {
+            "conversationalData": content['row_data']['messages']
+        }  # Old way to check for backwards compatibility
     ]
     dataset.create_data_rows_sync(examples)
     data_rows = list(dataset.data_rows())
@@ -754,7 +756,7 @@ def test_invalid_media_type(dataset):
             "Found invalid contents for media type: 'IMAGE'", 'IMAGE'
     ], ["Found invalid media type: 'totallyinvalid'", 'totallyinvalid']]:
         # TODO: What error kind should this be? It looks like for global key we are
-        # using malformed query. But for FileUploads we use InvalidQueryError
+        # using malformed query. But for invalid contents in FileUploads we use InvalidQueryError
         with pytest.raises(labelbox.exceptions.InvalidQueryError):
             dataset.create_data_rows_sync([{
                 **content, 'media_type': invalid_media_type

From 898b471bbda33029dfaf67edefade73ec9bd81c8 Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Tue, 25 Oct 2022 07:59:52 -0400
Subject: [PATCH 05/14] update tests

---
 tests/integration/test_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py
index 9cd40a07d..89a89b78c 100644
--- a/tests/integration/test_dataset.py
+++ b/tests/integration/test_dataset.py
@@ -160,7 +160,7 @@ def test_create_descriptor_file(dataset):
         upload_data_spy.assert_called()
         call_args, call_kwargs = upload_data_spy.call_args_list[0][
             0], upload_data_spy.call_args_list[0][1]
-        assert call_args == ('[{"data": "some text..."}]',)
+        assert call_args == ('[{"row_data": "some text..."}]',)
         assert call_kwargs == {
             'content_type': 'application/json',
             'filename': 'json_import.json'

From d181d0fa5bb724832bc326f397938defb8b9e599 Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Tue, 1 Nov 2022 09:17:17 -0400
Subject: [PATCH 06/14] add support for attachments

---
 labelbox/orm/model.py       |  4 ++--
 labelbox/schema/data_row.py |  2 --
 labelbox/schema/dataset.py  | 25 +++++++++++++++++++++++--
 3 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/labelbox/orm/model.py b/labelbox/orm/model.py
index cce43b571..79783c10a 100644
--- a/labelbox/orm/model.py
+++ b/labelbox/orm/model.py
@@ -104,8 +104,8 @@ def DateTime(*args):
         return Field(Field.Type.DateTime, *args)
 
     @staticmethod
-    def Enum(enum_cls: type, *args, **kwargs):
-        return Field(Field.EnumType(enum_cls), *args, **kwargs)
+    def Enum(enum_cls: type, *args):
+        return Field(Field.EnumType(enum_cls), *args)
 
     @staticmethod
     def Json(*args):
diff --git a/labelbox/schema/data_row.py b/labelbox/schema/data_row.py
index 1cfe41a28..24fc25e9c 100644
--- a/labelbox/schema/data_row.py
+++ b/labelbox/schema/data_row.py
@@ -49,7 +49,6 @@ class DataRow(DbObject, Updateable, BulkDeletable):
                           name="metadata",
                           graphql_name="customMetadata",
                           result_subquery="customMetadata { schemaId value }")
-    media_type = Field.Enum(MediaType, "media_type", result_subquery="")
 
     # Relationships
     dataset = Relationship.ToOne("Dataset")
@@ -67,7 +66,6 @@ def __init__(self, *args, **kwargs):
         self.attachments.supports_sorting = False
 
     def _set_field_values(self, field_values):
-        field_values.update({'mediaType': MediaType.Unknown})
         super()._set_field_values(field_values)
 
     @staticmethod
diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py
index 7833d8286..103846a97 100644
--- a/labelbox/schema/dataset.py
+++ b/labelbox/schema/dataset.py
@@ -15,6 +15,7 @@
 from labelbox.exceptions import InvalidQueryError, LabelboxError, ResourceNotFoundError, InvalidAttributeError
 from labelbox.orm.db_object import DbObject, Updateable, Deletable
 from labelbox.orm.model import Entity, Field, Relationship
+from labelbox.orm import query
 from labelbox.exceptions import MalformedQueryException
 
 if TYPE_CHECKING:
@@ -102,7 +103,6 @@ def convert_field_keys(items):
         elif os.path.exists(row_data):
             # If row data is a local file path, upload it to server.
             args[DataRow.row_data.name] = self.client.upload_file(row_data)
-        args[DataRow.dataset.name] = self
 
         # Parse metadata fields, if they are provided
         if DataRow.metadata_fields.name in args:
@@ -110,7 +110,28 @@ def convert_field_keys(items):
             args[DataRow.metadata_fields.name] = mdo.parse_upsert_metadata(
                 args[DataRow.metadata_fields.name])
 
-        return self.client._create(DataRow, args)
+        query_str = """mutation CreateDataRowPyApi(
+            $row_data: String!,
+            $metadata_fields: [DataRowCustomMetadataUpsertInput!]!,
+            $attachments: [DataRowAttachmentInput!],
+            $media_type : MediaType,
+            $dataset: ID!
+            ){
+                createDataRow(
+                    data:
+                      {
+                        rowData: $row_data
+                        mediaType: $media_type
+                        metadataFields: $metadata_fields
+                        attachments: $attachments
+                        dataset: {connect: {id: $dataset}}
+                    }
+                   )
+                {%s}
+            }
+        """ % query.results_query_part(Entity.DataRow)
+        res = self.client.execute(query_str, {**args, 'dataset': self.uid})
+        return DataRow(self.client, res['createDataRow'])
 
     def create_data_rows_sync(self, items) -> None:
         """ Synchronously bulk upload data rows.

From 976fe6f36c17d98aab02fc598971f68d48cb1a5b Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Tue, 1 Nov 2022 10:40:20 -0400
Subject: [PATCH 07/14] add tests

---
 labelbox/schema/dataset.py          |   2 +-
 tests/integration/test_data_rows.py | 153 +++++++++++++++-------------
 2 files changed, 83 insertions(+), 72 deletions(-)

diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py
index 103846a97..75641e3c5 100644
--- a/labelbox/schema/dataset.py
+++ b/labelbox/schema/dataset.py
@@ -112,7 +112,7 @@ def convert_field_keys(items):
 
         query_str = """mutation CreateDataRowPyApi(
             $row_data: String!,
-            $metadata_fields: [DataRowCustomMetadataUpsertInput!]!,
+            $metadata_fields: [DataRowCustomMetadataUpsertInput!],
             $attachments: [DataRowAttachmentInput!],
             $media_type : MediaType,
             $dataset: ID!
diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py
index 5fcdc8874..742d84a3f 100644
--- a/tests/integration/test_data_rows.py
+++ b/tests/integration/test_data_rows.py
@@ -29,6 +29,56 @@ def mdo(client):
     yield mdo
 
 
+@pytest.fixture
+def conversational_content():
+    return {
+        'row_data': {
+            "messages": [{
+                "messageId": "message-0",
+                "timestampUsec": 1530718491,
+                "content": "I love iphone! i just bought new iphone! 🥰 📲",
+                "user": {
+                    "userId": "Bot 002",
+                    "name": "Bot"
+                },
+                "align": "left",
+                "canLabel": False
+            }],
+            "version": 1,
+            "type": "application/vnd.labelbox.conversational"
+        }
+    }
+
+
+@pytest.fixture
+def tile_content():
+    return {
+        "row_data": {
+            "tileLayerUrl":
+                "https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png",
+            "bounds": [[19.405662413477728, -99.21052827588443],
+                       [19.400498983095076, -99.20534818927473]],
+            "minZoom":
+                12,
+            "maxZoom":
+                20,
+            "epsg":
+                "EPSG4326",
+            "alternativeLayers": [{
+                "tileLayerUrl":
+                    "https://api.mapbox.com/styles/v1/mapbox/satellite-streets-v11/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw",
+                "name":
+                    "Satellite"
+            }, {
+                "tileLayerUrl":
+                    "https://api.mapbox.com/styles/v1/mapbox/navigation-guidance-night-v4/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw",
+                "name":
+                    "Guidance"
+            }]
+        }
+    }
+
+
 def make_metadata_fields():
     embeddings = [0.0] * 128
     msg = "A message"
@@ -699,31 +749,14 @@ def test_data_row_rulk_creation_sync_with_same_global_keys(
     assert list(dataset.data_rows())[0].global_key == global_key_1
 
 
-def test_create_conversational_text(dataset):
-    content = {
-        'row_data': {
-            "messages": [{
-                "messageId": "message-0",
-                "timestampUsec": 1530718491,
-                "content": "I love iphone! i just bought new iphone! 🥰 📲",
-                "user": {
-                    "userId": "Bot 002",
-                    "name": "Bot"
-                },
-                "align": "left",
-                "canLabel": False
-            }],
-            "version": 1,
-            "type": "application/vnd.labelbox.conversational"
-        }
-    }
+def test_create_conversational_text(dataset, conversational_content):
     examples = [
         {
-            **content, 'media_type': 'CONVERSATIONAL'
+            **conversational_content, 'media_type': 'CONVERSATIONAL'
         },
-        content,
+        conversational_content,
         {
-            "conversationalData": content['row_data']['messages']
+            "conversationalData": conversational_content['row_data']['messages']
         }  # Old way to check for backwards compatibility
     ]
     dataset.create_data_rows_sync(examples)
@@ -733,25 +766,7 @@ def test_create_conversational_text(dataset):
-        assert requests.get(data_row.row_data).json() == content['row_data']
+        assert requests.get(data_row.row_data).json() == conversational_content['row_data']
 
 
-def test_invalid_media_type(dataset):
-    content = {
-        'row_data': {
-            "messages": [{
-                "messageId": "message-0",
-                "timestampUsec": 1530718491,
-                "content": "I love iphone! i just bought new iphone! 🥰 📲",
-                "user": {
-                    "userId": "Bot 002",
-                    "name": "Bot"
-                },
-                "align": "left",
-                "canLabel": False
-            }],
-            "version": 1,
-            "type": "application/vnd.labelbox.conversational"
-        }
-    }
-
+def test_invalid_media_type(dataset, conversational_content):
     for error_message, invalid_media_type in [[
             "Found invalid contents for media type: 'IMAGE'", 'IMAGE'
     ], ["Found invalid media type: 'totallyinvalid'", 'totallyinvalid']]:
@@ -759,51 +774,47 @@ def test_invalid_media_type(dataset):
         # using malformed query. But for invalid contents in FileUploads we use InvalidQueryError
         with pytest.raises(labelbox.exceptions.InvalidQueryError):
             dataset.create_data_rows_sync([{
-                **content, 'media_type': invalid_media_type
+                **conversational_content, 'media_type': invalid_media_type
             }])
 
         task = dataset.create_data_rows([{
-            **content, 'media_type': invalid_media_type
+            **conversational_content, 'media_type': invalid_media_type
         }])
         task.wait_till_done()
         assert task.errors == {'message': error_message}
 
 
-def test_create_tiled_layer(dataset):
-    content = {
-        "row_data": {
-            "tileLayerUrl":
-                "https://s3-us-west-1.amazonaws.com/lb-tiler-layers/mexico_city/{z}/{x}/{y}.png",
-            "bounds": [[19.405662413477728, -99.21052827588443],
-                       [19.400498983095076, -99.20534818927473]],
-            "minZoom":
-                12,
-            "maxZoom":
-                20,
-            "epsg":
-                "EPSG4326",
-            "alternativeLayers": [{
-                "tileLayerUrl":
-                    "https://api.mapbox.com/styles/v1/mapbox/satellite-streets-v11/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw",
-                "name":
-                    "Satellite"
-            }, {
-                "tileLayerUrl":
-                    "https://api.mapbox.com/styles/v1/mapbox/navigation-guidance-night-v4/tiles/{z}/{x}/{y}?access_token=pk.eyJ1IjoibWFwYm94IiwiYSI6ImNpejY4NXVycTA2emYycXBndHRqcmZ3N3gifQ.rJcFIG214AriISLbB6B5aw",
-                "name":
-                    "Guidance"
-            }]
-        }
-    }
+def test_create_tiled_layer(dataset, tile_content):
     examples = [
         {
-            **content, 'media_type': 'TMS_SIMPLE'
+            **tile_content, 'media_type': 'TMS_SIMPLE'
         },
-        content,
-        content['row_data']  # Old way to check for backwards compatibility
+        tile_content,
+        tile_content['row_data']  # Old way to check for backwards compatibility
     ]
     dataset.create_data_rows_sync(examples)
     data_rows = list(dataset.data_rows())
     assert len(data_rows) == len(examples)
     for data_row in data_rows:
         assert json.loads(data_row.row_data) == content['row_data']
+
+
+def test_create_data_row_with_attachments(dataset):
+    attachment_value = 'attachment value'
+    dr = dataset.create_data_row(row_data="123",
+                                 attachments=[{
+                                     'type': 'TEXT',
+                                     'value': attachment_value
+                                 }])
+    attachments = list(dr.attachments())
+    assert len(attachments) == 1
+
+
+def test_create_data_row_with_media_type(dataset, image_url):
+    with pytest.raises(labelbox.exceptions.InvalidQueryError) as exc:
+        dr = dataset.create_data_row(
+            row_data={'invalid_object': 'invalid_value'}, media_type="IMAGE")
+    assert "Found invalid contents for media type: \'IMAGE\'" in str(exc.value)
+
+    dataset.create_data_row(row_data=image_url, media_type="IMAGE")
+

From d95fc2d60dd7e3c1fe87a466539f33e3ea7c3b68 Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff1@users.noreply.github.com>
Date: Tue, 1 Nov 2022 16:13:36 -0400
Subject: [PATCH 08/14] Delete test.py

---
 labelbox/test.py | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 labelbox/test.py

diff --git a/labelbox/test.py b/labelbox/test.py
deleted file mode 100644
index 8fd8365a6..000000000
--- a/labelbox/test.py
+++ /dev/null
@@ -1,18 +0,0 @@
-def run(fn, model_run):
-    try:
-        fn()
-    except Exception as e:
-        model_run.update_status(error_message=error)
-        pipelines[pipeline].update_status(PipelineState.FAILED,
-                                          json_data['model_run_id'],
-                                          error_message=str(e))
-    else:
-        status
-
-
-def model_run(payload):
-
-    def etl():
-        payload
-
-    run(etl)

From 760241c21dc1000da90e71c2b627440567055227 Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff1@users.noreply.github.com>
Date: Tue, 1 Nov 2022 16:13:57 -0400
Subject: [PATCH 09/14] Update data_row.py

---
 labelbox/schema/data_row.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/labelbox/schema/data_row.py b/labelbox/schema/data_row.py
index 24fc25e9c..4c7bb8287 100644
--- a/labelbox/schema/data_row.py
+++ b/labelbox/schema/data_row.py
@@ -5,7 +5,6 @@
 from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable
 from labelbox.orm.model import Entity, Field, Relationship
 from labelbox.schema.data_row_metadata import DataRowMetadataField  # type: ignore
-from labelbox.schema.media_type import MediaType
 
 if TYPE_CHECKING:
     from labelbox import AssetAttachment
@@ -65,9 +64,6 @@ def __init__(self, *args, **kwargs):
         self.attachments.supports_filtering = False
         self.attachments.supports_sorting = False
 
-    def _set_field_values(self, field_values):
-        super()._set_field_values(field_values)
-
     @staticmethod
     def bulk_delete(data_rows) -> None:
         """ Deletes all the given DataRows.

From c8b81e1037b52ef764e4c355c244de9d3d4ea2f2 Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Thu, 3 Nov 2022 06:54:36 -0400
Subject: [PATCH 10/14] format

---
 tests/integration/test_data_rows.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py
index 742d84a3f..7e744af21 100644
--- a/tests/integration/test_data_rows.py
+++ b/tests/integration/test_data_rows.py
@@ -817,4 +817,3 @@ def test_create_data_row_with_media_type(dataset, image_url):
     assert "Found invalid contents for media type: \'IMAGE\'" in str(exc.value)
 
     dataset.create_data_row(row_data=image_url, media_type="IMAGE")
-

From 18c31316b93d1d03e9fde3486aba516b5f3eaf6d Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Thu, 3 Nov 2022 10:33:34 -0400
Subject: [PATCH 11/14] add global key and external id to create data row

---
 labelbox/schema/dataset.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py
index 75641e3c5..bdfe02e35 100644
--- a/labelbox/schema/dataset.py
+++ b/labelbox/schema/dataset.py
@@ -115,6 +115,8 @@ def convert_field_keys(items):
             $metadata_fields: [DataRowCustomMetadataUpsertInput!],
             $attachments: [DataRowAttachmentInput!],
             $media_type : MediaType,
+            $external_id : String,
+            $global_key : String,
             $dataset: ID!
             ){
                 createDataRow(
@@ -123,6 +125,8 @@ def convert_field_keys(items):
                         rowData: $row_data
                         mediaType: $media_type
                         metadataFields: $metadata_fields
+                        externalId: $external_id
+                        globalKey: $global_key
                         attachments: $attachments
                         dataset: {connect: {id: $dataset}}
                     }

From 4d1062b97e3350578d355dcd71a5988d1640d7d2 Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Thu, 3 Nov 2022 12:22:24 -0400
Subject: [PATCH 12/14] fix var name in test

---
 tests/integration/test_data_rows.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py
index 7e744af21..d256caf60 100644
--- a/tests/integration/test_data_rows.py
+++ b/tests/integration/test_data_rows.py
@@ -763,7 +763,8 @@ def test_create_conversational_text(dataset, conversational_content):
     data_rows = list(dataset.data_rows())
     assert len(data_rows) == len(examples)
     for data_row in data_rows:
-        assert requests.get(data_row.row_data).json() == content['row_data']
+        assert requests.get(
+            data_row.row_data).json() == conversational_content['row_data']
 
 
 def test_invalid_media_type(dataset, conversational_content):
@@ -796,7 +797,7 @@ def test_create_tiled_layer(dataset, tile_content):
     data_rows = list(dataset.data_rows())
     assert len(data_rows) == len(examples)
     for data_row in data_rows:
-        assert json.loads(data_row.row_data) == content['row_data']
+        assert json.loads(data_row.row_data) == tile_content['row_data']
 
 
 def test_create_data_row_with_attachments(dataset):

From 8c558602ef2996395de1979c372aa972b5be3475 Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Mon, 7 Nov 2022 09:21:43 -0500
Subject: [PATCH 13/14] create data row with objects

---
 labelbox/schema/data_row.py         | 9 +++++++++
 tests/integration/test_data_rows.py | 7 +++++++
 2 files changed, 16 insertions(+)

diff --git a/labelbox/schema/data_row.py b/labelbox/schema/data_row.py
index 4c7bb8287..0ec7a4e6e 100644
--- a/labelbox/schema/data_row.py
+++ b/labelbox/schema/data_row.py
@@ -1,5 +1,6 @@
 import logging
 from typing import TYPE_CHECKING
+import json
 
 from labelbox.orm import query
 from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable
@@ -64,6 +65,14 @@ def __init__(self, *args, **kwargs):
         self.attachments.supports_filtering = False
         self.attachments.supports_sorting = False
 
+    def update(self, **kwargs):
+        # Serialize row_data to a JSON string when it is passed as a dict;
+        # every other keyword argument is forwarded to the base update as-is.
+        row_data = kwargs.get("row_data")
+        if isinstance(row_data, dict):
+            kwargs['row_data'] = json.dumps(kwargs['row_data'])
+        super().update(**kwargs)
+
     @staticmethod
     def bulk_delete(data_rows) -> None:
         """ Deletes all the given DataRows.
diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py
index d256caf60..bcbd2c315 100644
--- a/tests/integration/test_data_rows.py
+++ b/tests/integration/test_data_rows.py
@@ -459,6 +459,13 @@ def test_data_row_update(dataset, rand_gen, image_url):
     data_row.update(external_id=external_id_2)
     assert data_row.external_id == external_id_2
 
+    data_row.update(row_data="123")
+    assert data_row.row_data == "123"
+
+    # tileLayer becomes a media attribute
+    data_row.update(row_data={'pdfUrl': "123", "tileLayerUrl": "123"})
+    assert data_row.row_data == "123"
+
 
 def test_data_row_filtering_sorting(dataset, image_url):
     task = dataset.create_data_rows([

From c1812cd22b4e75b80734245529dde721d2b1eb54 Mon Sep 17 00:00:00 2001
From: Matt Sokoloff <msokoloff@labelbox.com>
Date: Thu, 10 Nov 2022 13:18:57 -0500
Subject: [PATCH 14/14] fix data row update test

---
 tests/integration/test_data_rows.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tests/integration/test_data_rows.py b/tests/integration/test_data_rows.py
index bcbd2c315..d885c4391 100644
--- a/tests/integration/test_data_rows.py
+++ b/tests/integration/test_data_rows.py
@@ -459,12 +459,17 @@ def test_data_row_update(dataset, rand_gen, image_url):
     data_row.update(external_id=external_id_2)
     assert data_row.external_id == external_id_2
 
-    data_row.update(row_data="123")
-    assert data_row.row_data == "123"
+    in_line_content = "123"
+    data_row.update(row_data=in_line_content)
+    assert requests.get(data_row.row_data).text == in_line_content
+
+    data_row.update(row_data=image_url)
+    assert data_row.row_data == image_url
 
     # tileLayer becomes a media attribute
-    data_row.update(row_data={'pdfUrl': "123", "tileLayerUrl": "123"})
-    assert data_row.row_data == "123"
+    pdf_url = "http://somepdfurl"
+    data_row.update(row_data={'pdfUrl': pdf_url, "tileLayerUrl": "123"})
+    assert data_row.row_data == pdf_url
 
 
 def test_data_row_filtering_sorting(dataset, image_url):