diff --git a/labelbox/schema/dataset.py b/labelbox/schema/dataset.py
index 3c98a966d..abf91da25 100644
--- a/labelbox/schema/dataset.py
+++ b/labelbox/schema/dataset.py
@@ -41,6 +41,18 @@ class DataRowUpsertItem(BaseModel):
     id: dict
     payload: dict
 
+    def is_empty(self) -> bool:
+        """
+        Tell whether this item's payload is empty.
+
+        The payload is considered empty if it's actually empty or the only
+        key is `dataset_id`.
+
+        :return: True when the payload is empty in that sense, else False.
+        """
+        return (not self.payload or
+                (len(self.payload) == 1 and "dataset_id" in self.payload))
+
 
 class Dataset(DbObject, Updateable, Deletable):
     """ A Dataset is a collection of DataRows.
@@ -829,6 +841,14 @@ def upsert_data_rows(self, items, file_upload_thread_count=20) -> "Task":
             )
 
         specs = self._convert_items_to_upsert_format(items)
+
+        # Reject items with an empty payload before any chunk is built.
+        empty_specs = [spec for spec in specs if spec.is_empty()]
+        if empty_specs:
+            ids = [spec.id.get("value") for spec in empty_specs]
+            raise ValueError(
+                f"The following items have an empty payload: {ids}")
+
         chunks = [
             specs[i:i + self.__upsert_chunk_size]
             for i in range(0, len(specs), self.__upsert_chunk_size)
diff --git a/tests/integration/test_data_rows_upsert.py b/tests/integration/test_data_rows_upsert.py
index 2cc893476..a17f87f8f 100644
--- a/tests/integration/test_data_rows_upsert.py
+++ b/tests/integration/test_data_rows_upsert.py
@@ -263,3 +263,9 @@ def test_upsert_duplicate_global_key_error(self, dataset, image_url):
         assert task.errors is not None
         assert len(task.errors) == 1  # one data row was created, one failed
         assert f"Duplicate global key: '{gkey}'" in task.errors[0]['message']
+
+    def test_upsert_empty_items(self, dataset):
+        items = [{"key": GlobalKey("foo")}]
+        with pytest.raises(ValueError) as e_info:
+            dataset.upsert_data_rows(items)
+        e_info.match(r"The following items have an empty payload: \['foo'\]")