Merged
15 changes: 15 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,20 @@
# Changelog

# Version 3.20.0 (2022-04-27)
## Added
> **Contributor:** Since this is going to be a new version of the SDK, can we also include information on this merged PR? #540
>
> Updated
> * NDJsonConverter now supports Video bounding box annotations.
>   * Note: Currently does not support nested classifications.
>   * Note: Converting an export into Labelbox annotation types and back to an export will result in only keyframe annotations; this is to support the correct import format.
>
> cc @msokoloff1 if this wording looks okay

> **Contributor:** Yup, looks good. Thanks!

* Batches in a project can be retrieved with `project.batches()`
* Added `Batch.remove_queued_data_rows()` to cancel remaining data rows in a batch
* Added `Batch.export_data_rows()` which returns `DataRow`s for a batch

## Updated
* NDJsonConverter now supports Video bounding box annotations.
  * Note: Currently does not support nested classifications.
  * Note: Converting an export into Labelbox annotation types and back to an export will result in only keyframe annotations; this is to support the correct import format.


## Fix
* `batch.project()` now works

# Version 3.19.1 (2022-04-14)
## Fix
* `create_data_rows` and `create_data_rows_sync` now upload the file with a mimetype
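Taken together, the new batch APIs in this release compose as follows — a minimal sketch, assuming a valid API key and an existing project already in Batch queue mode (the key and ID strings below are placeholders):

```python
from labelbox import Client

client = Client(api_key="<LB_API_KEY>")        # placeholder credentials
project = client.get_project("<PROJECT_ID>")   # placeholder project ID

# New in 3.20.0: iterate over every batch attached to the project.
for batch in project.batches():
    print(batch.name, batch.size)

    # Export the DataRows currently in the batch (results may be cached
    # server-side for up to 30 minutes, per the docstring further down).
    for data_row in batch.export_data_rows():
        print(" ", data_row.uid)

    # Cancel whatever is still waiting in the labeling queue.
    batch.remove_queued_data_rows()
```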
2 changes: 1 addition & 1 deletion labelbox/__init__.py
@@ -1,5 +1,5 @@
name = "labelbox"
__version__ = "3.19.1"
__version__ = "3.20.0"

import sys
import warnings
1 change: 1 addition & 0 deletions labelbox/orm/model.py
@@ -347,6 +347,7 @@ class Entity(metaclass=EntityMeta):
    Invite: Type[labelbox.Invite]
    InviteLimit: Type[labelbox.InviteLimit]
    ProjectRole: Type[labelbox.ProjectRole]
+    Project: Type[labelbox.Project]
    Batch: Type[labelbox.Batch]

    @classmethod
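The one-line registration above is what lets other modules resolve `labelbox.Project` lazily through `Entity`, avoiding a circular import (the new `Batch.project()` below relies on `Entity.Project`). A simplified, hypothetical sketch of the pattern — not the SDK's actual metaclass:

```python
# Hypothetical, stripped-down version of the lazy entity registry.
class EntityMeta(type):
    _registry: dict = {}

    def __getattr__(cls, name):
        # Resolve entity classes by name at attribute-access time, so that
        # modules can reference each other without importing at module load.
        try:
            return cls._registry[name]
        except KeyError:
            raise AttributeError(name)

class Entity(metaclass=EntityMeta):
    pass

class Project:  # stand-in for labelbox.Project
    pass

EntityMeta._registry["Project"] = Project

assert Entity.Project is Project  # Batch.project() can now use Entity.Project
```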
4 changes: 2 additions & 2 deletions labelbox/pagination.py
@@ -25,7 +25,7 @@ def __init__(self,
                 params: Dict[str, str],
                 dereferencing: Union[List[str], Dict[str, Any]],
                 obj_class: Union[Type["DbObject"], Callable[[Any, Any], Any]],
-                cursor_path: Optional[Dict[str, Any]] = None,
+                cursor_path: Optional[List[str]] = None,
                 experimental: bool = False):
        """ Creates a PaginatedCollection.

@@ -105,7 +105,7 @@ def get_next_page(self) -> Tuple[Dict[str, Any], bool]:

class _CursorPagination(_Pagination):

-    def __init__(self, cursor_path: Dict[str, Any], *args, **kwargs):
+    def __init__(self, cursor_path: List[str], *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cursor_path = cursor_path
        self.next_cursor: Optional[Any] = None
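The annotation change reflects how the cursor path is actually consumed: a list of keys walked into the nested GraphQL response to find the next cursor. A hypothetical sketch of that traversal (an illustrative helper, not the SDK's implementation):

```python
from typing import Any, Dict, List, Optional

def get_end_cursor(response: Dict[str, Any],
                   cursor_path: List[str]) -> Optional[str]:
    """Follow cursor_path key by key into the nested response dict."""
    node: Any = response
    for key in cursor_path:
        if not isinstance(node, dict) or key not in node:
            return None
        node = node[key]
    return node

# Matches the cursor_path used by Project.batches() further down:
response = {"project": {"batches": {"pageInfo": {"endCursor": "abc123"}}}}
assert get_end_cursor(
    response, ["project", "batches", "pageInfo", "endCursor"]) == "abc123"
```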
101 changes: 98 additions & 3 deletions labelbox/schema/batch.py
@@ -1,5 +1,18 @@
-from labelbox.orm.db_object import DbObject
-from labelbox.orm.model import Field, Relationship
+from typing import Generator, TYPE_CHECKING
+from labelbox.orm.db_object import DbObject, experimental
+from labelbox.orm import query
+from labelbox.orm.model import Entity, Field, Relationship
+from labelbox.exceptions import LabelboxError, ResourceNotFoundError
+from io import StringIO
+import ndjson
+import requests
+import logging
+import time
+
+if TYPE_CHECKING:
+    from labelbox import Project
+
+logger = logging.getLogger(__name__)


class Batch(DbObject):
@@ -21,5 +34,87 @@ class Batch(DbObject):
    size = Field.Int("size")

    # Relationships
-    project = Relationship.ToOne("Project")
    created_by = Relationship.ToOne("User")

    def __init__(self, client, project_id, *args, **kwargs):
        super().__init__(client, *args, **kwargs)
        self.project_id = project_id

    def project(self) -> 'Project':  # type: ignore
        """ Returns the `Project` this `Batch` belongs to.

        Raises:
            ResourceNotFoundError: if the project is not found
        """
        query_str = """query getProjectPyApi($projectId: ID!) {
            project(
                where: {id: $projectId}){
                    %s
                }}""" % query.results_query_part(Entity.Project)
        params = {"projectId": self.project_id}
        response = self.client.execute(query_str, params)

        if response is None:
            raise ResourceNotFoundError(Entity.Project, params)

        return Entity.Project(self.client, response["project"])

    def remove_queued_data_rows(self) -> None:
        """ Removes remaining queued data rows from the batch and the labeling queue. """

        project_id_param = "projectId"
        batch_id_param = "batchId"
        self.client.execute(
            """mutation ArchiveBatchPyApi($%s: ID!, $%s: ID!) {
                project(where: {id: $%s}) { archiveBatch(batchId: $%s) { id archivedAt } }
            }""" % (project_id_param, batch_id_param, project_id_param,
                    batch_id_param), {
                        project_id_param: self.project_id,
                        batch_id_param: self.uid
                    },
            experimental=True)

> is this the same output structure as dataset?
>
> **Contributor (Author):** yes

    def export_data_rows(self, timeout_seconds=120) -> Generator:
""" Returns a generator that produces all data rows that are currently
in this batch.

Note: For efficiency, the data are cached for 30 minutes. Newly created data rows will not appear
until the end of the cache period.

Args:
timeout_seconds (float): Max waiting time, in seconds.
Returns:
Generator that yields DataRow objects belonging to this batch.
Raises:
LabelboxError: if the export fails or is unable to download within the specified time.
"""
id_param = "batchId"
query_str = """mutation GetBatchDataRowsExportUrlPyApi($%s: ID!)
{exportBatchDataRows(data:{batchId: $%s }) {downloadUrl createdAt status}}
""" % (id_param, id_param)
sleep_time = 2
while True:
res = self.client.execute(query_str, {id_param: self.uid})
res = res["exportBatchDataRows"]
if res["status"] == "COMPLETE":
download_url = res["downloadUrl"]
response = requests.get(download_url)
response.raise_for_status()
reader = ndjson.reader(StringIO(response.text))
return (
Entity.DataRow(self.client, result) for result in reader)
elif res["status"] == "FAILED":
raise LabelboxError("Data row export failed.")

timeout_seconds -= sleep_time
if timeout_seconds <= 0:
raise LabelboxError(
f"Unable to export data rows within {timeout_seconds} seconds."
)

logger.debug("Batch '%s' data row export, waiting for server...",
self.uid)
time.sleep(sleep_time)
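The method polls the export mutation every two seconds until the report is `COMPLETE`, then streams the NDJSON payload back lazily as `DataRow` objects. A hedged usage sketch, assuming `batch` was obtained from `project.batches()` as in the earlier example:

```python
from labelbox.exceptions import LabelboxError

try:
    # Blocks (polling every 2s) until the export is ready, then yields lazily.
    for data_row in batch.export_data_rows(timeout_seconds=180):
        print(data_row.uid, data_row.external_id)
except LabelboxError as err:
    # Raised when the export reports FAILED or the timeout elapses.
    print(f"Export did not complete: {err}")
```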
22 changes: 20 additions & 2 deletions labelbox/schema/project.py
@@ -298,7 +298,7 @@ def export_labels(self,

        def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str:
            """Returns a concatenated string of the dictionary's keys and values

            The string will be formatted as {key}: 'value' for each key. Value will be inclusive of
            quotations while key will not. This can be toggled with `value_with_quotes`"""
@@ -609,7 +609,7 @@ def create_batch(self, name: str, data_rows: List[str], priority: int = 5):
            experimental=True)["project"][method]

        res['size'] = len(dr_ids)
-        return Entity.Batch(self.client, res)
+        return Entity.Batch(self.client, self.uid, res)

    def _update_queue_mode(self,
                           mode: "Project.QueueMode") -> "Project.QueueMode":
@@ -840,6 +840,24 @@ def bulk_import_requests(self) -> PaginatedCollection:
["bulkImportRequests"],
Entity.BulkImportRequest)

    def batches(self) -> PaginatedCollection:
        """ Fetch all batches that belong to this project

        Returns:
            A `PaginatedCollection` of `Batch`es
        """
        id_param = "projectId"
        query_str = """query GetProjectBatchesPyApi($from: String, $first: PageSize, $%s: ID!) {
            project(where: {id: $%s}) {id
                batches(after: $from, first: $first) { nodes { %s } pageInfo { endCursor }}}}
        """ % (id_param, id_param, query.results_query_part(Entity.Batch))
        return PaginatedCollection(
            self.client,
            query_str, {id_param: self.uid}, ['project', 'batches', 'nodes'],
            lambda client, res: Entity.Batch(client, self.uid, res),
            cursor_path=['project', 'batches', 'pageInfo', 'endCursor'],
            experimental=True)

    def upload_annotations(
            self,
            name: str,
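The changed `create_batch` return earlier in this file threads the project's `uid` into the `Batch` constructor, which is what makes the new `Batch.project()` round trip possible. A small sketch, assuming a `project` in Batch queue mode; `data_row_ids` is a placeholder list of DataRow uids gathered elsewhere:

```python
# data_row_ids is a hypothetical list of DataRow uids for this example.
batch = project.create_batch("april-import", data_row_ids, priority=3)

# Before this fix, the Batch carried no project id to resolve; now the
# relationship works in both directions.
assert batch.project().uid == project.uid
```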
55 changes: 55 additions & 0 deletions tests/integration/test_batch.py
@@ -18,10 +18,65 @@ def big_dataset(dataset: Dataset):
    yield dataset


@pytest.fixture
def small_dataset(dataset: Dataset):
    task = dataset.create_data_rows([
        {
            "row_data": IMAGE_URL,
            "external_id": "my-image"
        },
    ] * 3)
    task.wait_till_done()

    yield dataset


def test_create_batch(configured_project: Project, big_dataset: Dataset):
    configured_project.update(queue_mode=Project.QueueMode.Batch)

    data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())]
    batch = configured_project.create_batch("test-batch", data_rows, 3)
    assert batch.name == 'test-batch'
    assert batch.size == len(data_rows)


def test_archive_batch(configured_project: Project, small_dataset: Dataset):
    data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
    configured_project.update(queue_mode=Project.QueueMode.Batch)
    batch = configured_project.create_batch("batch to archive", data_rows)
    batch.remove_queued_data_rows()
    exported_data_rows = list(batch.export_data_rows())

    assert len(exported_data_rows) == 0


def test_batch_project(configured_project: Project, small_dataset: Dataset):
    data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
    configured_project.update(queue_mode=Project.QueueMode.Batch)
    batch = configured_project.create_batch(
        "batch to test project relationship", data_rows)
    project_from_batch = batch.project()

    assert project_from_batch.uid == configured_project.uid
    assert project_from_batch.name == configured_project.name


def test_export_data_rows(configured_project: Project, dataset: Dataset):
    n_data_rows = 5
    task = dataset.create_data_rows([
        {
            "row_data": IMAGE_URL,
            "external_id": "my-image"
        },
    ] * n_data_rows)
    task.wait_till_done()

    data_rows = [dr.uid for dr in list(dataset.export_data_rows())]
    configured_project.update(queue_mode=Project.QueueMode.Batch)
    batch = configured_project.create_batch("batch test", data_rows)

    result = list(batch.export_data_rows())
    exported_data_rows = [dr.uid for dr in result]

    assert len(result) == n_data_rows
    assert set(data_rows) == set(exported_data_rows)
21 changes: 20 additions & 1 deletion tests/integration/test_project.py
@@ -4,7 +4,7 @@
import pytest
import requests

-from labelbox import Project, LabelingFrontend
+from labelbox import Project, LabelingFrontend, Dataset
from labelbox.exceptions import InvalidQueryError


@@ -201,3 +201,22 @@ def test_queue_mode(configured_project: Project):
    ) == configured_project.QueueMode.Dataset
    configured_project.update(queue_mode=configured_project.QueueMode.Batch)
    assert configured_project.queue_mode() == configured_project.QueueMode.Batch


def test_batches(configured_project: Project, dataset: Dataset, image_url):
    task = dataset.create_data_rows([
        {
            "row_data": image_url,
            "external_id": "my-image"
        },
    ] * 2)
    task.wait_till_done()
    configured_project.update(queue_mode=configured_project.QueueMode.Batch)
    data_rows = [dr.uid for dr in list(dataset.export_data_rows())]
    batch_one = 'batch one'
    batch_two = 'batch two'
    configured_project.create_batch(batch_one, [data_rows[0]])
    configured_project.create_batch(batch_two, [data_rows[1]])

    names = set([batch.name for batch in list(configured_project.batches())])
    assert names == set([batch_one, batch_two])