Skip to content
15 changes: 12 additions & 3 deletions labelbox/schema/asset_metadata.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from enum import Enum

from labelbox.orm.db_object import DbObject
from labelbox.orm.model import Field

Expand All @@ -9,9 +11,16 @@ class AssetMetadata(DbObject):
meta_type (str): IMAGE, VIDEO, TEXT, or IMAGE_OVERLAY
meta_value (str): URL to an external file or a string of text
"""
VIDEO = "VIDEO"
IMAGE = "IMAGE"
TEXT = "TEXT"

class MetaType(Enum):
    """Asset metadata types accepted by the API (see `meta_type` field)."""
    VIDEO = "VIDEO"
    IMAGE = "IMAGE"
    TEXT = "TEXT"
    IMAGE_OVERLAY = "IMAGE_OVERLAY"

# For backwards compatibility: mirror each MetaType member's string value
# as a class-level attribute (e.g. AssetMetadata.VIDEO == "VIDEO"), so
# callers written before the enum existed keep working.
for topic in MetaType:
    vars()[topic.name] = topic.value

meta_type = Field.String("meta_type")
meta_value = Field.String("meta_value")
16 changes: 14 additions & 2 deletions labelbox/schema/data_row.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from labelbox.orm import query
from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable
from labelbox.orm.model import Entity, Field, Relationship
from labelbox.pagination import PaginatedCollection
from labelbox.schema.asset_metadata import AssetMetadata


class DataRow(DbObject, Updateable, BulkDeletable):
Expand Down Expand Up @@ -34,6 +34,10 @@ class DataRow(DbObject, Updateable, BulkDeletable):
metadata = Relationship.ToMany("AssetMetadata", False, "metadata")
predictions = Relationship.ToMany("Prediction", False)

# The set of string values accepted for `meta_type` in `create_metadata`,
# derived from AssetMetadata.MetaType (e.g. "VIDEO", "IMAGE").
supported_meta_types = {
    meta_type.value for meta_type in AssetMetadata.MetaType
}

@staticmethod
def bulk_delete(data_rows):
""" Deletes all the given DataRows.
Expand All @@ -55,11 +59,19 @@ def create_metadata(self, meta_type, meta_value):

Args:
meta_type (str): Asset metadata type, must be one of:
VIDEO, IMAGE, TEXT.
VIDEO, IMAGE, TEXT, IMAGE_OVERLAY (AssetMetadata.MetaType)
meta_value (str): Asset metadata value.
Returns:
`AssetMetadata` DB object.
Raises:
ValueError: meta_type must be one of the supported types.
"""

if meta_type not in self.supported_meta_types:
raise ValueError(
f"meta_type must be one of {self.supported_meta_types}. Found {meta_type}"
)

meta_type_param = "metaType"
meta_value_param = "metaValue"
data_row_id_param = "dataRowId"
Expand Down
37 changes: 32 additions & 5 deletions labelbox/schema/dataset.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import os
import json
import logging
from itertools import islice
from multiprocessing.dummy import Pool as ThreadPool
import os

from labelbox.exceptions import InvalidQueryError, ResourceNotFoundError, InvalidAttributeError
from labelbox.orm.db_object import DbObject, Updateable, Deletable
from labelbox.orm.model import Entity, Field, Relationship

logger = logging.getLogger(__name__)


class Dataset(DbObject, Updateable, Deletable):
""" A Dataset is a collection of DataRows.
Expand Down Expand Up @@ -163,12 +167,13 @@ def convert_item(item):
task._user = user
return task

def data_row_for_external_id(self, external_id):
def data_rows_for_external_id(self, external_id, limit=10):
""" Convenience method for getting a single `DataRow` belonging to this
`Dataset` that has the given `external_id`.

Args:
external_id (str): External ID of the sought `DataRow`.
limit (int): The maximum number of data rows to return for the given external_id

Returns:
A list of `DataRow`s with the given external ID (at most `limit`).
Expand All @@ -182,10 +187,32 @@ def data_row_for_external_id(self, external_id):
where = DataRow.external_id == external_id

data_rows = self.data_rows(where=where)
# Get at most two data_rows.
data_rows = [row for row, _ in zip(data_rows, range(2))]
# Get at most `limit` data_rows.
data_rows = list(islice(data_rows, limit))

if len(data_rows) != 1:
if not len(data_rows):
raise ResourceNotFoundError(DataRow, where)
return data_rows

def data_row_for_external_id(self, external_id):
    """ Convenience method for getting a single `DataRow` belonging to this
    `Dataset` that has the given `external_id`.

    Args:
        external_id (str): External ID of the sought `DataRow`.

    Returns:
        A single `DataRow` with the given external ID. If multiple
        `DataRows` match, a warning is logged and the first one is
        returned; use `data_rows_for_external_id` to fetch all of them.

    Raises:
        labelbox.exceptions.ResourceNotFoundError: If there is no `DataRow`
            in this `DataSet` with the given external ID.
    """
    # Fetch at most two rows: one is the result, a second only signals
    # that the external_id is ambiguous.
    data_rows = self.data_rows_for_external_id(external_id=external_id,
                                               limit=2)
    if len(data_rows) > 1:
        # %-style lazy formatting: the message is only built if the
        # warning is actually emitted (an f-string here was redundant).
        logger.warning(
            "More than one data_row has the provided external_id : `%s`. Use function data_rows_for_external_id to fetch all",
            external_id)
    return data_rows[0]
86 changes: 78 additions & 8 deletions labelbox/schema/project.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from collections import namedtuple
from datetime import datetime, timezone
import json
import time
import logging
from collections import namedtuple
from datetime import datetime, timezone
from pathlib import Path
import time
from typing import Dict, List, Union, Iterable
from urllib.parse import urlparse

from labelbox import utils
from labelbox.schema.bulk_import_request import BulkImportRequest
from labelbox.schema.data_row import DataRow
from labelbox.exceptions import InvalidQueryError
from labelbox.orm import query
from labelbox.orm.db_object import DbObject, Updateable, Deletable
Expand Down Expand Up @@ -88,6 +89,9 @@ def create_label(self, **kwargs):
# deprecated and we don't want the Py client lib user to know
# about them. At the same time they're connected to a Label at
# label creation in a non-standard way (connect via name).
logger.warning(
"`create_label` is deprecated and is not compatible with the new editor."
)

Label = Entity.Label

Expand Down Expand Up @@ -196,7 +200,7 @@ def upsert_instructions(self, instructions_file: str):
frontendId = frontend.uid

if frontend.name != "Editor":
logger.warn(
logger.warning(
f"This function has only been tested to work with the Editor front end. Found %s",
frontend.name)

Expand Down Expand Up @@ -312,18 +316,66 @@ def setup(self, labeling_frontend, labeling_frontend_options):
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
self.update(setup_complete=timestamp)

def validate_labeling_parameter_overrides(self, data):
    """ Validates data passed to `set_labeling_parameter_overrides`.

    Args:
        data (iterable): An iterable of tuples, each containing
            (DataRow, priority<int>, num_labels<int>).

    Raises:
        TypeError: If a row does not contain exactly three items, if the
            first item is not a DataRow, or if priority / num_labels is
            not an int.
        ValueError: If priority or num_labels is less than 1.
    """
    for idx, row in enumerate(data):
        if len(row) != 3:
            raise TypeError(
                f"Data must be a list of tuples containing a DataRow, priority (int), num_labels (int). Found {len(row)} items. Index: {idx}"
            )
        data_row, priority, num_labels = row
        if not isinstance(data_row, DataRow):
            # Fixed typo in the message ("be be" -> "be").
            raise TypeError(
                f"data_row should be of type DataRow. Found {type(data_row)}. Index: {idx}"
            )

        for name, value in (("Priority", priority),
                            ("Number of labels", num_labels)):
            # NOTE(review): bool is a subclass of int, so True/False would
            # pass this check — presumably acceptable; confirm if stricter
            # validation is wanted.
            if not isinstance(value, int):
                raise TypeError(
                    f"{name} must be an int. Found {type(value)} for data_row {data_row}. Index: {idx}"
                )
            if value < 1:
                raise ValueError(
                    f"{name} must be greater than 0 for data_row {data_row}. Index: {idx}"
                )

def set_labeling_parameter_overrides(self, data):
""" Adds labeling parameter overrides to this project.


See information on priority here:
https://docs.labelbox.com/en/configure-editor/queue-system#reservation-system

>>> project.set_labeling_parameter_overrides([
>>> (data_row_1, 2, 3), (data_row_2, 1, 4)])

Args:
data (iterable): An iterable of tuples. Each tuple must contain
(DataRow, priority, numberOfLabels) for the new override.
(DataRow, priority<int>, number_of_labels<int>) for the new override.

Priority:
* Data will be labeled in priority order.
- A lower number priority is labeled first.
- Minimum priority is 1.
* Priority is not the queue position.
- The position is determined by the relative priority.
- E.g. [(data_row_1, 5,1), (data_row_2, 2,1), (data_row_3, 10,1)]
will be assigned in the following order: [data_row_2, data_row_1, data_row_3]
* Datarows with parameter overrides will appear before datarows without overrides.
* The priority only affects items in the queue.
- Assigning a priority will not automatically add the item back into the queue.
Number of labels:
* The number of times a data row should be labeled.
- Creates duplicate data rows in a project (one for each number of labels).
* New duplicated data rows will be added to the queue.
- Already labeled duplicates will not be sent back to the queue.
* The queue will never assign the same datarow to a single labeler more than once.
- If the number of labels is greater than the number of labelers working on a project then
the extra items will remain in the queue (this can be fixed by removing the override at any time).
* Setting this to 1 will result in the default behavior (no duplicates).
Returns:
bool, indicates if the operation was a success.
"""
self.validate_labeling_parameter_overrides(data)
data_str = ",\n".join(
"{dataRow: {id: \"%s\"}, priority: %d, numLabels: %d }" %
(data_row.uid, priority, num_labels)
Expand All @@ -338,6 +390,8 @@ def set_labeling_parameter_overrides(self, data):
def unset_labeling_parameter_overrides(self, data_rows):
""" Removes labeling parameter overrides to this project.

* This will remove unlabeled duplicates in the queue.

Args:
data_rows (iterable): An iterable of DataRows.
Returns:
Expand All @@ -353,12 +407,19 @@ def unset_labeling_parameter_overrides(self, data_rows):
return res["project"]["unsetLabelingParameterOverrides"]["success"]

def upsert_review_queue(self, quota_factor):
""" Reinitiates the review queue for this project.
""" Sets the the proportion of total assets in a project to review.

More information can be found here:
https://docs.labelbox.com/en/quality-assurance/review-labels#configure-review-percentage

Args:
quota_factor (float): Which part (percentage) of the queue
to reinitiate. Between 0 and 1.
"""

if not 0. < quota_factor < 1.:
raise ValueError("Quota factor must be in the range of [0,1]")

id_param = "projectId"
quota_param = "quotaFactor"
query_str = """mutation UpsertReviewQueuePyApi($%s: ID!, $%s: Float!){
Expand All @@ -373,7 +434,6 @@ def upsert_review_queue(self, quota_factor):
def extend_reservations(self, queue_type):
""" Extends all the current reservations for the current user on the given
queue type.

Args:
queue_type (str): Either "LabelingQueue" or "ReviewQueue"
Returns:
Expand All @@ -398,6 +458,11 @@ def create_prediction_model(self, name, version):
Returns:
A newly created PredictionModel.
"""

logger.warning(
"`create_prediction_model` is deprecated and is not compatible with the new editor."
)

PM = Entity.PredictionModel
model = self.client._create(PM, {
PM.name.name: name,
Expand All @@ -423,6 +488,10 @@ def create_prediction(self, label, data_row, prediction_model=None):
is None and this Project's active_prediction_model is also
None.
"""
logger.warning(
"`create_prediction` is deprecated and is not compatible with the new editor."
)

if prediction_model is None:
prediction_model = self.active_prediction_model()
if prediction_model is None:
Expand Down Expand Up @@ -495,6 +564,7 @@ def upload_annotations(
Returns:
BulkImportRequest
"""

if isinstance(annotations, str) or isinstance(annotations, Path):

def _is_url_valid(url: Union[str, Path]) -> bool:
Expand Down
Loading