Skip to content
15 changes: 12 additions & 3 deletions labelbox/schema/asset_metadata.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from enum import Enum

from labelbox.orm.db_object import DbObject
from labelbox.orm.model import Field

Expand All @@ -9,9 +11,16 @@ class AssetMetadata(DbObject):
meta_type (str): IMAGE, VIDEO, TEXT, or IMAGE_OVERLAY
meta_value (str): URL to an external file or a string of text
"""
VIDEO = "VIDEO"
IMAGE = "IMAGE"
TEXT = "TEXT"

class MetaType(Enum):
    """Asset metadata types accepted by the API (see `meta_type` field)."""
    VIDEO = "VIDEO"
    IMAGE = "IMAGE"
    TEXT = "TEXT"
    IMAGE_OVERLAY = "IMAGE_OVERLAY"

# For backwards compatibility: mirror each MetaType member's string value
# as a class-level attribute (e.g. AssetMetadata.VIDEO == "VIDEO"), so
# callers written before the enum existed keep working.
for topic in MetaType:
    vars()[topic.name] = topic.value

meta_type = Field.String("meta_type")
meta_value = Field.String("meta_value")
16 changes: 14 additions & 2 deletions labelbox/schema/data_row.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from labelbox.orm import query
from labelbox.orm.db_object import DbObject, Updateable, BulkDeletable
from labelbox.orm.model import Entity, Field, Relationship
from labelbox.pagination import PaginatedCollection
from labelbox.schema.asset_metadata import AssetMetadata


class DataRow(DbObject, Updateable, BulkDeletable):
Expand Down Expand Up @@ -34,6 +34,10 @@ class DataRow(DbObject, Updateable, BulkDeletable):
metadata = Relationship.ToMany("AssetMetadata", False, "metadata")
predictions = Relationship.ToMany("Prediction", False)

# The set of string values accepted for `meta_type` in `create_metadata`,
# derived from AssetMetadata.MetaType (e.g. "VIDEO", "IMAGE").
supported_meta_types = {
    meta_type.value for meta_type in AssetMetadata.MetaType
}

@staticmethod
def bulk_delete(data_rows):
""" Deletes all the given DataRows.
Expand All @@ -55,11 +59,19 @@ def create_metadata(self, meta_type, meta_value):

Args:
meta_type (str): Asset metadata type, must be one of:
VIDEO, IMAGE, TEXT.
VIDEO, IMAGE, TEXT, IMAGE_OVERLAY (AssetMetadata.MetaType)
meta_value (str): Asset metadata value.
Returns:
`AssetMetadata` DB object.
Raises:
ValueError: meta_type must be one of the supported types.
"""

if meta_type not in self.supported_meta_types:
raise ValueError(
f"meta_type must be one of {self.supported_meta_types}. Found {meta_type}"
)

meta_type_param = "metaType"
meta_value_param = "metaValue"
data_row_id_param = "dataRowId"
Expand Down
37 changes: 32 additions & 5 deletions labelbox/schema/dataset.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
import os
import json
import logging
from itertools import islice
from multiprocessing.dummy import Pool as ThreadPool
import os

from labelbox.exceptions import InvalidQueryError, ResourceNotFoundError, InvalidAttributeError
from labelbox.orm.db_object import DbObject, Updateable, Deletable
from labelbox.orm.model import Entity, Field, Relationship

logger = logging.getLogger(__name__)


class Dataset(DbObject, Updateable, Deletable):
""" A Dataset is a collection of DataRows.
Expand Down Expand Up @@ -163,12 +167,13 @@ def convert_item(item):
task._user = user
return task

def data_row_for_external_id(self, external_id):
def data_rows_for_external_id(self, external_id, limit=10):
""" Convenience method for getting a single `DataRow` belonging to this
`Dataset` that has the given `external_id`.

Args:
external_id (str): External ID of the sought `DataRow`.
limit (int): The maximum number of data rows to return for the given external_id

Returns:
A list of `DataRow`s with the given external ID (at most `limit`).
Expand All @@ -182,10 +187,32 @@ def data_row_for_external_id(self, external_id):
where = DataRow.external_id == external_id

data_rows = self.data_rows(where=where)
# Get at most two data_rows.
data_rows = [row for row, _ in zip(data_rows, range(2))]
# Get at most `limit` data_rows.
data_rows = list(islice(data_rows, limit))

if len(data_rows) != 1:
if not len(data_rows):
raise ResourceNotFoundError(DataRow, where)
return data_rows

def data_row_for_external_id(self, external_id):
    """ Convenience method for getting a single `DataRow` belonging to this
    `Dataset` that has the given `external_id`.

    Args:
        external_id (str): External ID of the sought `DataRow`.

    Returns:
        A single `DataRow` with the given external ID. If multiple
        `DataRows` match, a warning is logged and the first one is
        returned; use `data_rows_for_external_id` to fetch all of them.

    Raises:
        labelbox.exceptions.ResourceNotFoundError: If there is no `DataRow`
            in this `DataSet` with the given external ID.
    """
    # Fetch at most two rows: one is the result, a second only signals
    # that the external_id is ambiguous.
    data_rows = self.data_rows_for_external_id(external_id=external_id,
                                               limit=2)
    if len(data_rows) > 1:
        # %-style lazy formatting: the message is only built if the
        # warning is actually emitted (an f-string here was redundant).
        logger.warning(
            "More than one data_row has the provided external_id : `%s`. Use function data_rows_for_external_id to fetch all",
            external_id)
    return data_rows[0]
86 changes: 78 additions & 8 deletions labelbox/schema/project.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from collections import namedtuple
from datetime import datetime, timezone
import json
import time
import logging
from collections import namedtuple
from datetime import datetime, timezone
from pathlib import Path
import time
from typing import Dict, List, Union, Iterable
from urllib.parse import urlparse

from labelbox import utils
from labelbox.schema.bulk_import_request import BulkImportRequest
from labelbox.schema.data_row import DataRow
from labelbox.exceptions import InvalidQueryError
from labelbox.orm import query
from labelbox.orm.db_object import DbObject, Updateable, Deletable
Expand Down Expand Up @@ -88,6 +89,9 @@ def create_label(self, **kwargs):
# deprecated and we don't want the Py client lib user to know
# about them. At the same time they're connected to a Label at
# label creation in a non-standard way (connect via name).
logger.warning(
"`create_label` is deprecated and is not compatible with the new editor."
)

Label = Entity.Label

Expand Down Expand Up @@ -196,7 +200,7 @@ def upsert_instructions(self, instructions_file: str):
frontendId = frontend.uid

if frontend.name != "Editor":
logger.warn(
logger.warning(
f"This function has only been tested to work with the Editor front end. Found %s",
frontend.name)

Expand Down Expand Up @@ -312,18 +316,66 @@ def setup(self, labeling_frontend, labeling_frontend_options):
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
self.update(setup_complete=timestamp)

def validate_labeling_parameter_overrides(self, data):
    """ Validates data passed to `set_labeling_parameter_overrides`.

    Args:
        data (iterable): An iterable of tuples, each containing
            (DataRow, priority<int>, num_labels<int>).

    Raises:
        TypeError: If a row does not contain exactly three items, if the
            first item is not a DataRow, or if priority / num_labels is
            not an int.
        ValueError: If priority or num_labels is less than 1.
    """
    for idx, row in enumerate(data):
        if len(row) != 3:
            raise TypeError(
                f"Data must be a list of tuples containing a DataRow, priority (int), num_labels (int). Found {len(row)} items. Index: {idx}"
            )
        data_row, priority, num_labels = row
        if not isinstance(data_row, DataRow):
            # Fixed typo in the message ("be be" -> "be").
            raise TypeError(
                f"data_row should be of type DataRow. Found {type(data_row)}. Index: {idx}"
            )

        for name, value in (("Priority", priority),
                            ("Number of labels", num_labels)):
            # NOTE(review): bool is a subclass of int, so True/False would
            # pass this check — presumably acceptable; confirm if stricter
            # validation is wanted.
            if not isinstance(value, int):
                raise TypeError(
                    f"{name} must be an int. Found {type(value)} for data_row {data_row}. Index: {idx}"
                )
            if value < 1:
                raise ValueError(
                    f"{name} must be greater than 0 for data_row {data_row}. Index: {idx}"
                )

def set_labeling_parameter_overrides(self, data):
""" Adds labeling parameter overrides to this project.


See information on priority here:
https://docs.labelbox.com/en/configure-editor/queue-system#reservation-system

>>> project.set_labeling_parameter_overrides([
>>> (data_row_1, 2, 3), (data_row_2, 1, 4)])

Args:
data (iterable): An iterable of tuples. Each tuple must contain
(DataRow, priority, numberOfLabels) for the new override.
(DataRow, priority<int>, number_of_labels<int>) for the new override.

Priority:
* Data will be labeled in priority order.
- A lower number priority is labeled first.
- Minimum priority is 1.
* Priority is not the queue position.
- The position is determined by the relative priority.
- E.g. [(data_row_1, 5,1), (data_row_2, 2,1), (data_row_3, 10,1)]
will be assigned in the following order: [data_row_2, data_row_1, data_row_3]
* Datarows with parameter overrides will appear before datarows without overrides.
* The priority only affects items in the queue.
- Assigning a priority will not automatically add the item back into the queue.
Number of labels:
* The number of times a data row should be labeled.
- Creates duplicate data rows in a project (one for each number of labels).
* New duplicated data rows will be added to the queue.
- Already labeled duplicates will not be sent back to the queue.
* The queue will never assign the same datarow to a single labeler more than once.
- If the number of labels is greater than the number of labelers working on a project then
the extra items will remain in the queue (this can be fixed by removing the override at any time).
* Setting this to 1 will result in the default behavior (no duplicates).
Returns:
bool, indicates if the operation was a success.
"""
self.validate_labeling_parameter_overrides(data)
data_str = ",\n".join(
"{dataRow: {id: \"%s\"}, priority: %d, numLabels: %d }" %
(data_row.uid, priority, num_labels)
Expand All @@ -338,6 +390,8 @@ def set_labeling_parameter_overrides(self, data):
def unset_labeling_parameter_overrides(self, data_rows):
""" Removes labeling parameter overrides to this project.

* This will remove unlabeled duplicates in the queue.

Args:
data_rows (iterable): An iterable of DataRows.
Returns:
Expand All @@ -353,12 +407,19 @@ def unset_labeling_parameter_overrides(self, data_rows):
return res["project"]["unsetLabelingParameterOverrides"]["success"]

def upsert_review_queue(self, quota_factor):
""" Reinitiates the review queue for this project.
""" Sets the the proportion of total assets in a project to review.

More information can be found here:
https://docs.labelbox.com/en/quality-assurance/review-labels#configure-review-percentage

Args:
quota_factor (float): Which part (percentage) of the queue
to reinitiate. Between 0 and 1.
"""

if not 0. < quota_factor < 1.:
raise ValueError("Quota factor must be in the range of [0,1]")

id_param = "projectId"
quota_param = "quotaFactor"
query_str = """mutation UpsertReviewQueuePyApi($%s: ID!, $%s: Float!){
Expand All @@ -373,7 +434,6 @@ def upsert_review_queue(self, quota_factor):
def extend_reservations(self, queue_type):
""" Extends all the current reservations for the current user on the given
queue type.

Args:
queue_type (str): Either "LabelingQueue" or "ReviewQueue"
Returns:
Expand All @@ -398,6 +458,11 @@ def create_prediction_model(self, name, version):
Returns:
A newly created PredictionModel.
"""

logger.warning(
"`create_prediction_model` is deprecated and is not compatible with the new editor."
)

PM = Entity.PredictionModel
model = self.client._create(PM, {
PM.name.name: name,
Expand All @@ -423,6 +488,10 @@ def create_prediction(self, label, data_row, prediction_model=None):
is None and this Project's active_prediction_model is also
None.
"""
logger.warning(
"`create_prediction` is deprecated and is not compatible with the new editor."
)

if prediction_model is None:
prediction_model = self.active_prediction_model()
if prediction_model is None:
Expand Down Expand Up @@ -495,6 +564,7 @@ def upload_annotations(
Returns:
BulkImportRequest
"""

if isinstance(annotations, str) or isinstance(annotations, Path):

def _is_url_valid(url: Union[str, Path]) -> bool:
Expand Down
Loading