Merged
15 changes: 15 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,20 @@
# Changelog

# Version 3.20.0 (2022-04-27)
## Added
> **Contributor:** Since this is going to be a new version of the SDK, can we also include information on this merged PR? #540
>
> Updated
> * NDJsonConverter now supports Video bounding box annotations.
>   * Note: Currently does not support nested classifications.
>   * Note: Converting an export into Labelbox annotation types and back to an export will result in only keyframe annotations; this is to support the correct import format.
>
> cc @msokoloff1 if this wording looks okay

> **Contributor:** Yup, looks good. Thanks!

* Batches in a project can be retrieved with `project.batches()`
* Added `Batch.remove_queued_data_rows()` to cancel remaining data rows in a batch
* Added `Batch.export_data_rows()` which returns `DataRow`s for a batch

## Updated
* NDJsonConverter now supports Video bounding box annotations.
  * Note: Currently does not support nested classifications.
  * Note: Converting an export into Labelbox annotation types and back to an export will result in only keyframe annotations; this is to support the correct import format.


## Fix
* `batch.project()` now works

# Version 3.19.1 (2022-04-14)
## Fix
* `create_data_rows` and `create_data_rows_sync` now upload the file with a mimetype
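Taken together, the new batch APIs in this release compose as follows — a minimal sketch, assuming a valid API key and an existing project already in Batch queue mode (the key and ID strings below are placeholders):

```python
from labelbox import Client

client = Client(api_key="<LB_API_KEY>")        # placeholder credentials
project = client.get_project("<PROJECT_ID>")   # placeholder project ID

# New in 3.20.0: iterate over every batch attached to the project.
for batch in project.batches():
    print(batch.name, batch.size)

    # Export the DataRows currently in the batch (results may be cached
    # server-side for up to 30 minutes, per the docstring further down).
    for data_row in batch.export_data_rows():
        print(" ", data_row.uid)

    # Cancel whatever is still waiting in the labeling queue.
    batch.remove_queued_data_rows()
```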
2 changes: 1 addition & 1 deletion labelbox/__init__.py
@@ -1,5 +1,5 @@
name = "labelbox"
__version__ = "3.19.1"
__version__ = "3.20.0"

import sys
import warnings
1 change: 1 addition & 0 deletions labelbox/orm/model.py
@@ -347,6 +347,7 @@ class Entity(metaclass=EntityMeta):
    Invite: Type[labelbox.Invite]
    InviteLimit: Type[labelbox.InviteLimit]
    ProjectRole: Type[labelbox.ProjectRole]
+    Project: Type[labelbox.Project]
    Batch: Type[labelbox.Batch]

    @classmethod
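The one-line registration above is what lets other modules resolve `labelbox.Project` lazily through `Entity`, avoiding a circular import (the new `Batch.project()` below relies on `Entity.Project`). A simplified, hypothetical sketch of the pattern — not the SDK's actual metaclass:

```python
# Hypothetical, stripped-down version of the lazy entity registry.
class EntityMeta(type):
    _registry: dict = {}

    def __getattr__(cls, name):
        # Resolve entity classes by name at attribute-access time, so that
        # modules can reference each other without importing at module load.
        try:
            return cls._registry[name]
        except KeyError:
            raise AttributeError(name)

class Entity(metaclass=EntityMeta):
    pass

class Project:  # stand-in for labelbox.Project
    pass

EntityMeta._registry["Project"] = Project

assert Entity.Project is Project  # Batch.project() can now use Entity.Project
```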
4 changes: 2 additions & 2 deletions labelbox/pagination.py
@@ -25,7 +25,7 @@ def __init__(self,
                 params: Dict[str, str],
                 dereferencing: Union[List[str], Dict[str, Any]],
                 obj_class: Union[Type["DbObject"], Callable[[Any, Any], Any]],
-                cursor_path: Optional[Dict[str, Any]] = None,
+                cursor_path: Optional[List[str]] = None,
                 experimental: bool = False):
        """ Creates a PaginatedCollection.

@@ -105,7 +105,7 @@ def get_next_page(self) -> Tuple[Dict[str, Any], bool]:

class _CursorPagination(_Pagination):

-    def __init__(self, cursor_path: Dict[str, Any], *args, **kwargs):
+    def __init__(self, cursor_path: List[str], *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cursor_path = cursor_path
        self.next_cursor: Optional[Any] = None
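The annotation change reflects how the cursor path is actually consumed: a list of keys walked into the nested GraphQL response to find the next cursor. A hypothetical sketch of that traversal (an illustrative helper, not the SDK's implementation):

```python
from typing import Any, Dict, List, Optional

def get_end_cursor(response: Dict[str, Any],
                   cursor_path: List[str]) -> Optional[str]:
    """Follow cursor_path key by key into the nested response dict."""
    node: Any = response
    for key in cursor_path:
        if not isinstance(node, dict) or key not in node:
            return None
        node = node[key]
    return node

# Matches the cursor_path used by Project.batches() further down:
response = {"project": {"batches": {"pageInfo": {"endCursor": "abc123"}}}}
assert get_end_cursor(
    response, ["project", "batches", "pageInfo", "endCursor"]) == "abc123"
```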
101 changes: 98 additions & 3 deletions labelbox/schema/batch.py
@@ -1,5 +1,18 @@
-from labelbox.orm.db_object import DbObject
-from labelbox.orm.model import Field, Relationship
+from typing import Generator, TYPE_CHECKING
+from labelbox.orm.db_object import DbObject, experimental
+from labelbox.orm import query
+from labelbox.orm.model import Entity, Field, Relationship
+from labelbox.exceptions import LabelboxError, ResourceNotFoundError
+from io import StringIO
+import ndjson
+import requests
+import logging
+import time
+
+if TYPE_CHECKING:
+    from labelbox import Project
+
+logger = logging.getLogger(__name__)


class Batch(DbObject):
@@ -21,5 +34,87 @@ class Batch(DbObject):
    size = Field.Int("size")

    # Relationships
-    project = Relationship.ToOne("Project")
    created_by = Relationship.ToOne("User")

    def __init__(self, client, project_id, *args, **kwargs):
        super().__init__(client, *args, **kwargs)
        self.project_id = project_id

    def project(self) -> 'Project':  # type: ignore
        """ Returns the `Project` this `Batch` belongs to.

        Raises:
            ResourceNotFoundError: if the project is not found
        """
        query_str = """query getProjectPyApi($projectId: ID!) {
            project(
                where: {id: $projectId}){
                    %s
                }}""" % query.results_query_part(Entity.Project)
        params = {"projectId": self.project_id}
        response = self.client.execute(query_str, params)

        if response is None:
            raise ResourceNotFoundError(Entity.Project, params)

        return Entity.Project(self.client, response["project"])

    def remove_queued_data_rows(self) -> None:
        """ Removes remaining queued data rows from the batch and the labeling queue. """

        project_id_param = "projectId"
        batch_id_param = "batchId"
        self.client.execute(
            """mutation ArchiveBatchPyApi($%s: ID!, $%s: ID!) {
                project(where: {id: $%s}) { archiveBatch(batchId: $%s) { id archivedAt } }
            }""" % (project_id_param, batch_id_param, project_id_param,
                    batch_id_param), {
                        project_id_param: self.project_id,
                        batch_id_param: self.uid
                    },
            experimental=True)

> is this the same output structure as dataset?
>
> **Contributor (Author):** yes

    def export_data_rows(self, timeout_seconds=120) -> Generator:
""" Returns a generator that produces all data rows that are currently
in this batch.

Note: For efficiency, the data are cached for 30 minutes. Newly created data rows will not appear
until the end of the cache period.

Args:
timeout_seconds (float): Max waiting time, in seconds.
Returns:
Generator that yields DataRow objects belonging to this batch.
Raises:
LabelboxError: if the export fails or is unable to download within the specified time.
"""
id_param = "batchId"
query_str = """mutation GetBatchDataRowsExportUrlPyApi($%s: ID!)
{exportBatchDataRows(data:{batchId: $%s }) {downloadUrl createdAt status}}
""" % (id_param, id_param)
sleep_time = 2
while True:
res = self.client.execute(query_str, {id_param: self.uid})
res = res["exportBatchDataRows"]
if res["status"] == "COMPLETE":
download_url = res["downloadUrl"]
response = requests.get(download_url)
response.raise_for_status()
reader = ndjson.reader(StringIO(response.text))
return (
Entity.DataRow(self.client, result) for result in reader)
elif res["status"] == "FAILED":
raise LabelboxError("Data row export failed.")

timeout_seconds -= sleep_time
if timeout_seconds <= 0:
raise LabelboxError(
f"Unable to export data rows within {timeout_seconds} seconds."
)

logger.debug("Batch '%s' data row export, waiting for server...",
self.uid)
time.sleep(sleep_time)
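The method polls the export mutation every two seconds until the report is `COMPLETE`, then streams the NDJSON payload back lazily as `DataRow` objects. A hedged usage sketch, assuming `batch` was obtained from `project.batches()` as in the earlier example:

```python
from labelbox.exceptions import LabelboxError

try:
    # Blocks (polling every 2s) until the export is ready, then yields lazily.
    for data_row in batch.export_data_rows(timeout_seconds=180):
        print(data_row.uid, data_row.external_id)
except LabelboxError as err:
    # Raised when the export reports FAILED or the timeout elapses.
    print(f"Export did not complete: {err}")
```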
22 changes: 20 additions & 2 deletions labelbox/schema/project.py
@@ -298,7 +298,7 @@ def export_labels(self,

        def _string_from_dict(dictionary: dict, value_with_quotes=False) -> str:
            """Returns a concatenated string of the dictionary's keys and values

            The string will be formatted as {key}: 'value' for each key. Value will be inclusive of
            quotations while key will not. This can be toggled with `value_with_quotes`"""
@@ -609,7 +609,7 @@ def create_batch(self, name: str, data_rows: List[str], priority: int = 5):
            experimental=True)["project"][method]

        res['size'] = len(dr_ids)
-        return Entity.Batch(self.client, res)
+        return Entity.Batch(self.client, self.uid, res)

    def _update_queue_mode(self,
                           mode: "Project.QueueMode") -> "Project.QueueMode":
@@ -840,6 +840,24 @@ def bulk_import_requests(self) -> PaginatedCollection:
["bulkImportRequests"],
Entity.BulkImportRequest)

    def batches(self) -> PaginatedCollection:
        """ Fetch all batches that belong to this project

        Returns:
            A `PaginatedCollection` of `Batch`es
        """
        id_param = "projectId"
        query_str = """query GetProjectBatchesPyApi($from: String, $first: PageSize, $%s: ID!) {
            project(where: {id: $%s}) {id
                batches(after: $from, first: $first) { nodes { %s } pageInfo { endCursor }}}}
        """ % (id_param, id_param, query.results_query_part(Entity.Batch))
        return PaginatedCollection(
            self.client,
            query_str, {id_param: self.uid}, ['project', 'batches', 'nodes'],
            lambda client, res: Entity.Batch(client, self.uid, res),
            cursor_path=['project', 'batches', 'pageInfo', 'endCursor'],
            experimental=True)

    def upload_annotations(
            self,
            name: str,
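The changed `create_batch` return earlier in this file threads the project's `uid` into the `Batch` constructor, which is what makes the new `Batch.project()` round trip possible. A small sketch, assuming a `project` in Batch queue mode; `data_row_ids` is a placeholder list of DataRow uids gathered elsewhere:

```python
# data_row_ids is a hypothetical list of DataRow uids for this example.
batch = project.create_batch("april-import", data_row_ids, priority=3)

# Before this fix, the Batch carried no project id to resolve; now the
# relationship works in both directions.
assert batch.project().uid == project.uid
```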
55 changes: 55 additions & 0 deletions tests/integration/test_batch.py
@@ -18,10 +18,65 @@ def big_dataset(dataset: Dataset):
    yield dataset


@pytest.fixture
def small_dataset(dataset: Dataset):
    task = dataset.create_data_rows([
        {
            "row_data": IMAGE_URL,
            "external_id": "my-image"
        },
    ] * 3)
    task.wait_till_done()

    yield dataset


def test_create_batch(configured_project: Project, big_dataset: Dataset):
    configured_project.update(queue_mode=Project.QueueMode.Batch)

    data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())]
    batch = configured_project.create_batch("test-batch", data_rows, 3)
    assert batch.name == 'test-batch'
    assert batch.size == len(data_rows)


def test_archive_batch(configured_project: Project, small_dataset: Dataset):
    data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
    configured_project.update(queue_mode=Project.QueueMode.Batch)
    batch = configured_project.create_batch("batch to archive", data_rows)
    batch.remove_queued_data_rows()
    exported_data_rows = list(batch.export_data_rows())

    assert len(exported_data_rows) == 0


def test_batch_project(configured_project: Project, small_dataset: Dataset):
    data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
    configured_project.update(queue_mode=Project.QueueMode.Batch)
    batch = configured_project.create_batch(
        "batch to test project relationship", data_rows)
    project_from_batch = batch.project()

    assert project_from_batch.uid == configured_project.uid
    assert project_from_batch.name == configured_project.name


def test_export_data_rows(configured_project: Project, dataset: Dataset):
    n_data_rows = 5
    task = dataset.create_data_rows([
        {
            "row_data": IMAGE_URL,
            "external_id": "my-image"
        },
    ] * n_data_rows)
    task.wait_till_done()

    data_rows = [dr.uid for dr in list(dataset.export_data_rows())]
    configured_project.update(queue_mode=Project.QueueMode.Batch)
    batch = configured_project.create_batch("batch test", data_rows)

    result = list(batch.export_data_rows())
    exported_data_rows = [dr.uid for dr in result]

    assert len(result) == n_data_rows
    assert set(data_rows) == set(exported_data_rows)
21 changes: 20 additions & 1 deletion tests/integration/test_project.py
@@ -4,7 +4,7 @@
import pytest
import requests

-from labelbox import Project, LabelingFrontend
+from labelbox import Project, LabelingFrontend, Dataset
from labelbox.exceptions import InvalidQueryError


@@ -201,3 +201,22 @@ def test_queue_mode(configured_project: Project):
    ) == configured_project.QueueMode.Dataset
    configured_project.update(queue_mode=configured_project.QueueMode.Batch)
    assert configured_project.queue_mode() == configured_project.QueueMode.Batch


def test_batches(configured_project: Project, dataset: Dataset, image_url):
    task = dataset.create_data_rows([
        {
            "row_data": image_url,
            "external_id": "my-image"
        },
    ] * 2)
    task.wait_till_done()
    configured_project.update(queue_mode=configured_project.QueueMode.Batch)
    data_rows = [dr.uid for dr in list(dataset.export_data_rows())]
    batch_one = 'batch one'
    batch_two = 'batch two'
    configured_project.create_batch(batch_one, [data_rows[0]])
    configured_project.create_batch(batch_two, [data_rows[1]])

    names = set([batch.name for batch in list(configured_project.batches())])
    assert names == set([batch_one, batch_two])