Skip to content

Release 3.30.1 #768

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Nov 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

# Version 3.30.1 (2022-11-16)
### Fixed
* Running `project.setup_editor()` multiple times no longer resets the ontology, and instead raises an error if the editor is already set up for the project

# Version 3.30.0 (2022-11-11)
### Changed
* create_data_rows, create_data_rows_sync, create_data_row, and update data rows all accept the new data row input format for row data
Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
copyright = '2021, Labelbox'
author = 'Labelbox'

release = '3.30.0'
release = '3.30.1'

# -- General configuration ---------------------------------------------------

Expand Down
4 changes: 2 additions & 2 deletions labelbox/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name = "labelbox"
__version__ = "3.30.0"
__version__ = "3.30.1"

from labelbox.client import Client
from labelbox.schema.project import Project
Expand Down Expand Up @@ -27,4 +27,4 @@
from labelbox.schema.resource_tag import ResourceTag
from labelbox.schema.project_resource_tag import ProjectResourceTag
from labelbox.schema.media_type import MediaType
from labelbox.schema.slice import Slice, CatalogSlice
from labelbox.schema.slice import Slice, CatalogSlice
5 changes: 5 additions & 0 deletions labelbox/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,8 @@ class MALValidationError(LabelboxError):
class OperationNotAllowedException(Exception):
    """Raised when user does not have permissions to a resource or has exceeded usage limit"""


class ProcessingWaitTimeout(Exception):
    """Raised when waiting for the data rows to be processed takes longer than allowed"""
12 changes: 11 additions & 1 deletion labelbox/schema/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,15 @@ class Batch(DbObject):
# Relationships
created_by = Relationship.ToOne("User")

def __init__(self,
             client,
             project_id,
             *args,
             failed_data_row_ids=None,
             **kwargs):
    """A batch of data rows submitted to a project.

    Args:
        client: API client forwarded to the DbObject base class.
        project_id: id of the project this batch belongs to.
        failed_data_row_ids: ids of data rows that could not be added to
            the batch, as reported by the createBatchV2 mutation; ``None``
            when the caller did not supply them.
    """
    # The scraped diff rendered both the old and the new signature on
    # consecutive lines; only the post-change definition is kept here.
    super().__init__(client, *args, **kwargs)
    self.project_id = project_id
    self._failed_data_row_ids = failed_data_row_ids

def project(self) -> 'Project': # type: ignore
""" Returns Project which this Batch belongs to
Expand Down Expand Up @@ -174,3 +180,7 @@ def delete_labels(self, set_labels_as_template=False) -> None:
},
experimental=True)
return res

@property
def failed_data_row_ids(self):
    """Yield the ids of data rows that failed to be added to this batch.

    Exposed as a generator so callers cannot mutate the underlying list.
    NOTE(review): ``_failed_data_row_ids`` defaults to None in __init__;
    iterating this property then raises TypeError — confirm intended.
    """
    for row_id in self._failed_data_row_ids:
        yield row_id
72 changes: 63 additions & 9 deletions labelbox/schema/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,17 @@
from collections import namedtuple
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING, Dict, Union, Iterable, List, Optional, Any
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
from urllib.parse import urlparse

import ndjson
import requests

from labelbox import utils
from labelbox.exceptions import InvalidQueryError, LabelboxError
from labelbox.exceptions import (InvalidQueryError, LabelboxError,
ProcessingWaitTimeout, ResourceConflict)
from labelbox.orm import query
from labelbox.orm.db_object import DbObject, Updateable, Deletable
from labelbox.orm.db_object import DbObject, Deletable, Updateable
from labelbox.orm.model import Entity, Field, Relationship
from labelbox.pagination import PaginatedCollection
from labelbox.schema.consensus_settings import ConsensusSettings
Expand Down Expand Up @@ -90,6 +91,9 @@ class Project(DbObject, Updateable, Deletable):
benchmarks = Relationship.ToMany("Benchmark", False)
ontology = Relationship.ToOne("Ontology", True)

#
_wait_processing_max_seconds = 3600

def update(self, **kwargs):
""" Updates this project with the specified attributes

Expand Down Expand Up @@ -319,7 +323,7 @@ def _validate_datetime(string_date: str) -> bool:
return True
except ValueError:
pass
raise ValueError(f"""Incorrect format for: {string_date}.
raise ValueError(f"""Incorrect format for: {string_date}.
Format must be \"YYYY-MM-DD\" or \"YYYY-MM-DD hh:mm:ss\"""")
return True

Expand Down Expand Up @@ -507,6 +511,9 @@ def setup_editor(self, ontology) -> None:
Args:
ontology (Ontology): The ontology to attach to the project
"""
if self.labeling_frontend() is not None:
raise ResourceConflict("Editor is already set up.")

labeling_frontend = next(
self.client.get_labeling_frontends(
where=Entity.LabelingFrontend.name == "Editor"))
Expand Down Expand Up @@ -546,6 +553,9 @@ def setup(self, labeling_frontend, labeling_frontend_options) -> None:
to `str` using `json.dumps`.
"""

if self.labeling_frontend() is not None:
raise ResourceConflict("Editor is already set up.")

if not isinstance(labeling_frontend_options, str):
labeling_frontend_options = json.dumps(labeling_frontend_options)

Expand Down Expand Up @@ -595,11 +605,16 @@ def create_batch(self,
if not len(dr_ids):
raise ValueError("You need at least one data row in a batch")

method = 'createBatch'
self._wait_until_data_rows_are_processed(
data_rows, self._wait_processing_max_seconds)
method = 'createBatchV2'
query_str = """mutation %sPyApi($projectId: ID!, $batchInput: CreateBatchInput!) {
project(where: {id: $projectId}) {
%s(input: $batchInput) {
%s
batch {
%s
}
failedDataRowIds
}
}
}
Expand All @@ -622,9 +637,12 @@ def create_batch(self,
params,
timeout=180.0,
experimental=True)["project"][method]

res['size'] = len(dr_ids)
return Entity.Batch(self.client, self.uid, res)
batch = res['batch']
batch['size'] = len(dr_ids)
return Entity.Batch(self.client,
self.uid,
batch,
failed_data_row_ids=res['failedDataRowIds'])

def _update_queue_mode(self, mode: "QueueMode") -> "QueueMode":
"""
Expand Down Expand Up @@ -977,6 +995,42 @@ def _is_url_valid(url: Union[str, Path]) -> bool:
raise ValueError(
f'Invalid annotations given of type: {type(annotations)}')

def _wait_until_data_rows_are_processed(self,
                                        data_row_ids: List[str],
                                        wait_processing_max_seconds: int,
                                        sleep_interval: int = 30):
    """ Wait until all the specified data rows are processed.

    Polls the backend once per ``sleep_interval`` seconds until every id
    in ``data_row_ids`` reports as processed.

    Args:
        data_row_ids: ids of the data rows to wait for.
        wait_processing_max_seconds: overall deadline in seconds, measured
            from the first call, not per poll.
        sleep_interval: seconds to sleep between polls.

    Raises:
        ProcessingWaitTimeout: if the deadline elapses before all data
            rows have been processed.
    """
    start_time = datetime.now()
    while True:
        # Deadline is checked before querying, so a zero (or negative)
        # budget fails immediately without a backend round trip.
        if (datetime.now() -
                start_time).total_seconds() >= wait_processing_max_seconds:
            raise ProcessingWaitTimeout(
                "Maximum wait time exceeded while waiting for data rows to be processed. Try creating a batch a bit later"
            )

        all_good = self.__check_data_rows_have_been_processed(data_row_ids)
        if all_good:
            return

        logger.debug(
            'Some of the data rows are still being processed, waiting...')
        time.sleep(sleep_interval)

def __check_data_rows_have_been_processed(self, data_row_ids: List[str]):
    """Issue a single GraphQL query asking whether every data row in
    ``data_row_ids`` has finished processing; does not poll or retry.

    Returns the backend's ``allDataRowsHaveBeenProcessed`` field —
    presumably a boolean, TODO confirm against the API schema.
    """
    data_row_ids_param = "data_row_ids"

    query_str = """query CheckAllDataRowsHaveBeenProcessedPyApi($%s: [ID!]!) {
        queryAllDataRowsHaveBeenProcessed(dataRowIds:$%s) {
            allDataRowsHaveBeenProcessed
        }
    }""" % (data_row_ids_param, data_row_ids_param)

    params = {}
    params[data_row_ids_param] = data_row_ids
    response = self.client.execute(query_str, params)
    return response["queryAllDataRowsHaveBeenProcessed"][
        "allDataRowsHaveBeenProcessed"]


class ProjectMember(DbObject):
user = Relationship.ToOne("User", cache=True)
Expand Down
7 changes: 7 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,13 @@ def dataset(client, rand_gen):
dataset.delete()


@pytest.fixture(scope='function')
def unique_dataset(client, rand_gen):
    """Function-scoped dataset with a random name, deleted on teardown."""
    created = client.create_dataset(name=rand_gen(str))
    yield created
    created.delete()


@pytest.fixture
def datarow(dataset, image_url):
task = dataset.create_data_rows([
Expand Down
70 changes: 69 additions & 1 deletion tests/integration/test_batch.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from labelbox.exceptions import ProcessingWaitTimeout
import pytest

from labelbox import Dataset, Project

IMAGE_URL = "https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000000034.jpg"
Expand Down Expand Up @@ -31,6 +31,23 @@ def small_dataset(dataset: Dataset):
yield dataset


@pytest.fixture(scope='function')
def dataset_with_invalid_data_rows(unique_dataset: Dataset):
    """A fresh dataset pre-populated with data rows whose assets cannot be accessed."""
    upload_invalid_data_rows_for_dataset(unique_dataset)
    yield unique_dataset


def upload_invalid_data_rows_for_dataset(dataset: Dataset):
    """Upload two data rows pointing at an inaccessible asset and wait for the upload task."""
    bad_row = {
        "row_data": 'gs://invalid-bucket/example.png',  # forbidden
        "external_id": "image-without-access.jpg"
    }
    upload_task = dataset.create_data_rows([bad_row] * 2)
    upload_task.wait_till_done()


def test_create_batch(batch_project: Project, big_dataset: Dataset):
data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())]
batch = batch_project.create_batch("test-batch", data_rows, 3)
Expand Down Expand Up @@ -72,12 +89,63 @@ def test_batch_project(batch_project: Project, small_dataset: Dataset):
data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
batch = batch_project.create_batch("batch to test project relationship",
data_rows)

project_from_batch = batch.project()

assert project_from_batch.uid == batch_project.uid
assert project_from_batch.name == batch_project.name


def test_batch_creation_for_data_rows_with_issues(
        batch_project: Project, small_dataset: Dataset,
        dataset_with_invalid_data_rows: Dataset):
    """A batch mixing valid and invalid data rows reports exactly the invalid ids as failed."""
    valid_ids = [dr.uid for dr in small_dataset.data_rows()]
    invalid_ids = [
        dr.uid for dr in dataset_with_invalid_data_rows.data_rows()
    ]
    candidate_ids = valid_ids + invalid_ids
    assert len(candidate_ids) == 5

    batch = batch_project.create_batch("batch to test failed data rows",
                                       candidate_ids)

    failed_ids = list(batch.failed_data_row_ids)
    assert len(failed_ids) == 2
    # Every reported failure must be one of the deliberately broken rows.
    assert len(set(failed_ids) & set(invalid_ids)) == 2


def test_batch_creation_with_processing_timeout(batch_project: Project,
                                                small_dataset: Dataset,
                                                unique_dataset: Dataset):
    """
    Create a batch with zero wait time, this means that the waiting logic will throw exception immediately
    """
    # wait for these data rows to be processed
    valid_data_rows = [dr.uid for dr in list(small_dataset.data_rows())]
    batch_project._wait_until_data_rows_are_processed(
        valid_data_rows, wait_processing_max_seconds=3600, sleep_interval=5)

    # upload data rows for this dataset and don't wait
    upload_invalid_data_rows_for_dataset(unique_dataset)
    unprocessed_data_rows = [dr.uid for dr in list(unique_dataset.data_rows())]

    data_row_ids = valid_data_rows + unprocessed_data_rows

    stashed_wait_timeout = batch_project._wait_processing_max_seconds
    try:
        with pytest.raises(ProcessingWaitTimeout):
            # emulate the situation where there are still some data rows being
            # processed but wait timeout exceeded
            batch_project._wait_processing_max_seconds = 0
            batch_project.create_batch("batch to test failed data rows",
                                       data_row_ids)
    finally:
        # Restore even if an unexpected exception escapes, so the shared
        # project fixture is not left with a zero-second timeout for
        # subsequent tests.
        batch_project._wait_processing_max_seconds = stashed_wait_timeout


def test_export_data_rows(batch_project: Project, dataset: Dataset):
n_data_rows = 5
task = dataset.create_data_rows([
Expand Down
11 changes: 10 additions & 1 deletion tests/integration/test_project_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest

from labelbox import LabelingFrontend
from labelbox.exceptions import InvalidQueryError
from labelbox.exceptions import InvalidQueryError, ResourceConflict


def simple_ontology():
Expand Down Expand Up @@ -67,3 +67,12 @@ def test_project_editor_setup(client, project, rand_gen):
time.sleep(3) # Search takes a second
assert [ontology.name for ontology in client.get_ontologies(ontology_name)
] == [ontology_name]


def test_project_editor_setup_cant_call_multiple_times(client, project,
                                                       rand_gen):
    """A second setup_editor() call on the same project must be rejected."""
    ontology_name = f"test_project_editor_setup_ontology_name-{rand_gen(str)}"
    editor_ontology = client.create_ontology(ontology_name, simple_ontology())
    project.setup_editor(editor_ontology)

    # The editor is already attached; calling setup again must raise
    # instead of silently resetting the ontology.
    with pytest.raises(ResourceConflict):
        project.setup_editor(editor_ontology)