1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -3,6 +3,7 @@
# In progress
## Changed
* Default behavior for metrics to not include subclasses in the calculation.
* Updated `create_batch` method to accept consensus settings.

## Fixed
* Polygon extraction from masks creating invalid polygons. This would cause issues in the coco converter.
1 change: 1 addition & 0 deletions labelbox/schema/batch.py
@@ -32,6 +32,7 @@ class Batch(DbObject):
created_at = Field.DateTime("created_at")
updated_at = Field.DateTime("updated_at")
size = Field.Int("size")
consensus_settings = Field.Json("consensus_settings_json")

# Relationships
created_by = Relationship.ToOne("User")
18 changes: 18 additions & 0 deletions labelbox/schema/consensus_settings.py
@@ -0,0 +1,18 @@
from labelbox.utils import _CamelCaseMixin


class ConsensusSettings(_CamelCaseMixin):
"""Container for holding consensus quality settings

>>> ConsensusSettings(
>>> number_of_labels = 2,
>>> coverage_percentage = 0.2
>>> )

Args:
number_of_labels: Number of labels for consensus
coverage_percentage: Percentage of data rows to be labeled more than once
"""

number_of_labels: int
coverage_percentage: float
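
A minimal usage sketch of the new model (illustrative, not part of the diff; assumes the pydantic v1 semantics used elsewhere in the SDK): the snake_case fields can be populated directly or via their camelCase aliases, and serializing with by_alias=True produces the camelCase keys that create_batch sends to the API.

    from labelbox.schema.consensus_settings import ConsensusSettings

    settings = ConsensusSettings(number_of_labels=3, coverage_percentage=0.1)

    # by_alias=True yields camelCase keys via the _CamelCaseMixin alias generator
    assert settings.dict(by_alias=True) == {
        "numberOfLabels": 3,
        "coveragePercentage": 0.1,
    }

    # allow_population_by_field_name means camelCase input is accepted as well
    assert ConsensusSettings(numberOfLabels=3, coveragePercentage=0.1) == settings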
9 changes: 1 addition & 8 deletions labelbox/schema/data_row_metadata.py
@@ -8,7 +8,7 @@
from pydantic import BaseModel, conlist, constr

from labelbox.schema.ontology import SchemaId
from labelbox.utils import camel_case
from labelbox.utils import _CamelCaseMixin


class DataRowMetadataKind(Enum):
@@ -36,13 +36,6 @@ class DataRowMetadataSchema(BaseModel):
String: Type[str] = constr(max_length=500)


class _CamelCaseMixin(BaseModel):

class Config:
allow_population_by_field_name = True
alias_generator = camel_case


# Metadata base class
class DataRowMetadataField(_CamelCaseMixin):
schema_id: SchemaId
15 changes: 12 additions & 3 deletions labelbox/schema/project.py
@@ -16,6 +16,7 @@
from labelbox.orm.db_object import DbObject, Updateable, Deletable
from labelbox.orm.model import Entity, Field, Relationship
from labelbox.pagination import PaginatedCollection
from labelbox.schema.consensus_settings import ConsensusSettings
from labelbox.schema.media_type import MediaType
from labelbox.schema.queue_mode import QueueMode
from labelbox.schema.resource_tag import ResourceTag
@@ -561,14 +562,18 @@ def setup(self, labeling_frontend, labeling_frontend_options) -> None:
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
self.update(setup_complete=timestamp)

def create_batch(self, name: str, data_rows: List[str], priority: int = 5):
def create_batch(self,
name: str,
data_rows: List[str],
priority: int = 5,
consensus_settings: Optional[Dict[str, float]] = None):
"""Create a new batch for a project. Batches is in Beta and subject to change

Args:
name: a name for the batch, must be unique within a project
data_rows: Either a list of `DataRows` or Data Row ids
priority: An optional priority for the Data Rows in the Batch. 1 highest -> 5 lowest

consensus_settings: An optional dictionary with consensus settings: {'number_of_labels': 3, 'coverage_percentage': 0.1}
"""

# @TODO: make this automatic?
@@ -600,12 +605,16 @@ def create_batch(self, name: str, data_rows: List[str], priority: int = 5):
}
""" % (method, method, query.results_query_part(Entity.Batch))

if consensus_settings:
consensus_settings = ConsensusSettings(**consensus_settings).dict(
by_alias=True)
params = {
"projectId": self.uid,
"batchInput": {
"name": name,
"dataRowIds": dr_ids,
"priority": priority
"priority": priority,
"consensusSettings": consensus_settings
}
}

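A hedged usage sketch of the extended create_batch (not part of the diff; `project` and `dataset` are hypothetical placeholders for an existing Project and a populated Dataset): the caller passes a plain snake_case dict, and the method converts it through ConsensusSettings into the camelCase consensusSettings field of the GraphQL batchInput.

    # Assumed setup: `project` is a labelbox Project, `dataset` a Dataset with data rows.
    data_row_ids = [dr.uid for dr in dataset.export_data_rows()]

    batch = project.create_batch(
        "consensus batch",          # must be unique within the project
        data_row_ids,
        priority=3,
        # converted internally to {'numberOfLabels': 3, 'coveragePercentage': 0.1}
        consensus_settings={"number_of_labels": 3, "coverage_percentage": 0.1},
    )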
8 changes: 8 additions & 0 deletions labelbox/utils.py
@@ -1,4 +1,5 @@
import re
from pydantic import BaseModel


def _convert(s, sep, title):
@@ -23,3 +24,10 @@ def title_case(s):
def snake_case(s):
""" Converts a string in [snake|camel|title]case to snake_case. """
return _convert(s, "_", lambda i: False)


class _CamelCaseMixin(BaseModel):

class Config:
allow_population_by_field_name = True
alias_generator = camel_case
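
A short sketch of the relocated mixin's behavior (illustrative, not part of the diff; `Example` is a hypothetical model): camel_case rewrites each field name into its alias, so any model deriving from _CamelCaseMixin accepts and emits both spellings.

    from labelbox.utils import _CamelCaseMixin, camel_case

    assert camel_case("coverage_percentage") == "coveragePercentage"

    class Example(_CamelCaseMixin):
        number_of_labels: int

    # populated by field name or alias, serialized either way
    assert Example(number_of_labels=2).dict(by_alias=True) == {"numberOfLabels": 2}
    assert Example(numberOfLabels=2).dict() == {"number_of_labels": 2}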
4 changes: 3 additions & 1 deletion tests/integration/conftest.py
@@ -177,7 +177,9 @@ def batch_project(client, rand_gen):

@pytest.fixture
def consensus_project(client, rand_gen):
project = client.create_project(name=rand_gen(str), auto_audit_percentage=0)
project = client.create_project(name=rand_gen(str),
auto_audit_percentage=0,
queue_mode=QueueMode.Dataset)
yield project
project.delete()

14 changes: 13 additions & 1 deletion tests/integration/test_batch.py
@@ -1,7 +1,6 @@
import pytest

from labelbox import Dataset, Project
from labelbox.schema.queue_mode import QueueMode

IMAGE_URL = "https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000000034.jpg"

@@ -39,6 +38,19 @@ def test_create_batch(batch_project: Project, big_dataset: Dataset):
assert batch.size == len(data_rows)


def test_create_batch_with_consensus_settings(batch_project: Project,
big_dataset: Dataset):
data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())]
consensus_settings = {"coverage_percentage": 0.1, "number_of_labels": 3}
batch = batch_project.create_batch("batch with consensus settings",
data_rows,
3,
consensus_settings=consensus_settings)
assert batch.name == "batch with consensus settings"
assert batch.size == len(data_rows)
assert batch.consensus_settings == consensus_settings


def test_archive_batch(batch_project: Project, small_dataset: Dataset):
data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
batch = batch_project.create_batch("batch to archive", data_rows)