diff --git a/CHANGELOG.md b/CHANGELOG.md index 5133acb71..ad670467f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ # In progress ## Changed * Default behavior for metrics to not include subclasses in the calculation. +* Updated `create_batch` method to accept consensus settings. ## Fixed * Polygon extraction from masks creating invalid polygons. This would cause issues in the coco converter. diff --git a/labelbox/schema/batch.py b/labelbox/schema/batch.py index ed93be19a..f64fcab0a 100644 --- a/labelbox/schema/batch.py +++ b/labelbox/schema/batch.py @@ -32,6 +32,7 @@ class Batch(DbObject): created_at = Field.DateTime("created_at") updated_at = Field.DateTime("updated_at") size = Field.Int("size") + consensus_settings = Field.Json("consensus_settings_json") # Relationships created_by = Relationship.ToOne("User") diff --git a/labelbox/schema/consensus_settings.py b/labelbox/schema/consensus_settings.py new file mode 100644 index 000000000..bd9cfc047 --- /dev/null +++ b/labelbox/schema/consensus_settings.py @@ -0,0 +1,18 @@ +from labelbox.utils import _CamelCaseMixin + + +class ConsensusSettings(_CamelCaseMixin): + """Container for holding consensus quality settings + + >>> ConsensusSettings( + >>> number_of_labels = 2, + >>> coverage_percentage = 0.2 + >>> ) + + Args: + number_of_labels: Number of labels for consensus + coverage_percentage: Percentage of data rows to be labeled more than once + """ + + number_of_labels: int + coverage_percentage: float diff --git a/labelbox/schema/data_row_metadata.py b/labelbox/schema/data_row_metadata.py index e63630be1..461076151 100644 --- a/labelbox/schema/data_row_metadata.py +++ b/labelbox/schema/data_row_metadata.py @@ -8,7 +8,7 @@ from pydantic import BaseModel, conlist, constr from labelbox.schema.ontology import SchemaId -from labelbox.utils import camel_case +from labelbox.utils import _CamelCaseMixin class DataRowMetadataKind(Enum): @@ -36,13 +36,6 @@ class DataRowMetadataSchema(BaseModel): String: Type[str] = constr(max_length=500) -class _CamelCaseMixin(BaseModel): - - class Config: - allow_population_by_field_name = True - alias_generator = camel_case - - # Metadata base class class DataRowMetadataField(_CamelCaseMixin): schema_id: SchemaId diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index c359aea5f..cbfba9500 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -16,6 +16,7 @@ from labelbox.orm.db_object import DbObject, Updateable, Deletable from labelbox.orm.model import Entity, Field, Relationship from labelbox.pagination import PaginatedCollection +from labelbox.schema.consensus_settings import ConsensusSettings from labelbox.schema.media_type import MediaType from labelbox.schema.queue_mode import QueueMode from labelbox.schema.resource_tag import ResourceTag @@ -561,14 +562,18 @@ def setup(self, labeling_frontend, labeling_frontend_options) -> None: timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") self.update(setup_complete=timestamp) - def create_batch(self, name: str, data_rows: List[str], priority: int = 5): + def create_batch(self, + name: str, + data_rows: List[str], + priority: int = 5, + consensus_settings: Optional[Dict[str, float]] = None): """Create a new batch for a project. Batches is in Beta and subject to change Args: name: a name for the batch, must be unique within a project data_rows: Either a list of `DataRows` or Data Row ids priority: An optional priority for the Data Rows in the Batch. 1 highest -> 5 lowest - + consensus_settings: An optional dictionary with consensus settings: {'number_of_labels': 3, 'coverage_percentage': 0.1} """ # @TODO: make this automatic? @@ -600,12 +605,16 @@ def create_batch(self, name: str, data_rows: List[str], priority: int = 5): } """ % (method, method, query.results_query_part(Entity.Batch)) + if consensus_settings: + consensus_settings = ConsensusSettings(**consensus_settings).dict( + by_alias=True) params = { "projectId": self.uid, "batchInput": { "name": name, "dataRowIds": dr_ids, - "priority": priority + "priority": priority, + "consensusSettings": consensus_settings } } diff --git a/labelbox/utils.py b/labelbox/utils.py index b4c70b23e..624437182 100644 --- a/labelbox/utils.py +++ b/labelbox/utils.py @@ -1,4 +1,5 @@ import re +from pydantic import BaseModel def _convert(s, sep, title): @@ -23,3 +24,10 @@ def title_case(s): def snake_case(s): """ Converts a string in [snake|camel|title]case to snake_case. """ return _convert(s, "_", lambda i: False) + + +class _CamelCaseMixin(BaseModel): + + class Config: + allow_population_by_field_name = True + alias_generator = camel_case diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index fcbd173c4..7c6e50221 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -177,7 +177,9 @@ def batch_project(client, rand_gen): @pytest.fixture def consensus_project(client, rand_gen): - project = client.create_project(name=rand_gen(str), auto_audit_percentage=0) + project = client.create_project(name=rand_gen(str), + auto_audit_percentage=0, + queue_mode=QueueMode.Dataset) yield project project.delete() diff --git a/tests/integration/test_batch.py b/tests/integration/test_batch.py index 7f469df9d..4084acfb1 100644 --- a/tests/integration/test_batch.py +++ b/tests/integration/test_batch.py @@ -1,7 +1,6 @@ import pytest from labelbox import Dataset, Project -from labelbox.schema.queue_mode import QueueMode IMAGE_URL = "https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000000034.jpg" @@ -39,6 +38,19 @@ def test_create_batch(batch_project: Project, big_dataset: Dataset): assert batch.size == len(data_rows) +def test_create_batch_with_consensus_settings(batch_project: Project, + big_dataset: Dataset): + data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())] + consensus_settings = {"coverage_percentage": 0.1, "number_of_labels": 3} + batch = batch_project.create_batch("batch with consensus settings", + data_rows, + 3, + consensus_settings=consensus_settings) + assert batch.name == "batch with consensus settings" + assert batch.size == len(data_rows) + assert batch.consensus_settings == consensus_settings + + def test_archive_batch(batch_project: Project, small_dataset: Dataset): data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())] batch = batch_project.create_batch("batch to archive", data_rows)