1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -3,6 +3,7 @@
# In progress
## Changed
* Default behavior for metrics to not include subclasses in the calculation.
* Updated `create_batch` method to accept consensus settings.

## Fixed
* Polygon extraction from masks creating invalid polygons. This would cause issues in the coco converter.
1 change: 1 addition & 0 deletions labelbox/schema/batch.py
@@ -32,6 +32,7 @@ class Batch(DbObject):
created_at = Field.DateTime("created_at")
updated_at = Field.DateTime("updated_at")
size = Field.Int("size")
consensus_settings = Field.Json("consensus_settings_json")

# Relationships
created_by = Relationship.ToOne("User")
18 changes: 18 additions & 0 deletions labelbox/schema/consensus_settings.py
@@ -0,0 +1,18 @@
from labelbox.utils import _CamelCaseMixin


class ConsensusSettings(_CamelCaseMixin):
"""Container for holding consensus quality settings

>>> ConsensusSettings(
>>> number_of_labels = 2,
>>> coverage_percentage = 0.2
>>> )

Args:
number_of_labels: Number of labels for consensus
coverage_percentage: Percentage of data rows to be labeled more than once
"""

number_of_labels: int
coverage_percentage: float
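
A minimal usage sketch of the new model (illustrative, not part of the diff; assumes the pydantic v1 semantics used elsewhere in the SDK): the snake_case fields can be populated directly or via their camelCase aliases, and serializing with by_alias=True produces the camelCase keys that create_batch sends to the API.

    from labelbox.schema.consensus_settings import ConsensusSettings

    settings = ConsensusSettings(number_of_labels=3, coverage_percentage=0.1)

    # by_alias=True yields camelCase keys via the _CamelCaseMixin alias generator
    assert settings.dict(by_alias=True) == {
        "numberOfLabels": 3,
        "coveragePercentage": 0.1,
    }

    # allow_population_by_field_name means camelCase input is accepted as well
    assert ConsensusSettings(numberOfLabels=3, coveragePercentage=0.1) == settings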
9 changes: 1 addition & 8 deletions labelbox/schema/data_row_metadata.py
@@ -8,7 +8,7 @@
from pydantic import BaseModel, conlist, constr

from labelbox.schema.ontology import SchemaId
from labelbox.utils import camel_case
from labelbox.utils import _CamelCaseMixin


class DataRowMetadataKind(Enum):
@@ -36,13 +36,6 @@ class DataRowMetadataSchema(BaseModel):
String: Type[str] = constr(max_length=500)


class _CamelCaseMixin(BaseModel):

class Config:
allow_population_by_field_name = True
alias_generator = camel_case


# Metadata base class
class DataRowMetadataField(_CamelCaseMixin):
schema_id: SchemaId
15 changes: 12 additions & 3 deletions labelbox/schema/project.py
@@ -16,6 +16,7 @@
from labelbox.orm.db_object import DbObject, Updateable, Deletable
from labelbox.orm.model import Entity, Field, Relationship
from labelbox.pagination import PaginatedCollection
from labelbox.schema.consensus_settings import ConsensusSettings
from labelbox.schema.media_type import MediaType
from labelbox.schema.queue_mode import QueueMode
from labelbox.schema.resource_tag import ResourceTag
@@ -561,14 +562,18 @@ def setup(self, labeling_frontend, labeling_frontend_options) -> None:
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
self.update(setup_complete=timestamp)

def create_batch(self, name: str, data_rows: List[str], priority: int = 5):
def create_batch(self,
name: str,
data_rows: List[str],
priority: int = 5,
consensus_settings: Optional[Dict[str, float]] = None):
"""Create a new batch for a project. Batches is in Beta and subject to change

Args:
name: a name for the batch, must be unique within a project
data_rows: Either a list of `DataRows` or Data Row ids
priority: An optional priority for the Data Rows in the Batch. 1 highest -> 5 lowest

consensus_settings: An optional dictionary with consensus settings: {'number_of_labels': 3, 'coverage_percentage': 0.1}
"""

# @TODO: make this automatic?
@@ -600,12 +605,16 @@ def create_batch(self, name: str, data_rows: List[str], priority: int = 5):
}
""" % (method, method, query.results_query_part(Entity.Batch))

if consensus_settings:
consensus_settings = ConsensusSettings(**consensus_settings).dict(
by_alias=True)
params = {
"projectId": self.uid,
"batchInput": {
"name": name,
"dataRowIds": dr_ids,
"priority": priority
"priority": priority,
"consensusSettings": consensus_settings
}
}

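A hedged usage sketch of the extended create_batch (not part of the diff; `project` and `dataset` are hypothetical placeholders for an existing Project and a populated Dataset): the caller passes a plain snake_case dict, and the method converts it through ConsensusSettings into the camelCase consensusSettings field of the GraphQL batchInput.

    # Assumed setup: `project` is a labelbox Project, `dataset` a Dataset with data rows.
    data_row_ids = [dr.uid for dr in dataset.export_data_rows()]

    batch = project.create_batch(
        "consensus batch",          # must be unique within the project
        data_row_ids,
        priority=3,
        # converted internally to {'numberOfLabels': 3, 'coveragePercentage': 0.1}
        consensus_settings={"number_of_labels": 3, "coverage_percentage": 0.1},
    )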
8 changes: 8 additions & 0 deletions labelbox/utils.py
@@ -1,4 +1,5 @@
import re
from pydantic import BaseModel


def _convert(s, sep, title):
@@ -23,3 +24,10 @@ def title_case(s):
def snake_case(s):
""" Converts a string in [snake|camel|title]case to snake_case. """
return _convert(s, "_", lambda i: False)


class _CamelCaseMixin(BaseModel):

class Config:
allow_population_by_field_name = True
alias_generator = camel_case
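
A short sketch of the relocated mixin's behavior (illustrative, not part of the diff; `Example` is a hypothetical model): camel_case rewrites each field name into its alias, so any model deriving from _CamelCaseMixin accepts and emits both spellings.

    from labelbox.utils import _CamelCaseMixin, camel_case

    assert camel_case("coverage_percentage") == "coveragePercentage"

    class Example(_CamelCaseMixin):
        number_of_labels: int

    # populated by field name or alias, serialized either way
    assert Example(number_of_labels=2).dict(by_alias=True) == {"numberOfLabels": 2}
    assert Example(numberOfLabels=2).dict() == {"number_of_labels": 2}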
4 changes: 3 additions & 1 deletion tests/integration/conftest.py
@@ -177,7 +177,9 @@ def batch_project(client, rand_gen):

@pytest.fixture
def consensus_project(client, rand_gen):
project = client.create_project(name=rand_gen(str), auto_audit_percentage=0)
project = client.create_project(name=rand_gen(str),
auto_audit_percentage=0,
queue_mode=QueueMode.Dataset)
yield project
project.delete()

14 changes: 13 additions & 1 deletion tests/integration/test_batch.py
@@ -1,7 +1,6 @@
import pytest

from labelbox import Dataset, Project
from labelbox.schema.queue_mode import QueueMode

IMAGE_URL = "https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000000034.jpg"

@@ -39,6 +38,19 @@ def test_create_batch(batch_project: Project, big_dataset: Dataset):
assert batch.size == len(data_rows)


def test_create_batch_with_consensus_settings(batch_project: Project,
big_dataset: Dataset):
data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())]
consensus_settings = {"coverage_percentage": 0.1, "number_of_labels": 3}
batch = batch_project.create_batch("batch with consensus settings",
data_rows,
3,
consensus_settings=consensus_settings)
assert batch.name == "batch with consensus settings"
assert batch.size == len(data_rows)
assert batch.consensus_settings == consensus_settings


def test_archive_batch(batch_project: Project, small_dataset: Dataset):
data_rows = [dr.uid for dr in list(small_dataset.export_data_rows())]
batch = batch_project.create_batch("batch to archive", data_rows)