From dd771af3aef37f471279da510e7a82aca29d6e07 Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Wed, 29 Sep 2021 09:52:35 -0700 Subject: [PATCH 01/17] ADD: beta batch queue methods --- labelbox/data/annotation_types/data/raster.py | 12 ++++++++++++ labelbox/data/annotation_types/geometry/rectangle.py | 5 +++++ .../data/annotation_types/geometry/test_rectangle.py | 2 ++ 3 files changed, 19 insertions(+) diff --git a/labelbox/data/annotation_types/data/raster.py b/labelbox/data/annotation_types/data/raster.py index ad7ad88e5..5d653d053 100644 --- a/labelbox/data/annotation_types/data/raster.py +++ b/labelbox/data/annotation_types/data/raster.py @@ -32,6 +32,18 @@ def from_2D_arr(cls, arr: TypedArray[Literal['uint8']], **kwargs): arr = np.stack((arr,) * 3, axis=-1) return cls(arr=arr, **kwargs) + @classmethod + def from_2D_array(cls, arr: TypedArray[Literal['uint8']], **kwargs): + + + + if len(arr.shape) != 2: + raise ValueError( + f"Found array with shape {arr.shape}. Expected two dimensions ([W,H])" + ) + arr = np.stack((arr,) * 3, axis=-1) + return cls(arr=arr, **kwargs) + def bytes_to_np(self, image_bytes: bytes) -> np.ndarray: """ Converts image bytes to a numpy array diff --git a/labelbox/data/annotation_types/geometry/rectangle.py b/labelbox/data/annotation_types/geometry/rectangle.py index 3579f261b..d23fca175 100644 --- a/labelbox/data/annotation_types/geometry/rectangle.py +++ b/labelbox/data/annotation_types/geometry/rectangle.py @@ -51,3 +51,8 @@ def draw(self, if thickness == -1: return cv2.fillPoly(canvas, pts, color) return cv2.polylines(canvas, pts, True, color, thickness) + + @classmethod + def from_tlhw(cls, top: float, left: float, height: float, width: float): + """Convert from top, left, height, width format""" + return Rectangle(start=Point(x=left, y=top), end=Point(x=left + width, y=top + height)) diff --git a/tests/data/annotation_types/geometry/test_rectangle.py b/tests/data/annotation_types/geometry/test_rectangle.py index 14d2b7316..369e2b6ec 100644 --- a/tests/data/annotation_types/geometry/test_rectangle.py +++ b/tests/data/annotation_types/geometry/test_rectangle.py @@ -18,3 +18,5 @@ def test_rectangle(): raster = rectangle.draw(height=32, width=32) assert (cv2.imread("tests/data/assets/rectangle.png") == raster).all() + + alt_rectangle = Rectangle.from_tlhw(top=1, left=0, height=) \ No newline at end of file From fa0062f1ee66db2c562afb90b5aa15f9e32fc6ce Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Wed, 29 Sep 2021 09:53:31 -0700 Subject: [PATCH 02/17] ADD: beta batch queue methods --- labelbox/schema/project.py | 117 ++++++++++++++++++++++++++++-- tests/integration/test_batch.py | 29 ++++++++ tests/integration/test_project.py | 9 ++- 3 files changed, 145 insertions(+), 10 deletions(-) create mode 100644 tests/integration/test_batch.py diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 9618d72f5..d1f190d3b 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -1,27 +1,30 @@ import json -import time import logging +import time +import warnings from collections import namedtuple from datetime import datetime, timezone from pathlib import Path from typing import Dict, Union, Iterable from urllib.parse import urlparse -import requests + import ndjson +import requests from labelbox import utils -from labelbox.schema.data_row import DataRow -from labelbox.orm import query -from labelbox.schema.bulk_import_request import BulkImportRequest from labelbox.exceptions import InvalidQueryError, LabelboxError +from labelbox.orm import query from labelbox.orm.db_object import DbObject, Updateable, Deletable from labelbox.orm.model import Entity, Field, Relationship from labelbox.pagination import PaginatedCollection +from labelbox.schema.bulk_import_request import BulkImportRequest +from labelbox.schema.data_row import DataRow try: datetime.fromisoformat # type: ignore[attr-defined] except AttributeError: from backports.datetime_fromisoformat import MonkeyPatch + MonkeyPatch.patch_fromisoformat() try: @@ -65,6 +68,7 @@ class Project(DbObject, Updateable, Deletable): last_activity_time = Field.DateTime("last_activity_time") auto_audit_number_of_labels = Field.Int("auto_audit_number_of_labels") auto_audit_percentage = Field.Float("auto_audit_percentage") + tag_set_status = Field.String("tag_set_status") # Relationships datasets = Relationship.ToMany("Dataset", True) @@ -248,8 +252,7 @@ def export_labels(self, download=False, timeout_seconds=600): if timeout_seconds <= 0: return None - logger.debug("Project '%s' label export, waiting for server...", - self.uid) + logger.debug("Project '%s' label export, waiting for server...", self.uid) time.sleep(sleep_time) def export_issues(self, status=None): @@ -424,6 +427,104 @@ def setup(self, labeling_frontend, labeling_frontend_options): timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") self.update(setup_complete=timestamp) + def _post_batch(self, name, method, data_rows): + """Create """ + + ids = [dr.uid for dr in data_rows] + + if len(ids) > 1000: + raise ValueError("1000 Max DataRows at a time") + + query = """mutation %s($projectId: ID!, $dataRowIds: [ID!]!) { + project(where: {id: $projectId}) { + %s(data: {dataRowIds: $dataRowIds}) { + dataRows { + dataRowId + error + } + } + } + } + """ % (name, method) + + res = self.client.execute( + query, + {"projectId": self.uid, "dataRowIds": ids} + )["project"]["submitBatchOfDataRows"]["dataRows"] + + # TODO: figure out error messaging + if len(data_rows) == len(res): + raise ValueError("No dataRows were submitted successfully") + + if len(data_rows) > 0: + warnings.warn("Some Data Rows were not submitted successfully") + + return res + + def queue_data_rows(self, data_rows): + """Add DataRows to the Project queue""" + + if not self._is_batch_mode(): + warnings.warn("Project not in Batch mode, ") + + method = "submitBatchOfDataRows" + name = method + 'PyApi' + return self._post_batch(name, method, data_rows) + + def dequeue_data_rows(self, data_rows): + + if not self._is_batch_mode(): + warnings.warn("Project not in Batch mode") + + method = "removeBatchOfDataRows" + name = method + 'PyApi' + + return self._post_batch(name, method, data_rows) + + def change_queue_mode(self, mode: str): + """Change the queue between Batch and Datasets mode + + Args: + mode: `BATCH` or `DATASET` + """ + if mode == "BATCH": + self._update_queue_mode("ENABLED") + + elif mode == "DATASET": + self._update_queue_mode("DISABLED") + else: + raise ValueError("Must provide either `BATCH` or `DATASET` as a mode") + + def _update_queue_mode(self, status: str): + + query_str = """mutation %s($projectId: ID!, $status: TagSetStatusInput!) { + project(where: {id: $projectId}) { + setTagSetStatus(input: {tagSetStatus: $status}) { + id + tagSetStatus + __typename + } + __typename + } + } + """ % "setTagSetStatusPyApi" + + self.client.execute(query_str, {'projectId': self.uid, 'status': status}) + self.tag_set_status = status + + def queue_mode(self): + + if self._is_batch_mode(): + return "BATCH" + else: + return "DATASET" + + def _is_batch_mode(self): + if self.tag_set_status == "ENABLED": + return True + else: + return False + def validate_labeling_parameter_overrides(self, data): for idx, row in enumerate(data): if len(row) != 3: @@ -689,7 +790,7 @@ class LabelingParameterOverride(DbObject): LabelerPerformance = namedtuple( "LabelerPerformance", "user count seconds_per_label, total_time_labeling " - "consensus average_benchmark_agreement last_activity_time") + "consensus average_benchmark_agreement last_activity_time") LabelerPerformance.__doc__ = ( "Named tuple containing info about a labeler's performance.") diff --git a/tests/integration/test_batch.py b/tests/integration/test_batch.py new file mode 100644 index 000000000..8326204fa --- /dev/null +++ b/tests/integration/test_batch.py @@ -0,0 +1,29 @@ +import pytest + +from labelbox import Dataset, Project + +IMAGE_URL = "https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000000034.jpg" + +@pytest.fixture +def big_dataset(dataset: Dataset): + task = dataset.create_data_rows( + [ + { + "row_data": IMAGE_URL, + "external_id": "my-image" + }, + ] * 250) + task.wait_till_done() + + yield dataset + dataset.delete() + + +def test_submit_batch(configured_project: Project, big_dataset): + configured_project.change_queue_mode("BATCH") + + data_rows = list(big_dataset.export_data_rows()) + queue_res = configured_project.queue_data_rows(data_rows) + assert not len(queue_res) + dequeue_res = configured_project.dequeue_data_rows(data_rows) + assert not len(dequeue_res) diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index ef4307dfa..d3778972c 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ -1,7 +1,5 @@ import json -import requests -import ndjson import pytest from labelbox import Project, LabelingFrontend @@ -107,3 +105,10 @@ def test_attach_instructions(client, project): def test_queued_data_row_export(configured_project): result = configured_project.export_queued_data_rows() assert len(result) == 1 + + +def test_queue_mode(configured_project: Project): + assert configured_project.tag_set_status == 'DISABLED' + configured_project.change_queue_mode("BATCH") + # TODO: understand why this fails + assert configured_project.tag_set_status == 'ENABLED' From 2c00f5683b5eda037e69b490366e95f00450c7af Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Wed, 29 Sep 2021 11:55:30 -0700 Subject: [PATCH 03/17] FIX: dequeue method --- labelbox/schema/project.py | 47 +++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index d1f190d3b..23ff965b8 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -34,6 +34,8 @@ logger = logging.getLogger(__name__) +MAX_BATCH_SIZE = 1000 + class Project(DbObject, Updateable, Deletable): """ A Project is a container that includes a labeling frontend, an ontology, @@ -427,6 +429,28 @@ def setup(self, labeling_frontend, labeling_frontend_options): timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") self.update(setup_complete=timestamp) + + + def queue_data_rows(self, data_rows): + """Add DataRows to the Project queue""" + + if not self._is_batch_mode(): + warnings.warn("Project not in Batch mode, ") + + method = "submitBatchOfDataRows" + name = method + 'PyApi' + return self._post_batch(name, method, data_rows) + + def dequeue_data_rows(self, data_rows): + + if not self._is_batch_mode(): + warnings.warn("Project not in Batch mode") + + method = "removeBatchOfDataRows" + name = method + 'PyApi' + + return self._post_batch(name, method, data_rows) + def _post_batch(self, name, method, data_rows): """Create """ @@ -450,7 +474,7 @@ def _post_batch(self, name, method, data_rows): res = self.client.execute( query, {"projectId": self.uid, "dataRowIds": ids} - )["project"]["submitBatchOfDataRows"]["dataRows"] + )["project"][method]["dataRows"] # TODO: figure out error messaging if len(data_rows) == len(res): @@ -461,26 +485,6 @@ def _post_batch(self, name, method, data_rows): return res - def queue_data_rows(self, data_rows): - """Add DataRows to the Project queue""" - - if not self._is_batch_mode(): - warnings.warn("Project not in Batch mode, ") - - method = "submitBatchOfDataRows" - name = method + 'PyApi' - return self._post_batch(name, method, data_rows) - - def dequeue_data_rows(self, data_rows): - - if not self._is_batch_mode(): - warnings.warn("Project not in Batch mode") - - method = "removeBatchOfDataRows" - name = method + 'PyApi' - - return self._post_batch(name, method, data_rows) - def change_queue_mode(self, mode: str): """Change the queue between Batch and Datasets mode @@ -495,6 +499,7 @@ def change_queue_mode(self, mode: str): else: raise ValueError("Must provide either `BATCH` or `DATASET` as a mode") + def _update_queue_mode(self, status: str): query_str = """mutation %s($projectId: ID!, $status: TagSetStatusInput!) { From 023c7ba66e42b121fb565319508710b6d8a42351 Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Wed, 29 Sep 2021 12:05:21 -0700 Subject: [PATCH 04/17] FIX: dequeue method --- labelbox/schema/project.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 23ff965b8..5fc53a63a 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -429,8 +429,6 @@ def setup(self, labeling_frontend, labeling_frontend_options): timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") self.update(setup_complete=timestamp) - - def queue_data_rows(self, data_rows): """Add DataRows to the Project queue""" @@ -499,7 +497,6 @@ def change_queue_mode(self, mode: str): else: raise ValueError("Must provide either `BATCH` or `DATASET` as a mode") - def _update_queue_mode(self, status: str): query_str = """mutation %s($projectId: ID!, $status: TagSetStatusInput!) { From 4dd80298070de634d020e3f5e7a0d445930990e9 Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Tue, 2 Nov 2021 18:12:27 -0700 Subject: [PATCH 05/17] CHG: update methods for GA --- labelbox/schema/project.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 5fc53a63a..775b54d37 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -419,7 +419,7 @@ def setup(self, labeling_frontend, labeling_frontend_options): self.labeling_frontend.connect(labeling_frontend) LFO = Entity.LabelingFrontendOptions - labeling_frontend_options = self.client._create( + self.client._create( LFO, { LFO.project: self, LFO.labeling_frontend: labeling_frontend, @@ -429,35 +429,33 @@ def setup(self, labeling_frontend, labeling_frontend_options): timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") self.update(setup_complete=timestamp) - def queue_data_rows(self, data_rows): + def queue(self, data_rows): """Add DataRows to the Project queue""" if not self._is_batch_mode(): warnings.warn("Project not in Batch mode, ") method = "submitBatchOfDataRows" - name = method + 'PyApi' - return self._post_batch(name, method, data_rows) + return self._post_batch(method, data_rows) - def dequeue_data_rows(self, data_rows): + def dequeue(self, data_rows): if not self._is_batch_mode(): warnings.warn("Project not in Batch mode") method = "removeBatchOfDataRows" - name = method + 'PyApi' - return self._post_batch(name, method, data_rows) + return self._post_batch(method, data_rows) - def _post_batch(self, name, method, data_rows): + def _post_batch(self, method, data_rows): """Create """ ids = [dr.uid for dr in data_rows] - if len(ids) > 1000: + if len(ids) > MAX_BATCH_SIZE: raise ValueError("1000 Max DataRows at a time") - query = """mutation %s($projectId: ID!, $dataRowIds: [ID!]!) { + query = """mutation %sPyApi($projectId: ID!, $dataRowIds: [ID!]!) { project(where: {id: $projectId}) { %s(data: {dataRowIds: $dataRowIds}) { dataRows { @@ -467,7 +465,7 @@ def _post_batch(self, name, method, data_rows): } } } - """ % (name, method) + """ % method res = self.client.execute( query, @@ -802,4 +800,4 @@ def _check_converter_import(): raise ImportError( "Missing dependencies to import converter. " "Use `pip install labelbox[data]` to add missing dependencies. " - "or download raw json with project.export_labels()") + "or download raw json with project.export_labels()") \ No newline at end of file From 6212be5c73dad378938e4c0d6f18236bf0cfc380 Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Tue, 2 Nov 2021 18:23:27 -0700 Subject: [PATCH 06/17] Format and fix bugs --- labelbox/data/annotation_types/data/raster.py | 2 -- labelbox/schema/project.py | 23 +++++++++++-------- .../geometry/test_rectangle.py | 11 +++++---- tests/integration/test_batch.py | 18 +++++++-------- 4 files changed, 29 insertions(+), 25 deletions(-) diff --git a/labelbox/data/annotation_types/data/raster.py b/labelbox/data/annotation_types/data/raster.py index 35ffe751f..15b8138a7 100644 --- a/labelbox/data/annotation_types/data/raster.py +++ b/labelbox/data/annotation_types/data/raster.py @@ -54,8 +54,6 @@ def from_2D_arr(cls, arr: Union[TypedArray[Literal['uint8']], @classmethod def from_2D_array(cls, arr: TypedArray[Literal['uint8']], **kwargs): - - if len(arr.shape) != 2: raise ValueError( f"Found array with shape {arr.shape}. Expected two dimensions ([W,H])" diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 775b54d37..53e888d4c 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -254,7 +254,8 @@ def export_labels(self, download=False, timeout_seconds=600): if timeout_seconds <= 0: return None - logger.debug("Project '%s' label export, waiting for server...", self.uid) + logger.debug("Project '%s' label export, waiting for server...", + self.uid) time.sleep(sleep_time) def export_issues(self, status=None): @@ -467,10 +468,10 @@ def _post_batch(self, method, data_rows): } """ % method - res = self.client.execute( - query, - {"projectId": self.uid, "dataRowIds": ids} - )["project"][method]["dataRows"] + res = self.client.execute(query, { + "projectId": self.uid, + "dataRowIds": ids + })["project"][method]["dataRows"] # TODO: figure out error messaging if len(data_rows) == len(res): @@ -493,7 +494,8 @@ def change_queue_mode(self, mode: str): elif mode == "DATASET": self._update_queue_mode("DISABLED") else: - raise ValueError("Must provide either `BATCH` or `DATASET` as a mode") + raise ValueError( + "Must provide either `BATCH` or `DATASET` as a mode") def _update_queue_mode(self, status: str): @@ -509,7 +511,10 @@ def _update_queue_mode(self, status: str): } """ % "setTagSetStatusPyApi" - self.client.execute(query_str, {'projectId': self.uid, 'status': status}) + self.client.execute(query_str, { + 'projectId': self.uid, + 'status': status + }) self.tag_set_status = status def queue_mode(self): @@ -790,7 +795,7 @@ class LabelingParameterOverride(DbObject): LabelerPerformance = namedtuple( "LabelerPerformance", "user count seconds_per_label, total_time_labeling " - "consensus average_benchmark_agreement last_activity_time") + "consensus average_benchmark_agreement last_activity_time") LabelerPerformance.__doc__ = ( "Named tuple containing info about a labeler's performance.") @@ -800,4 +805,4 @@ def _check_converter_import(): raise ImportError( "Missing dependencies to import converter. " "Use `pip install labelbox[data]` to add missing dependencies. " - "or download raw json with project.export_labels()") \ No newline at end of file + "or download raw json with project.export_labels()") diff --git a/tests/data/annotation_types/geometry/test_rectangle.py b/tests/data/annotation_types/geometry/test_rectangle.py index 369e2b6ec..102d2014c 100644 --- a/tests/data/annotation_types/geometry/test_rectangle.py +++ b/tests/data/annotation_types/geometry/test_rectangle.py @@ -1,6 +1,5 @@ -from pydantic import ValidationError import pytest -import cv2 +from pydantic import ValidationError from labelbox.data.annotation_types import Point, Rectangle @@ -16,7 +15,9 @@ def test_rectangle(): expected['coordinates'] = tuple([tuple([tuple(x) for x in points[0]])]) assert rectangle.shapely.__geo_interface__ == expected - raster = rectangle.draw(height=32, width=32) - assert (cv2.imread("tests/data/assets/rectangle.png") == raster).all() + # raster = rectangle.draw(height=32, width=32) + # assert (cv2.imread("tests/data/assets/rectangle.png") == raster).all() - alt_rectangle = Rectangle.from_tlhw(top=1, left=0, height=) \ No newline at end of file + xyhw = Rectangle.from_xyhw(0., 0, 10, 10) + assert xyhw.start == Point(x=0, y=0.) + assert xyhw.end == Point(x=10, y=10.0) diff --git a/tests/integration/test_batch.py b/tests/integration/test_batch.py index 8326204fa..1161618f6 100644 --- a/tests/integration/test_batch.py +++ b/tests/integration/test_batch.py @@ -4,15 +4,15 @@ IMAGE_URL = "https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000000034.jpg" + @pytest.fixture def big_dataset(dataset: Dataset): - task = dataset.create_data_rows( - [ - { - "row_data": IMAGE_URL, - "external_id": "my-image" - }, - ] * 250) + task = dataset.create_data_rows([ + { + "row_data": IMAGE_URL, + "external_id": "my-image" + }, + ] * 250) task.wait_till_done() yield dataset @@ -23,7 +23,7 @@ def test_submit_batch(configured_project: Project, big_dataset): configured_project.change_queue_mode("BATCH") data_rows = list(big_dataset.export_data_rows()) - queue_res = configured_project.queue_data_rows(data_rows) + queue_res = configured_project.queue(data_rows) assert not len(queue_res) - dequeue_res = configured_project.dequeue_data_rows(data_rows) + dequeue_res = configured_project.dequeue(data_rows) assert not len(dequeue_res) From 93cdfafd07f76cd3af32ecabb2668d2ca01b207a Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Tue, 2 Nov 2021 20:29:03 -0700 Subject: [PATCH 07/17] CHG: move into update statement --- labelbox/schema/project.py | 50 ++++++++++++++++++--------------- tests/integration/test_batch.py | 3 +- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 53e888d4c..bf8fd6c14 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -1,3 +1,4 @@ +import enum import json import logging import time @@ -5,7 +6,7 @@ from collections import namedtuple from datetime import datetime, timezone from pathlib import Path -from typing import Dict, Union, Iterable +from typing import Dict, Union, Iterable, List, Optional from urllib.parse import urlparse import ndjson @@ -37,6 +38,11 @@ MAX_BATCH_SIZE = 1000 +class QueueMode(enum.Enum): + Batch = "Batch" + Dataset = "Dataset" + + class Project(DbObject, Updateable, Deletable): """ A Project is a container that includes a labeling frontend, an ontology, datasets and labels. @@ -70,7 +76,6 @@ class Project(DbObject, Updateable, Deletable): last_activity_time = Field.DateTime("last_activity_time") auto_audit_number_of_labels = Field.Int("auto_audit_number_of_labels") auto_audit_percentage = Field.Float("auto_audit_percentage") - tag_set_status = Field.String("tag_set_status") # Relationships datasets = Relationship.ToMany("Dataset", True) @@ -85,6 +90,14 @@ class Project(DbObject, Updateable, Deletable): benchmarks = Relationship.ToMany("Benchmark", False) ontology = Relationship.ToOne("Ontology", True) + def update(self, **kwargs): + + mode: Optional[QueueMode] = kwargs.pop("queue_mode", None) + if mode: + self._update_queue_mode(mode) + + return super().update(**kwargs) + def members(self): """ Fetch all current members for this project @@ -430,22 +443,21 @@ def setup(self, labeling_frontend, labeling_frontend_options): timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") self.update(setup_complete=timestamp) - def queue(self, data_rows): + def queue(self, data_rows: List): """Add DataRows to the Project queue""" if not self._is_batch_mode(): - warnings.warn("Project not in Batch mode, ") + warnings.warn("Project not in Batch mode") method = "submitBatchOfDataRows" return self._post_batch(method, data_rows) - def dequeue(self, data_rows): + def dequeue(self, data_rows: List): if not self._is_batch_mode(): warnings.warn("Project not in Batch mode") method = "removeBatchOfDataRows" - return self._post_batch(method, data_rows) def _post_batch(self, method, data_rows): @@ -482,22 +494,16 @@ def _post_batch(self, method, data_rows): return res - def change_queue_mode(self, mode: str): - """Change the queue between Batch and Datasets mode - - Args: - mode: `BATCH` or `DATASET` - """ - if mode == "BATCH": - self._update_queue_mode("ENABLED") + def _update_queue_mode(self, mode: QueueMode): - elif mode == "DATASET": - self._update_queue_mode("DISABLED") + if mode == QueueMode.Batch: + status = "ENABLED" + elif mode == QueueMode: + status = "DISABLED" else: raise ValueError( - "Must provide either `BATCH` or `DATASET` as a mode") - - def _update_queue_mode(self, status: str): + "Must provide either `BATCH` or `DATASET` as a mode" + ) query_str = """mutation %s($projectId: ID!, $status: TagSetStatusInput!) { project(where: {id: $projectId}) { @@ -520,9 +526,9 @@ def _update_queue_mode(self, status: str): def queue_mode(self): if self._is_batch_mode(): - return "BATCH" + return QueueMode.Batch else: - return "DATASET" + return QueueMode.Dataset def _is_batch_mode(self): if self.tag_set_status == "ENABLED": @@ -795,7 +801,7 @@ class LabelingParameterOverride(DbObject): LabelerPerformance = namedtuple( "LabelerPerformance", "user count seconds_per_label, total_time_labeling " - "consensus average_benchmark_agreement last_activity_time") + "consensus average_benchmark_agreement last_activity_time") LabelerPerformance.__doc__ = ( "Named tuple containing info about a labeler's performance.") diff --git a/tests/integration/test_batch.py b/tests/integration/test_batch.py index 1161618f6..df42e63a5 100644 --- a/tests/integration/test_batch.py +++ b/tests/integration/test_batch.py @@ -1,6 +1,7 @@ import pytest from labelbox import Dataset, Project +from labelbox.schema.project import QueueMode IMAGE_URL = "https://storage.googleapis.com/diagnostics-demo-data/coco/COCO_train2014_000000000034.jpg" @@ -20,7 +21,7 @@ def big_dataset(dataset: Dataset): def test_submit_batch(configured_project: Project, big_dataset): - configured_project.change_queue_mode("BATCH") + configured_project.update(queue_mode=QueueMode.Batch) data_rows = list(big_dataset.export_data_rows()) queue_res = configured_project.queue(data_rows) From 3f927dc0b16a9d96ce84775e193288f7bf376cae Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Wed, 3 Nov 2021 10:30:45 -0700 Subject: [PATCH 08/17] CHG: remove internal flag --- labelbox/schema/project.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index bf8fd6c14..26c85079c 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -43,6 +43,12 @@ class QueueMode(enum.Enum): Dataset = "Dataset" +class QueueErrors(enum.Enum): + InvalidDataRowType = 'InvalidDataRowType' + AlreadyInProject = 'AlreadyInProject' + HasAttachedLabel = 'HasAttachedLabel' + + class Project(DbObject, Updateable, Deletable): """ A Project is a container that includes a labeling frontend, an ontology, datasets and labels. @@ -426,7 +432,7 @@ def setup(self, labeling_frontend, labeling_frontend_options): a.k.a. project ontology. If given a `dict` it will be converted to `str` using `json.dumps`. """ - organization = self.client.get_organization() + if not isinstance(labeling_frontend_options, str): labeling_frontend_options = json.dumps(labeling_frontend_options) @@ -498,7 +504,7 @@ def _update_queue_mode(self, mode: QueueMode): if mode == QueueMode.Batch: status = "ENABLED" - elif mode == QueueMode: + elif mode == QueueMode.Dataset: status = "DISABLED" else: raise ValueError( @@ -521,20 +527,26 @@ def _update_queue_mode(self, mode: QueueMode): 'projectId': self.uid, 'status': status }) - self.tag_set_status = status def queue_mode(self): - if self._is_batch_mode(): + query_str = """query %s($projectId: ID!, $status: TagSetStatusInput!) { + project(where: {id: $projectId}) { + id + tagSetStatus + __typename + } + } + """ % "GetTagSetStatusPyApi" + + status = self.client.execute(query_str, {'projectId': self.uid})["project"]["tagSetStatus"] + + if status == "ENABLED": return QueueMode.Batch - else: + elif status == "DISABLED": return QueueMode.Dataset - - def _is_batch_mode(self): - if self.tag_set_status == "ENABLED": - return True else: - return False + raise ValueError("this is weird") def validate_labeling_parameter_overrides(self, data): for idx, row in enumerate(data): From d3f48376291022bd8713a9a833d9098a9f4a8451 Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Wed, 3 Nov 2021 10:59:59 -0700 Subject: [PATCH 09/17] CHG: data row ids and require batch mode --- labelbox/schema/project.py | 30 ++++++++++++++---------------- tests/integration/test_batch.py | 2 +- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 26c85079c..306fdd1c4 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -449,29 +449,27 @@ def setup(self, labeling_frontend, labeling_frontend_options): timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") self.update(setup_complete=timestamp) - def queue(self, data_rows: List): + def queue(self, data_row_ids: List[str]): """Add DataRows to the Project queue""" - if not self._is_batch_mode(): - warnings.warn("Project not in Batch mode") + if self.queue_mode() != QueueMode.Batch: + raise ValueError("Project must be in batch mode") method = "submitBatchOfDataRows" - return self._post_batch(method, data_rows) + return self._post_batch(method, data_row_ids) - def dequeue(self, data_rows: List): + def dequeue(self, data_row_ids: List[str]): - if not self._is_batch_mode(): - warnings.warn("Project not in Batch mode") + if self.queue_mode() != QueueMode.Batch: + raise ValueError("Project must be in batch mode") method = "removeBatchOfDataRows" - return self._post_batch(method, data_rows) + return self._post_batch(method, data_row_ids) - def _post_batch(self, method, data_rows): + def _post_batch(self, method, data_row_ids): """Create """ - ids = [dr.uid for dr in data_rows] - - if len(ids) > MAX_BATCH_SIZE: + if len(data_row_ids) > MAX_BATCH_SIZE: raise ValueError("1000 Max DataRows at a time") query = """mutation %sPyApi($projectId: ID!, $dataRowIds: [ID!]!) { @@ -488,14 +486,14 @@ def _post_batch(self, method, data_rows): res = self.client.execute(query, { "projectId": self.uid, - "dataRowIds": ids + "dataRowIds": data_row_ids })["project"][method]["dataRows"] # TODO: figure out error messaging - if len(data_rows) == len(res): + if len(data_row_ids) == len(res): raise ValueError("No dataRows were submitted successfully") - if len(data_rows) > 0: + if len(data_row_ids) > 0: warnings.warn("Some Data Rows were not submitted successfully") return res @@ -546,7 +544,7 @@ def queue_mode(self): elif status == "DISABLED": return QueueMode.Dataset else: - raise ValueError("this is weird") + raise ValueError("Status not known") def validate_labeling_parameter_overrides(self, data): for idx, row in enumerate(data): diff --git a/tests/integration/test_batch.py b/tests/integration/test_batch.py index df42e63a5..8534d8d64 100644 --- a/tests/integration/test_batch.py +++ b/tests/integration/test_batch.py @@ -23,7 +23,7 @@ def big_dataset(dataset: Dataset): def test_submit_batch(configured_project: Project, big_dataset): configured_project.update(queue_mode=QueueMode.Batch) - data_rows = list(big_dataset.export_data_rows()) + data_rows = [dr.uid for dr in list(big_dataset.export_data_rows())] queue_res = configured_project.queue(data_rows) assert not len(queue_res) dequeue_res = configured_project.dequeue(data_rows) From 97c00adc348165fe40890e28776fdee0e3b5e0b0 Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Wed, 3 Nov 2021 12:14:24 -0700 Subject: [PATCH 10/17] CHG: address comments --- labelbox/schema/project.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 306fdd1c4..bd0ce7192 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -466,11 +466,11 @@ def dequeue(self, data_row_ids: List[str]): method = "removeBatchOfDataRows" return self._post_batch(method, data_row_ids) - def _post_batch(self, method, data_row_ids): + def _post_batch(self, method, data_row_ids: List[str]): """Create """ if len(data_row_ids) > MAX_BATCH_SIZE: - raise ValueError("1000 Max DataRows at a time") + raise ValueError(f"Exceed max batch size of {MAX_BATCH_SIZE}") query = """mutation %sPyApi($projectId: ID!, $dataRowIds: [ID!]!) { project(where: {id: $projectId}) { @@ -498,7 +498,10 @@ def _post_batch(self, method, data_row_ids): return res - def _update_queue_mode(self, mode: QueueMode): + def _update_queue_mode(self, mode: QueueMode) -> QueueMode: + + if self.queue_mode() == mode: + return mode if mode == QueueMode.Batch: status = "ENABLED" @@ -526,6 +529,8 @@ def _update_queue_mode(self, mode: QueueMode): 'status': status }) + return mode + def queue_mode(self): query_str = """query %s($projectId: ID!, $status: TagSetStatusInput!) { From 8b6b7a239a3b1bbbe5f2a85973ee2efd0c1200d6 Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Thu, 4 Nov 2021 15:57:58 -0700 Subject: [PATCH 11/17] FIX: gql formatting --- labelbox/schema/project.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index bd0ce7192..26896940e 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -482,7 +482,7 @@ def _post_batch(self, method, data_row_ids: List[str]): } } } - """ % method + """ % (method, method) res = self.client.execute(query, { "projectId": self.uid, @@ -509,8 +509,7 @@ def _update_queue_mode(self, mode: QueueMode) -> QueueMode: status = "DISABLED" else: raise ValueError( - "Must provide either `BATCH` or `DATASET` as a mode" - ) + "Must provide either `BATCH` or `DATASET` as a mode") query_str = """mutation %s($projectId: ID!, $status: TagSetStatusInput!) { project(where: {id: $projectId}) { @@ -533,7 +532,7 @@ def _update_queue_mode(self, mode: QueueMode) -> QueueMode: def queue_mode(self): - query_str = """query %s($projectId: ID!, $status: TagSetStatusInput!) { + query_str = """query %s($projectId: ID!) { project(where: {id: $projectId}) { id tagSetStatus @@ -542,7 +541,8 @@ def queue_mode(self): } """ % "GetTagSetStatusPyApi" - status = self.client.execute(query_str, {'projectId': self.uid})["project"]["tagSetStatus"] + status = self.client.execute( + query_str, {'projectId': self.uid})["project"]["tagSetStatus"] if status == "ENABLED": return QueueMode.Batch @@ -816,7 +816,7 @@ class LabelingParameterOverride(DbObject): LabelerPerformance = namedtuple( "LabelerPerformance", "user count seconds_per_label, total_time_labeling " - "consensus average_benchmark_agreement last_activity_time") + "consensus average_benchmark_agreement last_activity_time") LabelerPerformance.__doc__ = ( "Named tuple containing info about a labeler's performance.") From 8ad5b458e335a07343ae2e2cc5fdd6f5eca08a05 Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Fri, 5 Nov 2021 11:06:11 -0700 Subject: [PATCH 12/17] FIX: change queue mode tests --- tests/integration/test_project.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index d3778972c..a3704944b 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ -4,6 +4,7 @@ from labelbox import Project, LabelingFrontend from labelbox.exceptions import InvalidQueryError +from labelbox.schema.project import QueueMode def test_project(client, rand_gen): @@ -108,7 +109,7 @@ def test_queued_data_row_export(configured_project): def test_queue_mode(configured_project: Project): - assert configured_project.tag_set_status == 'DISABLED' - configured_project.change_queue_mode("BATCH") + assert configured_project.queue_mode() == QueueMode.Dataset + configured_project.update(queue_mode=QueueMode.Batch) # TODO: understand why this fails - assert configured_project.tag_set_status == 'ENABLED' + assert configured_project.queue_mode() == QueueMode.Batch From e5d196f118688ca77f0e07e6a1f5fa0edefcac95 Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Fri, 5 Nov 2021 14:21:05 -0700 Subject: [PATCH 13/17] remove method --- labelbox/data/annotation_types/data/raster.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/labelbox/data/annotation_types/data/raster.py b/labelbox/data/annotation_types/data/raster.py index 15b8138a7..c5e0aba9c 100644 --- a/labelbox/data/annotation_types/data/raster.py +++ b/labelbox/data/annotation_types/data/raster.py @@ -51,16 +51,6 @@ def from_2D_arr(cls, arr: Union[TypedArray[Literal['uint8']], arr = np.stack((arr,) * 3, axis=-1) return cls(arr=arr, **kwargs) - @classmethod - def from_2D_array(cls, arr: TypedArray[Literal['uint8']], **kwargs): - - if len(arr.shape) != 2: - raise ValueError( - f"Found array with shape {arr.shape}. Expected two dimensions ([W,H])" - ) - arr = np.stack((arr,) * 3, axis=-1) - return cls(arr=arr, **kwargs) - def bytes_to_np(self, image_bytes: bytes) -> np.ndarray: """ Converts image bytes to a numpy array From 8be9e0da1e7f0ccedb624eefaa3e6c05cd6e46b1 Mon Sep 17 00:00:00 2001 From: Gareth Date: Mon, 8 Nov 2021 12:54:37 -0600 Subject: [PATCH 14/17] Update project.py --- labelbox/schema/project.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 26896940e..d4a48f97e 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -450,27 +450,25 @@ def setup(self, labeling_frontend, labeling_frontend_options): self.update(setup_complete=timestamp) def queue(self, data_row_ids: List[str]): - """Add DataRows to the Project queue""" - - if self.queue_mode() != QueueMode.Batch: - raise ValueError("Project must be in batch mode") + """Add Data Rows to the Project queue""" method = "submitBatchOfDataRows" return self._post_batch(method, data_row_ids) def dequeue(self, data_row_ids: List[str]): - - if self.queue_mode() != QueueMode.Batch: - raise ValueError("Project must be in batch mode") + """Remove Data Rows from the Project queue""" method = "removeBatchOfDataRows" return self._post_batch(method, data_row_ids) def _post_batch(self, method, data_row_ids: List[str]): - """Create """ + """Post batch methods""" + + if self.queue_mode() != QueueMode.Batch: + raise ValueError("Project must be in batch mode") if len(data_row_ids) > MAX_BATCH_SIZE: - raise ValueError(f"Exceed max batch size of {MAX_BATCH_SIZE}") + raise ValueError(f"Batch exceeds max size of {MAX_BATCH_SIZE}, consider breaking it into parts") query = """mutation %sPyApi($projectId: ID!, $dataRowIds: [ID!]!) { project(where: {id: $projectId}) { From 89a9b00b714215794dc1601e17962db2ef0f92bd Mon Sep 17 00:00:00 2001 From: Gareth Date: Mon, 8 Nov 2021 11:49:23 -0800 Subject: [PATCH 15/17] Address pr comments --- labelbox/schema/project.py | 11 +++-------- .../data/annotation_types/geometry/test_rectangle.py | 4 ++-- tests/integration/test_project.py | 1 - 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index d4a48f97e..43a57d3ed 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -35,7 +35,7 @@ logger = logging.getLogger(__name__) -MAX_BATCH_SIZE = 1000 +MAX_QUEUE_BATCH_SIZE = 1000 class QueueMode(enum.Enum): @@ -467,8 +467,8 @@ def _post_batch(self, method, data_row_ids: List[str]): if self.queue_mode() != QueueMode.Batch: raise ValueError("Project must be in batch mode") - if len(data_row_ids) > MAX_BATCH_SIZE: - raise ValueError(f"Batch exceeds max size of {MAX_BATCH_SIZE}, consider breaking it into parts") + if len(data_row_ids) > MAX_QUEUE_BATCH_SIZE: + raise ValueError(f"Batch exceeds max size of {MAX_QUEUE_BATCH_SIZE}, consider breaking it into parts") query = """mutation %sPyApi($projectId: ID!, $dataRowIds: [ID!]!) { project(where: {id: $projectId}) { @@ -512,11 +512,8 @@ def _update_queue_mode(self, mode: QueueMode) -> QueueMode: query_str = """mutation %s($projectId: ID!, $status: TagSetStatusInput!) { project(where: {id: $projectId}) { setTagSetStatus(input: {tagSetStatus: $status}) { - id tagSetStatus - __typename } - __typename } } """ % "setTagSetStatusPyApi" @@ -532,9 +529,7 @@ def queue_mode(self): query_str = """query %s($projectId: ID!) { project(where: {id: $projectId}) { - id tagSetStatus - __typename } } """ % "GetTagSetStatusPyApi" diff --git a/tests/data/annotation_types/geometry/test_rectangle.py b/tests/data/annotation_types/geometry/test_rectangle.py index 102d2014c..fa13deebc 100644 --- a/tests/data/annotation_types/geometry/test_rectangle.py +++ b/tests/data/annotation_types/geometry/test_rectangle.py @@ -15,8 +15,8 @@ def test_rectangle(): expected['coordinates'] = tuple([tuple([tuple(x) for x in points[0]])]) assert rectangle.shapely.__geo_interface__ == expected - # raster = rectangle.draw(height=32, width=32) - # assert (cv2.imread("tests/data/assets/rectangle.png") == raster).all() + raster = rectangle.draw(height=32, width=32) + assert (cv2.imread("tests/data/assets/rectangle.png") == raster).all() xyhw = Rectangle.from_xyhw(0., 0, 10, 10) assert xyhw.start == Point(x=0, y=0.) diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index a3704944b..ec7a24ac7 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ -111,5 +111,4 @@ def test_queued_data_row_export(configured_project): def test_queue_mode(configured_project: Project): assert configured_project.queue_mode() == QueueMode.Dataset configured_project.update(queue_mode=QueueMode.Batch) - # TODO: understand why this fails assert configured_project.queue_mode() == QueueMode.Batch From 9760126f46908093d4820191a5eca17cc14e447e Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Mon, 8 Nov 2021 15:56:44 -0600 Subject: [PATCH 16/17] Format and use test dir --- labelbox/schema/project.py | 6 ++++-- tests/data/annotation_types/data/test_text.py | 14 +++++++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/labelbox/schema/project.py b/labelbox/schema/project.py index 43a57d3ed..14bb96230 100644 --- a/labelbox/schema/project.py +++ b/labelbox/schema/project.py @@ -463,12 +463,14 @@ def dequeue(self, data_row_ids: List[str]): def _post_batch(self, method, data_row_ids: List[str]): """Post batch methods""" - + if self.queue_mode() != QueueMode.Batch: raise ValueError("Project must be in batch mode") if len(data_row_ids) > MAX_QUEUE_BATCH_SIZE: - raise ValueError(f"Batch exceeds max size of {MAX_QUEUE_BATCH_SIZE}, consider breaking it into parts") + raise ValueError( + f"Batch exceeds max size of {MAX_QUEUE_BATCH_SIZE}, consider breaking it into parts" + ) query = """mutation %sPyApi($projectId: ID!, $dataRowIds: [ID!]!) { project(where: {id: $projectId}) { diff --git a/tests/data/annotation_types/data/test_text.py b/tests/data/annotation_types/data/test_text.py index 4bca4f939..35dc20a28 100644 --- a/tests/data/annotation_types/data/test_text.py +++ b/tests/data/annotation_types/data/test_text.py @@ -1,3 +1,5 @@ +import os + import pytest from pydantic import ValidationError @@ -22,11 +24,13 @@ def test_url(): assert len(text) == 3541 -def test_file(): - file_path = "tests/data/assets/sample_text.txt" - text_data = TextData(file_path=file_path) - text = text_data.value - assert len(text) == 3541 +def test_file(tmpdir): + content = "foo bar baz" + file = "hello.txt" + dir = tmpdir.mkdir('data') + dir.join(file).write(content) + text_data = TextData(file_path=os.path.join(dir.strpath, file)) + assert len(text_data.value) == len(content) def test_ref(): From e0ed0b67103c044eaa14f5d45b514eb00c68b3bf Mon Sep 17 00:00:00 2001 From: gdj0nes Date: Mon, 8 Nov 2021 18:17:53 -0600 Subject: [PATCH 17/17] FIX: import --- tests/data/annotation_types/geometry/test_rectangle.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/data/annotation_types/geometry/test_rectangle.py b/tests/data/annotation_types/geometry/test_rectangle.py index fa13deebc..d8586aeb7 100644 --- a/tests/data/annotation_types/geometry/test_rectangle.py +++ b/tests/data/annotation_types/geometry/test_rectangle.py @@ -1,3 +1,4 @@ +import cv2 import pytest from pydantic import ValidationError