Labelbox · kkim-labelbox · Nov 29, 2022 · Nov 14, 2022 · Nov 14, 2022 · Nov 15, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,16 @@
 # Changelog
 
+# Version 3.31.0 (2022-11-28)
+### Added
+* Added `client.clear_global_keys()` to remove global keys from their associated data rows
+* Added a new attribute `confidence` to `ObjectAnnotation` and `ClassificationAnswer` for Model Error Analysis
+
+### Fixed
+* Fixed `project.create_batch()` to work with both data_row_ids and data_row objects
+
 # Version 3.30.1 (2022-11-16)
+### Added
+* Added step to `project.create_batch()` to wait for data rows to finish processing
 ### Fixed
 * Running `project.setup_editor()` multiple times no longer resets the ontology, and instead raises an error if the editor is already set up for the project
 

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -21,7 +21,7 @@
 copyright = '2021, Labelbox'
 author = 'Labelbox'
 
-release = '3.30.1'
+release = '3.31.0'
 
 # -- General configuration ---------------------------------------------------
 

diff --git a/examples/README.md b/examples/README.md
@@ -62,6 +62,7 @@ Learn more about annotation types in the [docs](https://docs.labelbox.com/docs/a
 | Text Annotation Import | [Github](annotation_import/text.ipynb)               | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/develop/examples/annotation_import/text.ipynb) |
 | Tiled Imagery Annotation Import    | [Github](annotation_import/tiled.ipynb)   | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/develop/examples/annotation_import/tiled.ipynb) |
 | Video Model-Assisted Labeling                    | [Github](annotation_import/video.ipynb)             | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/develop/examples/annotation_import/video.ipynb) | 
+| PDF Annotation Import | [Github](annotation_import/pdf.ipynb)               | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Labelbox/labelbox-python/blob/develop/examples/annotation_import/pdf.ipynb) |
 ------
 
 ## [Project Configuration](project_configuration)

diff --git a/examples/annotation_import/pdf.ipynb b/examples/annotation_import/pdf.ipynb
@@ -16,12 +16,12 @@
    "metadata": {},
    "source": [
     "<td>\n",
-    "<a href=\"https://colab.research.google.com/github/Labelbox/labelbox-python/blob/develop/examples/annotation_import/pdf_mal.ipynb\" target=\"_blank\"><img\n",
+    "<a href=\"https://colab.research.google.com/github/Labelbox/labelbox-python/blob/develop/examples/annotation_import/pdf.ipynb\" target=\"_blank\"><img\n",
     "src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
     "</td>\n",
     "\n",
     "<td>\n",
-    "<a href=\"https://github.com/Labelbox/labelbox-python/tree/develop/examples/annotation_import/pdf_mal.ipynb\" target=\"_blank\"><img\n",
+    "<a href=\"https://github.com/Labelbox/labelbox-python/tree/develop/examples/annotation_import/pdf.ipynb\" target=\"_blank\"><img\n",
     "src=\"https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white\" alt=\"GitHub\"></a>\n",
     "</td>"
    ]
@@ -447,14 +447,6 @@
     "# This will provide information only after the upload_job is complete, so we do not need to worry about having to rerun\n",
     "print(\"Errors:\", upload_job.errors)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "ba9dc45a",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

diff --git a/labelbox/__init__.py b/labelbox/__init__.py
@@ -1,5 +1,5 @@
 name = "labelbox"
-__version__ = "3.30.1"
+__version__ = "3.31.0"
 
 from labelbox.client import Client
 from labelbox.schema.project import Project

diff --git a/labelbox/client.py b/labelbox/client.py
@@ -1229,6 +1229,102 @@ def _format_failed_rows(rows: List[str],
                 )
             time.sleep(sleep_time)
 
+    def clear_global_keys(
+            self,
+            global_keys: List[str],
+            timeout_seconds=60) -> Dict[str, Union[str, List[Any]]]:
+        """
+        Clears global keys for the data rows that correspond to the global keys provided.
+
+        Args:
+            global_keys: A list of global keys
+        Returns:
+            Dictionary containing 'status', 'results' and 'errors'.
+
+            'Status' contains the outcome of this job. It can be one of
+            'Success', 'Partial Success', or 'Failure'.
+
+            'Results' contains a list of global keys that were successfully cleared.
+
+            'Errors' contains a list of dicts, each holding a 'global_key' and an 'error' message,
+            for the data rows that could not be modified, accessed by the user, or found.
+        Examples:
+            >>> job_result = client.clear_global_keys(["key1","key2"])
+            >>> print(job_result['status'])
+            Partial Success
+            >>> print(job_result['results'])
+            ['key1']
+            >>> print(job_result['errors'])
+            [{'global_key': 'key2', 'error': 'Failed to find data row matching provided global key'}]
+        """
+
+        def _format_failed_rows(rows: List[str],
+                                error_msg: str) -> List[Dict[str, str]]:
+            return [{'global_key': r, 'error': error_msg} for r in rows]
+
+        # Start clear global keys job
+        query_str = """mutation clearGlobalKeysPyApi($globalKeys: [ID!]!) {
+            clearGlobalKeys(where: {ids: $globalKeys}) { jobId}}
+            """
+        params = {"globalKeys": global_keys}
+        clear_global_keys_job = self.execute(query_str, params)
+
+        # Query string for retrieving job status and result, if job is done
+        result_query_str = """query clearGlobalKeysResultPyApi($jobId: ID!) {
+            clearGlobalKeysResult(jobId: {id: $jobId}) { data { 
+                clearedGlobalKeys
+                failedToClearGlobalKeys
+                notFoundGlobalKeys
+                accessDeniedGlobalKeys
+                } jobStatus}}
+            """
+        result_params = {
+            "jobId": clear_global_keys_job["clearGlobalKeys"]["jobId"]
+        }
+        # Poll job status until finished, then retrieve results
+        sleep_time = 2
+        start_time = time.time()
+        while True:
+            res = self.execute(result_query_str, result_params)
+            if res["clearGlobalKeysResult"]['jobStatus'] == "COMPLETE":
+                data = res["clearGlobalKeysResult"]['data']
+                results, errors = [], []
+                results.extend(data['clearedGlobalKeys'])
+                errors.extend(
+                    _format_failed_rows(data['failedToClearGlobalKeys'],
+                                        "Clearing global key failed"))
+                errors.extend(
+                    _format_failed_rows(
+                        data['notFoundGlobalKeys'],
+                        "Failed to find data row matching provided global key"))
+                errors.extend(
+                    _format_failed_rows(
+                        data['accessDeniedGlobalKeys'],
+                        "Denied access to modify data row matching provided global key"
+                    ))
+
+                if not errors:
+                    status = CollectionJobStatus.SUCCESS.value
+                elif errors and len(results) > 0:
+                    status = CollectionJobStatus.PARTIAL_SUCCESS.value
+                else:
+                    status = CollectionJobStatus.FAILURE.value
+
+                if errors:
+                    logger.warning(
+                        "There are errors present. Please look at 'errors' in the returned dict for more details"
+                    )
+
+                return {"status": status, "results": results, "errors": errors}
+            elif res["clearGlobalKeysResult"]['jobStatus'] == "FAILED":
+                raise labelbox.exceptions.LabelboxError(
+                    "Job clearGlobalKeys failed.")
+            current_time = time.time()
+            if current_time - start_time > timeout_seconds:
+                raise labelbox.exceptions.TimeoutError(
+                    "Timed out waiting for clear_global_keys job to complete.")
+            time.sleep(sleep_time)
+
     def get_catalog_slice(self, slice_id) -> CatalogSlice:
         """
         Fetches a Catalog Slice by ID.

diff --git a/labelbox/data/annotation_types/annotation.py b/labelbox/data/annotation_types/annotation.py
@@ -1,6 +1,8 @@
 import abc
 from typing import Any, Dict, List, Optional, Union
 
+from labelbox.data.mixins import ConfidenceNotSupportedMixin, ConfidenceMixin
+
 from .classification import Checklist, Dropdown, Radio, Text
 from .feature import FeatureSchema
 from .geometry import Geometry, Rectangle, Point
@@ -31,7 +33,7 @@ class ClassificationAnnotation(BaseAnnotation):
     value: Union[Text, Checklist, Radio, Dropdown]
 
 
-class ObjectAnnotation(BaseAnnotation):
+class ObjectAnnotation(BaseAnnotation, ConfidenceMixin):
     """Generic localized annotation (non classifications)
 
     >>> ObjectAnnotation(
@@ -53,7 +55,7 @@ class ObjectAnnotation(BaseAnnotation):
     classifications: List[ClassificationAnnotation] = []
 
 
-class VideoObjectAnnotation(ObjectAnnotation):
+class VideoObjectAnnotation(ObjectAnnotation, ConfidenceNotSupportedMixin):
     """Video object annotation
 
     >>> VideoObjectAnnotation(
@@ -76,6 +78,7 @@ class VideoObjectAnnotation(ObjectAnnotation):
         classifications (List[ClassificationAnnotation]) = []
         extra (Dict[str, Any])
     """
+
     frame: int
     keyframe: bool
     segment_index: Optional[int] = None

diff --git a/labelbox/data/annotation_types/classification/classification.py b/labelbox/data/annotation_types/classification/classification.py
@@ -1,6 +1,8 @@
 from typing import Any, Dict, List, Union, Optional
 import warnings
 
+from labelbox.data.mixins import ConfidenceMixin
+
 try:
     from typing import Literal
 except:
@@ -20,7 +22,7 @@ def dict(self, *args, **kwargs):
         return res
 
 
-class ClassificationAnswer(FeatureSchema):
+class ClassificationAnswer(FeatureSchema, ConfidenceMixin):
     """
     - Represents a classification option.
     - Because it inherits from FeatureSchema

diff --git a/labelbox/data/mixins.py b/labelbox/data/mixins.py
@@ -0,0 +1,32 @@
+from typing import Optional
+
+from pydantic import BaseModel, validator
+
+from labelbox.exceptions import ConfidenceNotSupportedException
+
+
+class ConfidenceMixin(BaseModel):
+    confidence: Optional[float] = None
+
+    @validator('confidence')
+    def confidence_valid_float(cls, value):
+        if value is None:
+            return value
+        if not isinstance(value, (int, float)) or not 0 <= value <= 1:
+            raise ValueError('must be a number within [0,1] range')
+        return value
+
+    def dict(self, *args, **kwargs):
+        res = super().dict(*args, **kwargs)
+        if 'confidence' in res and res['confidence'] is None:
+            res.pop('confidence')
+        return res
+
+
+class ConfidenceNotSupportedMixin:
+
+    def __new__(cls, *args, **kwargs):
+        if 'confidence' in kwargs:
+            raise ConfidenceNotSupportedException(
+                'Confidence is not supported for this annotaiton type yet')
+        return super().__new__(cls)
diff --git a/labelbox/data/serialization/ndjson/classification.py b/labelbox/data/serialization/ndjson/classification.py
@@ -1,6 +1,7 @@
 from typing import Any, Dict, List, Union, Optional
 
 from pydantic import BaseModel, Field, root_validator
+from labelbox.data.mixins import ConfidenceMixin
 
 from labelbox.utils import camel_case
 from ...annotation_types.annotation import ClassificationAnnotation, VideoClassificationAnnotation
@@ -10,7 +11,7 @@
 from .base import NDAnnotation
 
 
-class NDFeature(BaseModel):
+class NDFeature(ConfidenceMixin):
     name: Optional[str] = None
     schema_id: Optional[Cuid] = None
 
@@ -41,7 +42,7 @@ class FrameLocation(BaseModel):
 
 
 class VideoSupported(BaseModel):
-    #Note that frames are only allowed as top level inferences for video
+    # Note that frames are only allowed as top level inferences for video
     frames: Optional[List[FrameLocation]] = None
 
     def dict(self, *args, **kwargs):
@@ -70,15 +71,18 @@ class NDChecklistSubclass(NDFeature):
     def to_common(self) -> Checklist:
         return Checklist(answer=[
             ClassificationAnswer(name=answer.name,
-                                 feature_schema_id=answer.schema_id)
+                                 feature_schema_id=answer.schema_id,
+                                 confidence=answer.confidence)
             for answer in self.answer
         ])
 
     @classmethod
     def from_common(cls, checklist: Checklist, name: str,
                     feature_schema_id: Cuid) -> "NDChecklistSubclass":
         return cls(answer=[
-            NDFeature(name=answer.name, schema_id=answer.feature_schema_id)
+            NDFeature(name=answer.name,
+                      schema_id=answer.feature_schema_id,
+                      confidence=answer.confidence)
             for answer in checklist.answer
         ],
                    name=name,
@@ -95,19 +99,22 @@ class NDRadioSubclass(NDFeature):
     answer: NDFeature
 
     def to_common(self) -> Radio:
-        return Radio(answer=ClassificationAnswer(
-            name=self.answer.name, feature_schema_id=self.answer.schema_id))
+        return Radio(
+            answer=ClassificationAnswer(name=self.answer.name,
+                                        feature_schema_id=self.answer.schema_id,
+                                        confidence=self.answer.confidence))
 
     @classmethod
     def from_common(cls, radio: Radio, name: str,
                     feature_schema_id: Cuid) -> "NDRadioSubclass":
         return cls(answer=NDFeature(name=radio.answer.name,
-                                    schema_id=radio.answer.feature_schema_id),
+                                    schema_id=radio.answer.feature_schema_id,
+                                    confidence=radio.answer.confidence),
                    name=name,
                    schema_id=feature_schema_id)
 
 
-### ====== End of subclasses
+# ====== End of subclasses
 
 
 class NDText(NDAnnotation, NDTextSubclass):
@@ -133,7 +140,9 @@ def from_common(
             extra: Dict[str, Any], data: Union[VideoData, TextData,
                                                ImageData]) -> "NDChecklist":
         return cls(answer=[
-            NDFeature(name=answer.name, schema_id=answer.feature_schema_id)
+            NDFeature(name=answer.name,
+                      schema_id=answer.feature_schema_id,
+                      confidence=answer.confidence)
             for answer in checklist.answer
         ],
                    data_row={'id': data.uid},
@@ -150,7 +159,8 @@ def from_common(cls, radio: Radio, name: str, feature_schema_id: Cuid,
                     extra: Dict[str, Any], data: Union[VideoData, TextData,
                                                        ImageData]) -> "NDRadio":
         return cls(answer=NDFeature(name=radio.answer.name,
-                                    schema_id=radio.answer.feature_schema_id),
+                                    schema_id=radio.answer.feature_schema_id,
+                                    confidence=radio.answer.confidence),
                    data_row={'id': data.uid},
                    name=name,
                    schema_id=feature_schema_id,
@@ -241,6 +251,11 @@ def lookup_classification(
         }.get(type(annotation.value))
 
 
-NDSubclassificationType = Union[NDRadioSubclass, NDChecklistSubclass,
+# Make sure to keep NDChecklistSubclass prior to NDRadioSubclass in the list,
+# otherwise a list of answers gets parsed by NDRadioSubclass whereas NDChecklistSubclass must be used
+NDSubclassificationType = Union[NDChecklistSubclass, NDRadioSubclass,
                                 NDTextSubclass]
-NDClassificationType = Union[NDRadio, NDChecklist, NDText]
+
+# Make sure to keep NDChecklist prior to NDRadio in the list,
+# otherwise a list of answers gets parsed by NDRadio whereas NDChecklist must be used
+NDClassificationType = Union[NDChecklist, NDRadio, NDText]