diff --git a/CHANGELOG.md b/CHANGELOG.md index 4aa275ce5..befc05758 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +# Version 3.21.0 +## Added + * Projects can be created with a `media_type` + * Added `media_type` attribute to `Project` + * New `MediaType` enumeration + +## Fix + * Added back the mimetype to datarow bulk uploads for orgs that require delegated access + # Version 3.20.1 (2022-05-02) ## Updated * Ontology Classification `scope` field is only set for top level classifications diff --git a/examples/basics/projects.ipynb b/examples/basics/projects.ipynb index 7e918dbab..421fab49b 100644 --- a/examples/basics/projects.ipynb +++ b/examples/basics/projects.ipynb @@ -2,15 +2,16 @@ "cells": [ { "cell_type": "markdown", + "metadata": {}, "source": [ "\n", " \n", "" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "\n", "\n", "" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "# Projects" - ], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "* A project can be thought of as a specific labeling task on a set of labels\n", "* That set of labels is defined by the datasets attached to the project\n", @@ -41,123 +42,129 @@ "\n", "** Note that there is a lot of advanced usage that is not covered in this notebook. See project_setup for those functions.\n", "* Also note that deprecated functions are not explained here." 
- ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 1, + "metadata": {}, + "outputs": [], "source": [ "!pip install labelbox" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 2, + "metadata": {}, + "outputs": [], "source": [ "from labelbox import Client\n", + "from labelbox.schema.media_type import MediaType\n", "import os" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "# API Key and Client\n", "Provide a valid api key below in order to properly connect to the Labelbox Client." - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 5, + "metadata": {}, + "outputs": [], "source": [ "# Add your api key\n", "API_KEY = None\n", "client = Client(api_key=API_KEY)" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Create\n" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 6, + "metadata": {}, + "outputs": [], "source": [ - "# Creates an empty project\n", + "# Creates an empty project without a media type\n", "project = client.create_project(name=\"my-test-project\",\n", - " description=\"a description\")" - ], - "outputs": [], - "metadata": {} + " description=\"a description\")\n", + "\n", + "# Creates an empty project a media type\n", + "project = client.create_project(name=\"my-test-project\",\n", + " description=\"a description\",\n", + " media_type=MediaType.Image)" + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Read" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Note the project is not setup (so a lot of these fiels are empty). 
Follow the project setup workflow\n", "print(\"Project is not setup yet:\", project.setup_complete is None)\n", "print(\"Project name:\", project.name)\n", "print(\"Project description:\", project.description)\n", + "print(\"Media Type:\", project.media_type)\n", "print(\"Dataset:\", list(project.datasets()))\n", "print(\"Ontology:\", project.ontology().normalized)\n", "print(\"Benchmarks:\", project.benchmarks())" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Update\n", "\n" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Attach dataset\n", "ds = client.create_dataset(name=\"test-ds\")\n", "project.datasets.connect(ds)\n", "print([ds.name for ds in project.datasets()])\n", "ds.delete()" - ], - "outputs": [], - "metadata": {} + ] }, { "cell_type": "markdown", + "metadata": {}, "source": [ "### Delete" - ], - "metadata": {} + ] }, { "cell_type": "code", "execution_count": 9, + "metadata": {}, + "outputs": [], "source": [ "project.delete()" - ], - "outputs": [], - "metadata": {} + ] } ], "metadata": { @@ -181,4 +188,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/labelbox/__init__.py b/labelbox/__init__.py index 0679c4e33..7d4e16315 100644 --- a/labelbox/__init__.py +++ b/labelbox/__init__.py @@ -1,11 +1,11 @@ name = "labelbox" -__version__ = "3.20.1" +__version__ = "3.21.0" import sys import warnings if sys.version_info < (3, 7): - warnings.warn("""Python 3.6 will no longer be actively supported + warnings.warn("""Python 3.6 will no longer be actively supported starting 06/01/2022. 
Please upgrade to Python 3.7 or higher.""") from labelbox.client import Client @@ -33,3 +33,4 @@ from labelbox.schema.iam_integration import IAMIntegration from labelbox.schema.resource_tag import ResourceTag from labelbox.schema.project_resource_tag import ProjectResourceTag +from labelbox.schema.media_type import MediaType diff --git a/labelbox/client.py b/labelbox/client.py index 49d3d5d1b..133fb4a40 100644 --- a/labelbox/client.py +++ b/labelbox/client.py @@ -32,6 +32,8 @@ from labelbox.schema.project import Project from labelbox.schema.role import Role +from labelbox.schema.media_type import MediaType + logger = logging.getLogger(__name__) _LABELBOX_API_KEY = "LABELBOX_API_KEY" @@ -611,6 +613,15 @@ def create_project(self, **kwargs) -> Project: InvalidAttributeError: If the Project type does not contain any of the attribute names given in kwargs. """ + media_type = kwargs.get("media_type") + if media_type: + if MediaType.is_supported(media_type): + kwargs["media_type"] = media_type.value + else: + raise TypeError(f"{media_type} is not a valid media type. Use" + f" any of {MediaType.get_supported_members()}" + " from MediaType. 
class MediaType(Enum):
    """Media types that can be assigned to a project.

    Every member's value is an upper-case string. Several members share a
    value (``Document``/``Pdf``, ``Geospatial_Tile``/``Tms_Geo`` and
    ``Simple_Tile``/``Tms_Simple``); following the standard ``Enum`` rules the
    first name defined is the canonical member and the later one is an alias,
    so ``MediaType.Pdf is MediaType.Document``.

    ``Unknown`` and ``Unsupported`` are placeholders; ``is_supported`` rejects
    them and ``get_supported_members`` leaves them out.
    """
    Audio = "AUDIO"
    Conversational = "CONVERSATIONAL"
    Dicom = "DICOM"
    Document = "PDF"
    Geospatial_Tile = "TMS_GEO"
    Image = "IMAGE"
    Json = "JSON"
    Pdf = "PDF"
    Simple_Tile = "TMS_SIMPLE"
    Text = "TEXT"
    Tms_Geo = "TMS_GEO"
    Tms_Simple = "TMS_SIMPLE"
    Video = "VIDEO"
    Unknown = "UNKNOWN"
    Unsupported = "UNSUPPORTED"

    @classmethod
    def _missing_(cls, name):
        """Resolve a lookup that matched no member value exactly.

        ``Enum`` calls this hook when ``MediaType(name)`` finds no member
        whose value equals ``name``.

        Args:
            name: The value being looked up. Despite the parameter name this
                is whatever was passed to ``MediaType(...)``, not necessarily
                a member name.

        Returns:
            ``cls.Unknown`` when ``name`` is ``None`` (a project created
            without a media type); the member whose value or name equals
            ``name`` ignoring case; otherwise ``None``, which makes ``Enum``
            raise ``ValueError``.
        """
        if name is None:
            return cls.Unknown

        # Only strings can be compared case-insensitively; any other type
        # falls through to the regular ValueError raised by Enum.
        if not isinstance(name, str):
            return None

        wanted = name.upper()
        # __members__ maps names (aliases included) to members. Iterating the
        # mapping directly yields plain name strings, so use items() to get
        # the member objects as well.
        for member_name, member in cls.__members__.items():
            if wanted in (member.value, member_name.upper()):
                return member
        return None

    @classmethod
    def is_supported(cls, value):
        """Return True if ``value`` is a member other than Unknown/Unsupported.

        Anything that is not a ``MediaType`` instance (for example the raw
        string ``"IMAGE"``) is reported as unsupported.
        """
        return isinstance(value, cls) and value not in (cls.Unknown,
                                                        cls.Unsupported)

    @classmethod
    def get_supported_members(cls):
        """Return the member names (aliases included) that can be used.

        Returns:
            A list of name strings in definition order, without ``"Unknown"``
            and ``"Unsupported"``. Use ``MediaType[name]`` to turn a name back
            into a member.
        """
        excluded = ("Unknown", "Unsupported")
        return [
            member_name for member_name in cls.__members__
            if member_name not in excluded
        ]
relationship to Ontology """ + name = Field.String("name") description = Field.String("description") updated_at = Field.DateTime("updated_at") @@ -71,6 +72,8 @@ class Project(DbObject, Updateable, Deletable): last_activity_time = Field.DateTime("last_activity_time") auto_audit_number_of_labels = Field.Int("auto_audit_number_of_labels") auto_audit_percentage = Field.Float("auto_audit_percentage") + # Bind data_type and allowedMediaTYpe using the GraphQL type MediaType + media_type = Field.Enum(MediaType, "media_type", "allowedMediaType") # Relationships datasets = Relationship.ToMany("Dataset", True) @@ -85,7 +88,7 @@ class Project(DbObject, Updateable, Deletable): benchmarks = Relationship.ToMany("Benchmark", False) ontology = Relationship.ToOne("Ontology", True) - class QueueMode(enum.Enum): + class QueueMode(Enum): Batch = "Batch" Dataset = "Dataset" @@ -94,6 +97,15 @@ def update(self, **kwargs): if mode: self._update_queue_mode(mode) + media_type = kwargs.get("media_type") + if media_type: + if MediaType.is_supported(media_type): + kwargs["media_type"] = media_type.value + else: + raise TypeError(f"{media_type} is not a valid media type. Use" + f" any of {MediaType.get_supported_members()}" + " from MediaType. 
def test_media_type(client, configured_project: Project, rand_gen):
    """Check that ``media_type`` can be read, updated and set at creation.

    Every project created here is deleted in a ``finally`` clause so that a
    failing assertion does not leave stray projects behind.
    """
    # Existing project with no media_type
    assert isinstance(configured_project.media_type, MediaType)

    # Update test
    project = client.create_project(name=rand_gen(str))
    try:
        project.update(media_type=MediaType.Image)
        assert project.media_type == MediaType.Image
    finally:
        project.delete()

    # Create test: get_supported_members() yields member names, so look the
    # member up once and reuse it for both the request and the assertion.
    for member_name in MediaType.get_supported_members():
        expected = MediaType[member_name]
        project = client.create_project(name=rand_gen(str),
                                        media_type=expected)
        try:
            assert project.media_type == expected
        finally:
            project.delete()