diff --git a/examples/annotation_import/basics.ipynb b/examples/annotation_import/basics.ipynb index b420af893..f51083d05 100644 --- a/examples/annotation_import/basics.ipynb +++ b/examples/annotation_import/basics.ipynb @@ -8,7 +8,7 @@ }, "source": [ "\n", - " \n", + " \n", "" ] }, @@ -39,11 +39,9 @@ "source": [ "# Annotation Imports\n", "* This notebook is a high level introduction demonstrating multiple ways to upload your annotations. This will cover the following:\n", - " * Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", - " * Label Import - used to provide ground truth labels. These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", - "\n", - "\n", - "* For complete examples see image_mal.ipynb or ner_mal.ipynb" + " * Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", + " * Label Import is used to provide ground truth labels. 
These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", + "\n" ] }, { @@ -105,20 +103,30 @@ "execution_count": null, "id": "guided-arthritis", "metadata": { - "id": "guided-arthritis" + "id": "guided-arthritis", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "717a953f-3857-4af0-aadb-801e98d2e044" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "3.28.0\n" + ] + } + ], "source": [ - "from labelbox.schema.ontology import OntologyBuilder, Tool\n", - "from labelbox import Client, LabelingFrontend, LabelImport, MALPredictionImport\n", + "from typing import List\n", + "from labelbox import Client, LabelingFrontend, LabelImport, MALPredictionImport, OntologyBuilder, Tool, MediaType\n", + "from labelbox.schema.queue_mode import QueueMode\n", "from labelbox.data.annotation_types import (\n", - " Label, ImageData, ObjectAnnotation, Rectangle, Point, Radio,\n", - " ClassificationAnnotation, ClassificationAnswer\n", + " Label, ImageData, ObjectAnnotation, Rectangle, Point, LabelList\n", ")\n", "from labelbox.data.serialization import NDJsonConverter\n", - "from labelbox.schema.media_type import MediaType\n", - "import uuid\n", - "import json" + "import uuid\n" ] }, { @@ -137,21 +145,12 @@ "execution_count": null, "id": "preliminary-benchmark", "metadata": { - "id": "preliminary-benchmark", - "outputId": "8cf16a44-d0b8-477f-b361-43865b2fc572" + "id": "preliminary-benchmark" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:labelbox.client:Initializing Labelbox client at 'https://api.labelbox.com/graphql'\n" - ] - } - ], + "outputs": [], "source": [ "# Add your api key\n", - "API_KEY = None\n", + "API_KEY= None\n", "client = Client(api_key=API_KEY)" ] }, @@ -189,41 +188,174 @@ "We will be creating two projects, one for model-assisted labeling, and one for label imports" ] }, + { + "cell_type": 
"code", + "source": [ + "# Project defaults to batch mode with benchmark quality settings if the queue_mode argument is not provided\n", + "mal_project = client.create_project(name=\"mal_annotation_import_demo\",\n", + " queue_mode=QueueMode.Batch, \n", + " auto_audit_percentage=1,\n", + " auto_audit_number_of_labels=1,\n", + " media_type=MediaType.Image)\n", + "\n", + "li_project = client.create_project(name=\"label_import_project_demo\",\n", + " queue_mode=QueueMode.Batch,\n", + " auto_audit_percentage=1,\n", + " auto_audit_number_of_labels=1,\n", + " media_type=MediaType.Image)\n", + "\n", + "# Confirm successful creation of project \n", + "print(\"Project Name: \", mal_project.name , \" Project Id: \", mal_project.uid)\n", + "print( \"Project Name:\", li_project.name , \"Project Id: \", li_project.uid) \n", + "\n", + "\n", + "######################### DATASET CONSENSUS OPTION ########################\n", + "#Note that dataset-based projects will be deprecated in the near future.\n", + "\n", + "#To use Datasets/Consensus instead of Batches/Benchmarks use the following query: \n", + "#In this case, 10% of all data rows need to be annotated by three labelers.\n", + "\n", + "# dataset_project = client.create_project(name=\"datasets-test-project\",\n", + "# description=\"a description\",\n", + "# media_type=MediaType.Image,\n", + "# auto_audit_percentage=0.1,\n", + "# auto_audit_number_of_labels=3,\n", + "# queue_mode=QueueMode.Dataset)\n", + "\n", + "# dataset_project.datasets.connect(dataset)\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IrXBm66Qa-Ir", + "outputId": "ba57e9a1-0c28-4b58-fcf6-13e78ae44441" + }, + "id": "IrXBm66Qa-Ir", + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Project Name: mal_annotation_import_demo Project Id: cl9sjj4m85by408z508t0gzva\n", + "Project Name: label_import_project_demo Project Id: cl9sjj4wtcwvh07xy3zisa57k\n" + ] + } + ] + }, { 
"cell_type": "code", "execution_count": null, "id": "advanced-trash", "metadata": { - "id": "advanced-trash" + "id": "advanced-trash", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2fa388f4-20ec-4740-de00-b6f0e0e399c3" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "None\n", + "ERRORS: None\n", + "RESULT URL: https://storage.labelbox.com/cl3ahv73w1891087qbwzs3edd%2Fdata-row-imports-results%2Fcl9sjj6m62sq707yd7v3v1v5e_cl9sjj7iicww607xycahnhu7k.json?Expires=1667050751352&KeyName=labelbox-assets-key-3&Signature=VLFJXZ-8VdrnECOEzf-bJgxOQlc\n" + ] + } + ], "source": [ - "# Only update this if you have an on-prem deployment\n", - "ontology_builder = OntologyBuilder(tools=[\n", - " Tool(tool=Tool.Type.BBOX, name=\"box\")\n", - "])\n", "\n", + "dataset = client.create_dataset(name=\"annotation_import_demo_dataset\")\n", "\n", - "mal_project = client.create_project(\n", - " name=\"image_mal_project\",\n", - " media_type=MediaType.Image\n", - ")\n", - "li_project = client.create_project(\n", - " name=\"image_label_import_project\",\n", - " media_type=MediaType.Image\n", + "test_imgs_url = [\n", + " {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/mapillary-traffic/images/--0le83jkA7Hq7N1fvIvTw.jpg\",\n", + " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1() \n", + " },\n", + " {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/mapillary-traffic/images/--48MAqc82-bZdgGpaiexA.jpg\",\n", + " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1() \n", + " },\n", + " {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/mapillary-traffic/images/--A4b2SOWVi4KL_ryAAtTg.jpg\",\n", + " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1() \n", + " },\n", + " {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/mapillary-traffic/images/--DuAQ9qBB2mmsC7hV2Kzg.jpg\",\n", + " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1()\n", + " 
},\n", + " {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/mapillary-traffic/images/--FValmNpFJ8yo8X7uWODA.jpg\",\n", + " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1()\n", + " }\n", + "\n", + "]\n", + "\n", + "\n", + "data_rows = dataset.create_data_rows(test_imgs_url)\n", + "data_rows.wait_till_done()\n", + "print(data_rows.failed_data_rows)\n", + "print(\"ERRORS: \" , data_rows.errors)\n", + "print(\"RESULT URL: \", data_rows.result_url)" + ] + }, + { + "cell_type": "code", + "source": [ + "# Setup Batches and Ontology\n", + "\n", + "# We need the data row ID to create a batch\n", + "batch_datarows = [dr.uid for dr in list(dataset.export_data_rows())]\n", + "\n", + "# Create a batch to send to your MAL project\n", + "batch_mal = mal_project.create_batch(\n", + " \"first-batch-MAL-demo\", # Each batch in a project must have a unique name\n", + " batch_datarows, # A list of data rows or data row ids\n", + " 5 # priority between 1(Highest) - 5(lowest)\n", ")\n", "\n", + "# Create a batch to send to your LI project\n", + "batch_li = li_project.create_batch(\n", + " \"first-batch-LI-demo\", # Each batch in a project must have a unique name\n", + " batch_datarows, # A list of data rows or data row ids\n", + " 5 # priority between 1(Highest) - 5(lowest)\n", + ")\n", "\n", - "dataset = client.create_dataset(name=\"annotation_import_demo_dataset\")\n", - "test_img_url = \"https://raw.githubusercontent.com/Labelbox/labelbox-python/develop/examples/assets/2560px-Kitano_Street_Kobe01s5s4110.jpg\"\n", - "data_row = dataset.create_data_row(row_data=test_img_url)\n", - "editor = next(client.get_labeling_frontends(where=LabelingFrontend.name == \"Editor\"))\n", + "# Setup your ontology / labeling editor\n", + "# Only update this if you have an on-prem deployment\n", + "ontology_builder = OntologyBuilder(tools=[\n", + " Tool(tool=Tool.Type.BBOX, name=\"box\")\n", + "])\n", "\n", + "editor = next(client.get_labeling_frontends(where=LabelingFrontend.name 
== \"Editor\")) # Unless using a custom editor, keep the default Editor\n", + "# Connect your ontology and editor to your MAL and LI project\n", "mal_project.setup(editor, ontology_builder.asdict())\n", - "mal_project.datasets.connect(dataset)\n", - "\n", "li_project.setup(editor, ontology_builder.asdict())\n", - "li_project.datasets.connect(dataset)" + "\n", + "print(\"Batch Li: \", batch_li)\n", + "print(\"Batch Mal: \", batch_mal)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8z1EnMmmjyiz", + "outputId": "e6538781-eb47-40cb-97fe-bed5ceab8513" + }, + "id": "8z1EnMmmjyiz", + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Batch Li: \n", + "Batch Mal: \n" + ] + } ] }, { @@ -241,38 +373,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "framed-gateway", "metadata": { - "id": "framed-gateway", - "outputId": "7648c444-4d77-4b10-b8ba-23f181567fcb" + "id": "framed-gateway" }, - "outputs": [ - { - "data": { - "text/plain": [ - "Label(uid=None, data=ImageData(im_bytes=None,file_path=None,url=None,arr=None), annotations=[ObjectAnnotation(name='box', feature_schema_id=None, extra={}, value=Rectangle(extra={}, start=Point(extra={}, x=30.0, y=30.0), end=Point(extra={}, x=200.0, y=200.0)), classifications=[])], extra={})" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# Create a label with the related data and annotations\n", - "# Then we assign the feature schema id based on an existing project's ontology\n", - "# Assignment of the feature schema will be done per each project below\n", - "rectangle = Rectangle(start=Point(x=30,y=30), end=Point(x=200,y=200))\n", - "rectangle_annotation = ObjectAnnotation(value=rectangle, name=\"box\")\n", - "image_data = ImageData(uid=data_row.uid)\n", - "\n", - "label = Label(\n", - " data=image_data,\n", - " annotations = [rectangle_annotation]\n", - 
")\n", + "mal_onto = mal_project.ontology().tools()\n", + "li_onto = li_project.ontology().tools()\n", "\n", - "label" + "## create an annotation with schema ids from both ontologies\n", + "rectangle = Rectangle(start=Point(x=100,y=100), end=Point(x=600,y=600))\n", + "\n", + "rectangle_annotation = ObjectAnnotation(value=rectangle, name=\"box\",feature_schema_id=mal_onto[0].feature_schema_id) \n", + "rectangle_annotation_li = ObjectAnnotation(value=rectangle, name=\"box\",feature_schema_id=li_onto[0].feature_schema_id) " ] }, { @@ -308,18 +424,70 @@ "id": "10b19393-920a-45c8-9660-42d8c449b9c2", "metadata": { "id": "10b19393-920a-45c8-9660-42d8c449b9c2", - "outputId": "a93a39de-c8ed-402c-b834-304b1ba8854a" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5f9de2ae-71e1-4a6f-8b7b-6957f65145b0" }, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[{'uuid': '5babc28e-f41b-405a-9e7e-cd1063e357f6',\n", + " 'dataRow': {'id': 'cl9sjj7qapr4p085n11igcvee'},\n", + " 'name': 'box',\n", + " 'schemaId': 'cl9sjji65cx0307xy29p24i3y',\n", + " 'classifications': [],\n", + " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", + " {'uuid': 'e3e3f68e-4d5d-4d36-904e-165abfa15285',\n", + " 'dataRow': {'id': 'cl9sjj7q9pr4l085n7dzp5d93'},\n", + " 'name': 'box',\n", + " 'schemaId': 'cl9sjji65cx0307xy29p24i3y',\n", + " 'classifications': [],\n", + " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", + " {'uuid': 'f36ec573-515f-4ed4-b09c-fddad3b8e695',\n", + " 'dataRow': {'id': 'cl9sjj7q9pr4h085n6fhz285p'},\n", + " 'name': 'box',\n", + " 'schemaId': 'cl9sjji65cx0307xy29p24i3y',\n", + " 'classifications': [],\n", + " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", + " {'uuid': 'f5373bc3-29aa-492b-9305-c3a23b64d78b',\n", + " 'dataRow': {'id': 'cl9sjj7q9pr4d085nelq436ai'},\n", + " 'name': 'box',\n", + " 'schemaId': 
'cl9sjji65cx0307xy29p24i3y',\n", + " 'classifications': [],\n", + " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", + " {'uuid': '3df6e667-d72c-4d4f-9138-b358d06d90e3',\n", + " 'dataRow': {'id': 'cl9sjj7q9pr49085nd03kaouz'},\n", + " 'name': 'box',\n", + " 'schemaId': 'cl9sjji65cx0307xy29p24i3y',\n", + " 'classifications': [],\n", + " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}}]" + ] + }, + "metadata": {}, + "execution_count": 9 + } + ], "source": [ - "mal_label = Label(\n", - " data=image_data,\n", - " annotations = [rectangle_annotation]\n", - ")\n", "\n", - "ndjson_labels = list(NDJsonConverter.serialize([mal_label]))\n", + "# Create a list of labels\n", + "\n", + "mal_ndjson_ls = []\n", "\n", - "ndjson_labels" + "for data_row in list(mal_project.export_queued_data_rows()):\n", + " image_data = ImageData(uid=data_row['id'])\n", + "\n", + " mal_label = Label(\n", + " data=image_data,\n", + " annotations = [rectangle_annotation]\n", + " )\n", + " mal_ndjson_ls.append(mal_label)\n", + "\n", + "mal_ndjson = list(NDJsonConverter.serialize(mal_ndjson_ls))\n", + "## Create your ndjson list of labels\n", + "mal_ndjson\n" ] }, { @@ -334,8 +502,8 @@ "upload_job = MALPredictionImport.create_from_objects(\n", " client = client, \n", " project_id = mal_project.uid, \n", - " name=\"upload_label_import_job\", \n", - " predictions=ndjson_labels)" + " name=\"upload_mal_import_job_demo\", \n", + " predictions=mal_ndjson)" ] }, { @@ -344,19 +512,15 @@ "id": "welsh-nutrition", "metadata": { "id": "welsh-nutrition", - "outputId": "c13baac2-508b-4018-c594-dc92d6efe8a1" + "outputId": "abcb2693-c28f-4a2b-abe7-fb12dc92fe3a", + "colab": { + "base_uri": "https://localhost:8080/" + } }, "outputs": [ { - "name": "stderr", "output_type": "stream", - "text": [ - "INFO:labelbox.schema.annotation_import:Sleeping for 10 seconds...\n" - ] - }, - { "name": "stdout", - "output_type": "stream", "text": [ "Errors: []\n" ] @@ -364,8 +528,8 @@ 
], "source": [ "# Errors will appear for each annotation that failed.\n", - "# Empty list means that there were no errors\n", "# This will provide information only after the upload_job is complete, so we do not need to worry about having to rerun\n", + "upload_job.wait_until_done()\n", "print(\"Errors:\", upload_job.errors)" ] }, @@ -391,27 +555,77 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "c95716d5-a1ee-46fe-8dca-313ce10f104f", "metadata": { "id": "c95716d5-a1ee-46fe-8dca-313ce10f104f", - "outputId": "0e83d5d0-1f51-4903-c777-f9c331781656" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "8cc23989-b3cc-4ead-8cd4-cc7750c6ab4d" }, - "outputs": [], + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[{'uuid': '2376c909-cbc2-4905-b084-e25b7d21f367',\n", + " 'dataRow': {'id': 'cl9sjj7qapr4p085n11igcvee'},\n", + " 'name': 'box',\n", + " 'schemaId': 'cl9sjjjvt7ent07wxd7br30oh',\n", + " 'classifications': [],\n", + " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", + " {'uuid': 'd4b6ea84-0130-4302-9a2c-f3364991d7ac',\n", + " 'dataRow': {'id': 'cl9sjj7q9pr4l085n7dzp5d93'},\n", + " 'name': 'box',\n", + " 'schemaId': 'cl9sjjjvt7ent07wxd7br30oh',\n", + " 'classifications': [],\n", + " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", + " {'uuid': 'f8cdc6c8-f7bc-4500-8385-d796dae7753c',\n", + " 'dataRow': {'id': 'cl9sjj7q9pr4h085n6fhz285p'},\n", + " 'name': 'box',\n", + " 'schemaId': 'cl9sjjjvt7ent07wxd7br30oh',\n", + " 'classifications': [],\n", + " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", + " {'uuid': '1ccc9a86-fd65-4057-ae57-709a95a0c20f',\n", + " 'dataRow': {'id': 'cl9sjj7q9pr4d085nelq436ai'},\n", + " 'name': 'box',\n", + " 'schemaId': 'cl9sjjjvt7ent07wxd7br30oh',\n", + " 'classifications': [],\n", + " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", + " 
{'uuid': '98bae092-0b90-4364-813c-3188d5301816',\n", + " 'dataRow': {'id': 'cl9sjj7q9pr49085nd03kaouz'},\n", + " 'name': 'box',\n", + " 'schemaId': 'cl9sjjjvt7ent07wxd7br30oh',\n", + " 'classifications': [],\n", + " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}}]" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ], "source": [ - "li_label = Label(\n", - " data=image_data,\n", - " annotations = [rectangle_annotation]\n", - ")\n", + "li_label_list = []\n", + "\n", + "for data_row in list(li_project.export_queued_data_rows()):\n", + " image_data = ImageData(uid=data_row['id'])\n", + "\n", + " li_label = Label(\n", + " data=image_data,\n", + " annotations = [rectangle_annotation_li]\n", + " )\n", + " li_label_list.append(li_label)\n", "\n", - "ndjson_labels = list(NDJsonConverter.serialize([li_label]))\n", + "## Create your ndjson list of labels\n", + "li_ndjson = list(NDJsonConverter.serialize(li_label_list))\n", "\n", - "ndjson_labels" + "li_ndjson" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "ef24f3cd-9d16-4e13-af25-7ebcda7dd4d2", "metadata": { "id": "ef24f3cd-9d16-4e13-af25-7ebcda7dd4d2" @@ -421,43 +635,39 @@ "upload_job = LabelImport.create_from_objects(\n", " client = client, \n", " project_id = li_project.uid, \n", - " name=\"upload_label_import_job\", \n", - " labels=ndjson_labels)" + " name=\"upload_label_import_job_demo\", \n", + " labels=li_ndjson)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "c8fba013-7bad-4188-bbe6-47f9f857ec66", "metadata": { "id": "c8fba013-7bad-4188-bbe6-47f9f857ec66", - "outputId": "95939547-8c24-431e-dec9-7221686856b5" + "outputId": "9fa5fb80-d5da-421c-854a-2e97ed5ca872", + "colab": { + "base_uri": "https://localhost:8080/" + } }, "outputs": [ { - "name": "stderr", "output_type": "stream", - "text": [ - "INFO:labelbox.schema.annotation_import:Sleeping for 10 seconds...\n" - ] - }, - { "name": "stdout", - 
"output_type": "stream", "text": [ "Errors: []\n" ] } ], "source": [ - "print(\"Errors:\", upload_job.errors)" + "upload_job.wait_until_done()\n", + "print(\"Errors:\", upload_job.errors)\n" ] } ], "metadata": { "colab": { "collapsed_sections": [], - "name": "basics.ipynb", "provenance": [] }, "kernelspec": {