diff --git a/examples/annotation_import/basics.ipynb b/examples/annotation_import/basics.ipynb deleted file mode 100644 index 775bd52ae..000000000 --- a/examples/annotation_import/basics.ipynb +++ /dev/null @@ -1,693 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "db768cda", - "metadata": { - "id": "db768cda" - }, - "source": [ - "\n", - " \n", - "" - ] - }, - { - "cell_type": "markdown", - "id": "cb5611d0", - "metadata": { - "id": "cb5611d0" - }, - "source": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "id": "fundamental-failure", - "metadata": { - "id": "fundamental-failure" - }, - "source": [ - "# Annotation Imports\n", - "* This notebook is a high level introduction demonstrating multiple ways to upload your annotations. This will cover the following:\n", - " * Model-assisted labeling - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission.\n", - " * Label Import is used to provide ground truth labels. 
These can in turn be used and compared against prediction labels, or used as benchmarks to see how your labelers are doing.\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "registered-parts", - "metadata": { - "id": "registered-parts" - }, - "source": [ - "* For information on what types of annotations are supported per data type, refer to this [documentation](https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended)" - ] - }, - { - "cell_type": "markdown", - "id": "legislative-violence", - "metadata": { - "id": "legislative-violence" - }, - "source": [ - "* Notes:\n", - " * If you are importing more than 1,000 mask annotations at a time, consider submitting separate jobs, as they can take longer than other annotation types to import.\n", - " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." - ] - }, - { - "cell_type": "markdown", - "id": "70072299-2ffe-4ea3-9af1-410d9bfd18cc", - "metadata": { - "id": "70072299-2ffe-4ea3-9af1-410d9bfd18cc" - }, - "source": [ - "# Installs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "pointed-disability", - "metadata": { - "id": "pointed-disability" - }, - "outputs": [], - "source": [ - "!pip install -q 'labelbox[data]'" - ] - }, - { - "cell_type": "markdown", - "id": "a5c271de-1006-400e-a5bb-d466b833b734", - "metadata": { - "id": "a5c271de-1006-400e-a5bb-d466b833b734" - }, - "source": [ - "# Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "guided-arthritis", - "metadata": { - "id": "guided-arthritis", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "717a953f-3857-4af0-aadb-801e98d2e044" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "3.28.0\n" - ] - } - ], - "source": [ - "from typing import List\n", - "from labelbox import Client, LabelingFrontend, LabelImport, 
MALPredictionImport, OntologyBuilder, Tool, MediaType\n", - "from labelbox.schema.queue_mode import QueueMode\n", - "from labelbox.data.annotation_types import (\n", - " Label, ImageData, ObjectAnnotation, Rectangle, Point, LabelList\n", - ")\n", - "from labelbox.data.serialization import NDJsonConverter\n", - "import uuid\n" - ] - }, - { - "cell_type": "markdown", - "id": "7ff330d7", - "metadata": { - "id": "7ff330d7" - }, - "source": [ - "# API Key and Client\n", - "Provide a valid API key below in order to properly connect to the Labelbox Client." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "preliminary-benchmark", - "metadata": { - "id": "preliminary-benchmark" - }, - "outputs": [], - "source": [ - "# Add your API key\n", - "API_KEY= None\n", - "client = Client(api_key=API_KEY)" - ] - }, - { - "cell_type": "markdown", - "id": "working-holiday", - "metadata": { - "id": "working-holiday" - }, - "source": [ - "---- \n", - "### Steps\n", - "1. Make sure the project is set up\n", - "2. Collect annotations\n", - "3. 
Upload" ] - }, - { - "cell_type": "markdown", - "id": "thirty-socket", - "metadata": { - "id": "thirty-socket" - }, - "source": [ - "### Project setup" - ] - }, - { - "cell_type": "markdown", - "id": "bf25188e-cc79-477a-9b80-0a764449808d", - "metadata": { - "id": "bf25188e-cc79-477a-9b80-0a764449808d" - }, - "source": [ - "We will be creating two projects, one for model-assisted labeling, and one for label imports" - ] - }, - { - "cell_type": "code", - "source": [ - "# Project defaults to batch mode with benchmark quality settings if the queue_mode argument is not provided\n", - "mal_project = client.create_project(name=\"mal_annotation_import_demo\",\n", - " queue_mode=QueueMode.Batch, \n", - " auto_audit_percentage=1,\n", - " auto_audit_number_of_labels=1,\n", - " media_type=MediaType.Image)\n", - "\n", - "li_project = client.create_project(name=\"label_import_project_demo\",\n", - " queue_mode=QueueMode.Batch,\n", - " auto_audit_percentage=1,\n", - " auto_audit_number_of_labels=1,\n", - " media_type=MediaType.Image)\n", - "\n", - "# Confirm successful creation of project \n", - "print(\"Project Name: \", mal_project.name , \" Project Id: \", mal_project.uid)\n", - "print( \"Project Name:\", li_project.name , \"Project Id: \", li_project.uid) \n", - "\n", - "\n", - "######################### DATASET CONSENSUS OPTION ########################\n", - "#Note that dataset-based projects will be deprecated in the near future.\n", - "\n", - "#To use Datasets/Consensus instead of Batches/Benchmarks use the following query: \n", - "#In this case, 10% of all data rows need to be annotated by three labelers.\n", - "\n", - "# dataset_project = client.create_project(name=\"datasets-test-project\",\n", - "# description=\"a description\",\n", - "# media_type=MediaType.Image,\n", - "# auto_audit_percentage=0.1,\n", - "# auto_audit_number_of_labels=3,\n", - "# queue_mode=QueueMode.Dataset)\n", - "\n", - "# dataset_project.datasets.connect(dataset)\n" - ], - "metadata": { - "colab": { - 
"base_uri": "https://localhost:8080/" - }, - "id": "IrXBm66Qa-Ir", - "outputId": "ba57e9a1-0c28-4b58-fcf6-13e78ae44441" - }, - "id": "IrXBm66Qa-Ir", - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Project Name: mal_annotation_import_demo Project Id: cl9sjj4m85by408z508t0gzva\n", - "Project Name: label_import_project_demo Project Id: cl9sjj4wtcwvh07xy3zisa57k\n" - ] - } - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "advanced-trash", - "metadata": { - "id": "advanced-trash", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "2fa388f4-20ec-4740-de00-b6f0e0e399c3" - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "None\n", - "ERRORS: None\n", - "RESULT URL: https://storage.labelbox.com/cl3ahv73w1891087qbwzs3edd%2Fdata-row-imports-results%2Fcl9sjj6m62sq707yd7v3v1v5e_cl9sjj7iicww607xycahnhu7k.json?Expires=1667050751352&KeyName=labelbox-assets-key-3&Signature=VLFJXZ-8VdrnECOEzf-bJgxOQlc\n" - ] - } - ], - "source": [ - "\n", - "dataset = client.create_dataset(name=\"annotation_import_demo_dataset\")\n", - "\n", - "test_imgs_url = [\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/mapillary-traffic/images/--0le83jkA7Hq7N1fvIvTw.jpg\",\n", - " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1() \n", - " },\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/mapillary-traffic/images/--48MAqc82-bZdgGpaiexA.jpg\",\n", - " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1() \n", - " },\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/mapillary-traffic/images/--A4b2SOWVi4KL_ryAAtTg.jpg\",\n", - " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1() \n", - " },\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/mapillary-traffic/images/--DuAQ9qBB2mmsC7hV2Kzg.jpg\",\n", - " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1()\n", - " 
},\n", - " {\n", - " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/mapillary-traffic/images/--FValmNpFJ8yo8X7uWODA.jpg\",\n", - " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1()\n", - " }\n", - "\n", - "]\n", - "\n", - "\n", - "data_rows = dataset.create_data_rows(test_imgs_url)\n", - "data_rows.wait_till_done()\n", - "print(data_rows.failed_data_rows)\n", - "print(\"ERRORS: \" , data_rows.errors)\n", - "print(\"RESULT URL: \", data_rows.result_url)" - ] - }, - { - "cell_type": "code", - "source": [ - "# Setup Batches and Ontology\n", - "\n", - "# We need the data row ID to create a batch\n", - "batch_datarows = [dr.uid for dr in list(dataset.export_data_rows())]\n", - "\n", - "# Create a batch to send to your MAL project\n", - "batch_mal = mal_project.create_batch(\n", - " \"first-batch-MAL-demo\", # Each batch in a project must have a unique name\n", - " batch_datarows, # A list of data rows or data row ids\n", - " 5 # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "# Create a batch to send to your LI project\n", - "batch_li = li_project.create_batch(\n", - " \"first-batch-LI-demo\", # Each batch in a project must have a unique name\n", - " batch_datarows, # A list of data rows or data row ids\n", - " 5 # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "# Setup your ontology / labeling editor\n", - "# Only update this if you have an on-prem deployment\n", - "ontology_builder = OntologyBuilder(tools=[\n", - " Tool(tool=Tool.Type.BBOX, name=\"box\")\n", - "])\n", - "\n", - "editor = next(client.get_labeling_frontends(where=LabelingFrontend.name == \"Editor\")) # Unless using a custom editor,\n", - "# Connect your ontology and editor to your MAL and LI project\n", - "mal_project.setup(editor, ontology_builder.asdict())\n", - "li_project.setup(editor, ontology_builder.asdict())\n", - "\n", - "print(\"Batch Li: \", batch_li)\n", - "print(\"Batch Mal: \", batch_mal)" - ], - "metadata": { - "colab": { - "base_uri": 
"https://localhost:8080/" - }, - "id": "8z1EnMmmjyiz", - "outputId": "e6538781-eb47-40cb-97fe-bed5ceab8513" - }, - "id": "8z1EnMmmjyiz", - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Batch Li: \n", - "Batch Mal: \n" - ] - } - ] - }, - { - "cell_type": "markdown", - "id": "aging-disability", - "metadata": { - "id": "aging-disability" - }, - "source": [ - "#### Create Label using Annotation Type Objects\n", - "* It is recommended to use the Python SDK's annotation types. Below is an example of a bounding box, which is the Rectangle annotation type\n", - "\n", - "* A more in depth example can be found [here](https://docs.labelbox.com/docs/bounding-box-json)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "framed-gateway", - "metadata": { - "id": "framed-gateway" - }, - "outputs": [], - "source": [ - "# Create a label with the related data and annotations\n", - "mal_onto = mal_project.ontology().tools()\n", - "li_onto = li_project.ontology().tools()\n", - "\n", - "## create an annotation with schema ids from both ontologies\n", - "rectangle = Rectangle(start=Point(x=100,y=100), end=Point(x=600,y=600))\n", - "\n", - "rectangle_annotation = ObjectAnnotation(value=rectangle, name=\"box\",feature_schema_id=mal_onto[0].feature_schema_id) \n", - "rectangle_annotation_li = ObjectAnnotation(value=rectangle, name=\"box\",feature_schema_id=li_onto[0].feature_schema_id) " - ] - }, - { - "cell_type": "markdown", - "id": "continental-greeting", - "metadata": { - "id": "continental-greeting" - }, - "source": [ - "### Model Assisted Labeling " - ] - }, - { - "cell_type": "markdown", - "id": "c6244b5e-08d7-4f3e-9689-d44539ad58c0", - "metadata": { - "id": "c6244b5e-08d7-4f3e-9689-d44539ad58c0" - }, - "source": [ - "To do model-assisted labeling, we need to convert a Label object into an NDJSON. 
\n", - "\n", - "This is easily done using the NDJsonConverter class\n", - "\n", - "We will create a Label called mal_label which has the same original structure as the label above\n", - "\n", - "Notes:\n", - "* the NDJsonConverter takes in a list of labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10b19393-920a-45c8-9660-42d8c449b9c2", - "metadata": { - "id": "10b19393-920a-45c8-9660-42d8c449b9c2", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "5f9de2ae-71e1-4a6f-8b7b-6957f65145b0" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[{'uuid': '5babc28e-f41b-405a-9e7e-cd1063e357f6',\n", - " 'dataRow': {'id': 'cl9sjj7qapr4p085n11igcvee'},\n", - " 'name': 'box',\n", - " 'schemaId': 'cl9sjji65cx0307xy29p24i3y',\n", - " 'classifications': [],\n", - " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", - " {'uuid': 'e3e3f68e-4d5d-4d36-904e-165abfa15285',\n", - " 'dataRow': {'id': 'cl9sjj7q9pr4l085n7dzp5d93'},\n", - " 'name': 'box',\n", - " 'schemaId': 'cl9sjji65cx0307xy29p24i3y',\n", - " 'classifications': [],\n", - " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", - " {'uuid': 'f36ec573-515f-4ed4-b09c-fddad3b8e695',\n", - " 'dataRow': {'id': 'cl9sjj7q9pr4h085n6fhz285p'},\n", - " 'name': 'box',\n", - " 'schemaId': 'cl9sjji65cx0307xy29p24i3y',\n", - " 'classifications': [],\n", - " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", - " {'uuid': 'f5373bc3-29aa-492b-9305-c3a23b64d78b',\n", - " 'dataRow': {'id': 'cl9sjj7q9pr4d085nelq436ai'},\n", - " 'name': 'box',\n", - " 'schemaId': 'cl9sjji65cx0307xy29p24i3y',\n", - " 'classifications': [],\n", - " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", - " {'uuid': '3df6e667-d72c-4d4f-9138-b358d06d90e3',\n", - " 'dataRow': {'id': 'cl9sjj7q9pr49085nd03kaouz'},\n", - " 'name': 'box',\n", - " 'schemaId': 
'cl9sjji65cx0307xy29p24i3y',\n", - " 'classifications': [],\n", - " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}}]" - ] - }, - "metadata": {}, - "execution_count": 9 - } - ], - "source": [ - "\n", - "# Create a list of labels\n", - "\n", - "mal_ndjson_ls = []\n", - "\n", - "for data_row in list(mal_project.export_queued_data_rows()):\n", - " image_data = ImageData(uid=data_row['id'])\n", - "\n", - " mal_label = Label(\n", - " data=image_data,\n", - " annotations = [rectangle_annotation]\n", - " )\n", - " mal_ndjson_ls.append(mal_label)\n", - "\n", - "mal_ndjson = list(NDJsonConverter.serialize(mal_ndjson_ls))\n", - "## Create your ndjson list of labels\n", - "mal_ndjson\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "criminal-register", - "metadata": { - "id": "criminal-register" - }, - "outputs": [], - "source": [ - "upload_job = MALPredictionImport.create_from_objects(\n", - " client = client, \n", - " project_id = mal_project.uid, \n", - " name=\"upload_mal_import_job_demo\", \n", - " predictions=mal_ndjson)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "welsh-nutrition", - "metadata": { - "id": "welsh-nutrition", - "outputId": "abcb2693-c28f-4a2b-abe7-fb12dc92fe3a", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Errors: []\n" - ] - } - ], - "source": [ - "# Errors will appear for each annotation that failed.\n", - "# This will provide information only after the upload_job is complete, so we do not need to worry about having to rerun\n", - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)" - ] - }, - { - "cell_type": "markdown", - "id": "dd1441ee-7c04-4326-a094-2ca34a3548e6", - "metadata": { - "id": "dd1441ee-7c04-4326-a094-2ca34a3548e6" - }, - "source": [ - "### Label Import" - ] - }, - { - "cell_type": "markdown", - "id": "41d103bc-a5fd-4f0b-95f0-7e9bc59fbd07", 
- "metadata": { - "id": "41d103bc-a5fd-4f0b-95f0-7e9bc59fbd07" - }, - "source": [ - "Label import is very similar to model-assisted labeling. We will create a Label called li_label which has the same original structure as the label above" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "c95716d5-a1ee-46fe-8dca-313ce10f104f", - "metadata": { - "id": "c95716d5-a1ee-46fe-8dca-313ce10f104f", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "8cc23989-b3cc-4ead-8cd4-cc7750c6ab4d" - }, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[{'uuid': '2376c909-cbc2-4905-b084-e25b7d21f367',\n", - " 'dataRow': {'id': 'cl9sjj7qapr4p085n11igcvee'},\n", - " 'name': 'box',\n", - " 'schemaId': 'cl9sjjjvt7ent07wxd7br30oh',\n", - " 'classifications': [],\n", - " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", - " {'uuid': 'd4b6ea84-0130-4302-9a2c-f3364991d7ac',\n", - " 'dataRow': {'id': 'cl9sjj7q9pr4l085n7dzp5d93'},\n", - " 'name': 'box',\n", - " 'schemaId': 'cl9sjjjvt7ent07wxd7br30oh',\n", - " 'classifications': [],\n", - " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", - " {'uuid': 'f8cdc6c8-f7bc-4500-8385-d796dae7753c',\n", - " 'dataRow': {'id': 'cl9sjj7q9pr4h085n6fhz285p'},\n", - " 'name': 'box',\n", - " 'schemaId': 'cl9sjjjvt7ent07wxd7br30oh',\n", - " 'classifications': [],\n", - " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", - " {'uuid': '1ccc9a86-fd65-4057-ae57-709a95a0c20f',\n", - " 'dataRow': {'id': 'cl9sjj7q9pr4d085nelq436ai'},\n", - " 'name': 'box',\n", - " 'schemaId': 'cl9sjjjvt7ent07wxd7br30oh',\n", - " 'classifications': [],\n", - " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}},\n", - " {'uuid': '98bae092-0b90-4364-813c-3188d5301816',\n", - " 'dataRow': {'id': 'cl9sjj7q9pr49085nd03kaouz'},\n", - " 'name': 'box',\n", - " 'schemaId': 'cl9sjjjvt7ent07wxd7br30oh',\n", - " 
'classifications': [],\n", - " 'bbox': {'top': 100.0, 'left': 100.0, 'height': 500.0, 'width': 500.0}}]" - ] - }, - "metadata": {}, - "execution_count": 19 - } - ], - "source": [ - "li_label_list = []\n", - "\n", - "for data_row in list(li_project.export_queued_data_rows()):\n", - " image_data = ImageData(uid=data_row['id'])\n", - "\n", - " li_label = Label(\n", - " data=image_data,\n", - " annotations = [rectangle_annotation_li]\n", - " )\n", - " li_label_list.append(li_label)\n", - "\n", - "## Create your ndjson list of labels\n", - "li_ndjson = list(NDJsonConverter.serialize(li_label_list))\n", - "\n", - "li_ndjson" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "ef24f3cd-9d16-4e13-af25-7ebcda7dd4d2", - "metadata": { - "id": "ef24f3cd-9d16-4e13-af25-7ebcda7dd4d2" - }, - "outputs": [], - "source": [ - "upload_job = LabelImport.create_from_objects(\n", - " client = client, \n", - " project_id = li_project.uid, \n", - " name=\"upload_label_import_job_demo\", \n", - " labels=li_ndjson)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "c8fba013-7bad-4188-bbe6-47f9f857ec66", - "metadata": { - "id": "c8fba013-7bad-4188-bbe6-47f9f857ec66", - "outputId": "9fa5fb80-d5da-421c-854a-2e97ed5ca872", - "colab": { - "base_uri": "https://localhost:8080/" - } - }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Errors: []\n" - ] - } - ], - "source": [ - "upload_job.wait_until_done()\n", - "print(\"Errors:\", upload_job.errors)\n" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" - } - }, - "nbformat": 4, - 
"nbformat_minor": 5 -}