diff --git a/examples/annotation_import/image.ipynb b/examples/annotation_import/image.ipynb index d61c597b8..917f61c46 100644 --- a/examples/annotation_import/image.ipynb +++ b/examples/annotation_import/image.ipynb @@ -20,7 +20,7 @@ }, "source": [ "\n", - "\n", "\n", "\n", @@ -41,17 +41,7 @@ }, "source": [ "# Image Annotation Import\n", - "* This notebook will provide examples of each supported annotation type for image assets.\n", - "\n", - "Supported annotations that can be uploaded through the SDK: \n", - "- Bounding box\n", - "- Polygon\n", - "- Point\n", - "- Polyline\n", - "- Segmentation mask\n", - "- Classification - radio\n", - "- Classification - checklist\n", - "- Classification - free-form text\n", + "This notebook will provide examples of each supported annotation type for image assets. \n", "\n", "### [Model-assisted labeling (MAL)](https://docs.labelbox.com/v4/docs/model-assisted-labeling)\n", "* This workflow allows you to import computer-generated predictions (or simply annotations created outside of Labelbox) as pre-labels on an asset. 
\n", @@ -87,12 +77,12 @@ "id": "6f3cdca1-524f-4247-a63b-2d4371b0257d" }, "source": [ - "### Setup" + "## Imports" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "4d63074b-2379-48af-b9d6-2a66190f03c4", "metadata": { "id": "4d63074b-2379-48af-b9d6-2a66190f03c4" @@ -104,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "01fca8c9-0680-4a9c-a11e-1b49f31e9121", "metadata": { "id": "01fca8c9-0680-4a9c-a11e-1b49f31e9121" @@ -113,14 +103,14 @@ "source": [ "from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n", "from labelbox import Client, MALPredictionImport, LabelImport\n", - "from labelbox.data.serialization import NDJsonConverter\n", - "from labelbox.schema.media_type import MediaType\n", "from labelbox.data.annotation_types import (\n", " Label, ImageData, ObjectAnnotation, MaskData,\n", " Rectangle, Point, Line, Mask, Polygon,\n", " Radio, Checklist, Text,\n", " ClassificationAnnotation, ClassificationAnswer\n", ")\n", + "from labelbox.data.serialization import NDJsonConverter\n", + "from labelbox.schema.media_type import MediaType\n", "import uuid\n", "import numpy as np\n", "from labelbox.schema.queue_mode import QueueMode\n" @@ -132,7 +122,7 @@ "id": "3lZ_ztAxfZ-d" }, "source": [ - "### Replace with your API Key\n", + "## Replace with your API KEY\n", "\n", "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" ], @@ -140,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": { "id": "wsbqcuCqfZmg" }, @@ -154,7 +144,7 @@ { "cell_type": "markdown", "source": [ - "## Supported annotations for image\n" + "## Supported Annotations for Image\n" ], "metadata": { "id": "PKUj8iRnLMjs" @@ -164,7 +154,7 @@ { "cell_type": "markdown", "source": [ - "### Supported Python annotation types and NDJSON" + "### Supported Annotation Types" ], "metadata": { "id": "OePiibbed0nG" @@ -192,25 +182,13 
@@ "id": "v5wL6oojz9Ge" }, "id": "v5wL6oojz9Ge", - "execution_count": null, + "execution_count": 33, "outputs": [] }, { "cell_type": "code", "source": [ - "########## Radio Classification ##########\n", - "\n", - "# Python annotation \n", - "nested_radio_annotation = ClassificationAnnotation(\n", - " name=\"nested_radio_question\", \n", - " value=Radio(answer = ClassificationAnswer(name = \"first_radio_answer\")),\n", - " classifications=[\n", - " \tClassificationAnnotation(\n", - " \tname=\"sub_radio_question\",\n", - " \t\tvalue=Radio(answer=ClassificationAnswer(name=\"first_sub_radio_answer\"))\n", - " )\n", - " ]\n", - ")\n", + "########## Nested Radio Classification is only supported with NDJSON tools##########\n", "\n", "# NDJSON \n", "nested_radio_annotation_ndjson = {\n", @@ -225,7 +203,7 @@ "id": "I75K-wx7_sDs" }, "id": "I75K-wx7_sDs", - "execution_count": null, + "execution_count": 34, "outputs": [] }, { @@ -252,7 +230,7 @@ "id": "b2UjSoYez9I1" }, "id": "b2UjSoYez9I1", - "execution_count": null, + "execution_count": 35, "outputs": [] }, { @@ -277,7 +255,7 @@ "id": "qGSXRtKpz9LQ" }, "id": "qGSXRtKpz9LQ", - "execution_count": null, + "execution_count": 36, "outputs": [] }, { @@ -310,13 +288,13 @@ "id": "xCU4JRP0z9Nh" }, "id": "xCU4JRP0z9Nh", - "execution_count": null, + "execution_count": 37, "outputs": [] }, { "cell_type": "code", "source": [ - "####### Bounding box with nested classification #######\n", + "# Bounding box with nested classification\n", "bbox_with_radio_subclass_annotation = ObjectAnnotation(\n", " name=\"bbox_with_radio_subclass\", # must match your ontology feature's name\n", " value=Rectangle(\n", @@ -353,7 +331,7 @@ "id": "gAIzsxEjLmhv" }, "id": "gAIzsxEjLmhv", - "execution_count": null, + "execution_count": 38, "outputs": [] }, { @@ -400,13 +378,13 @@ "id": "jRwfE4MFz9Ph" }, "id": "jRwfE4MFz9Ph", - "execution_count": null, + "execution_count": 39, "outputs": [] }, { "cell_type": "code", "source": [ - "######### Segmentation mask 
#########\n", + "######### Mask #########\n", "\n", "# Python \n", "# Identifying what values in the numpy array correspond to the mask annotation\n", @@ -443,13 +421,13 @@ "id": "39vz-tYsz9Ry" }, "id": "39vz-tYsz9Ry", - "execution_count": null, + "execution_count": 40, "outputs": [] }, { "cell_type": "code", "source": [ - "######## Point ########\n", + "######## Point Annotation ########\n", "\n", "# Python Annotation\n", "point_annotation = ObjectAnnotation(\n", @@ -469,7 +447,7 @@ "id": "UelSiWN2z9Tg" }, "id": "UelSiWN2z9Tg", - "execution_count": null, + "execution_count": 41, "outputs": [] }, { @@ -524,13 +502,13 @@ "id": "mrjb8qY3z9VY" }, "id": "mrjb8qY3z9VY", - "execution_count": null, + "execution_count": 42, "outputs": [] }, { "cell_type": "markdown", "source": [ - "## Upload Annotations - putting it all together\n" + "# Upload Annotations - putting it all together\n" ], "metadata": { "id": "cHom688XLQuM" @@ -540,7 +518,7 @@ { "cell_type": "markdown", "source": [ - "### Step 1: Import data rows into Catalog\n", + "## Step 1: Import data rows into Catalog\n", "\n" ], "metadata": { @@ -552,50 +530,66 @@ "cell_type": "code", "source": [ "# send a sample image as batch to the project\n", + "\n", + "\n", "test_img_url = {\n", - " \"row_data\": \"https://raw.githubusercontent.com/Labelbox/labelbox-python/develop/examples/assets/2560px-Kitano_Street_Kobe01s5s4110.jpg\",\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/image_sample_data/2560px-Kitano_Street_Kobe01s5s4110.jpeg\",\n", " \"global_key\": str(uuid.uuid4())\n", "}\n", + " \n", + "\n", "dataset = client.create_dataset(name=\"demo_dataset_img\")\n", "data_row = dataset.create_data_row(test_img_url)\n", - "print(data_row)\n" + "print(data_row)\n", + "\n", + "len(test_img_url)" ], "metadata": { "id": "xJ3DZLv_LGsK", "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "f7e46b57-0301-4446-c887-ef0ddda03ebf" + "outputId": "2c447152-e7d8-4a42-889b-0a6344ad371d" }, "id": 
"xJ3DZLv_LGsK", - "execution_count": null, + "execution_count": 45, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n" ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "2" + ] + }, + "metadata": {}, + "execution_count": 45 } ] }, { "cell_type": "markdown", "source": [ - "### Step 2: Create/select an Ontology\n", + "## Step 2: Create/select an Ontology\n", "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", - "For example, when we create the bounding box annotation [above](https://colab.research.google.com/drive/1jxU4Lz1KODkI4xjNLzFEL7BbFX0DRgTU#scrollTo=mrjb8qY3z9VY&line=4&uniqifier=1), we provided the `name` as `polyline`. Now, when we setup our ontology, we must ensure that the name of my bounding box tool is also `polyline`. The same alignment must hold true for the other tools and classifications we create in our ontology.\n", + "For example, when we create the polyline annotation above, we provided the `name` as `polyline`. Now, when we set up our ontology, we must ensure that the name of our polyline tool is also `polyline`. 
The same alignment must hold true for the other tools and classifications we create in our ontology.\n", "\n", "\n" ], @@ -684,13 +678,13 @@ "id": "ojonQWaO3Rfv" }, "id": "ojonQWaO3Rfv", - "execution_count": null, + "execution_count": 46, "outputs": [] }, { "cell_type": "markdown", "source": [ - "### Step 3: Create a labeling project\n", + "## Step 3: Create a labeling project\n", "\n", "Connect the ontology to the labeling project" ], @@ -705,7 +699,7 @@ "\n", "# create a project and configure the ontology \n", "project = client.create_project(\n", - " name=\"annotations_import_project\",\n", + " name=\"annotations_import_project_demo\",\n", " media_type=MediaType.Image,\n", " queue_mode=QueueMode.Batch)\n", "\n", @@ -715,13 +709,13 @@ "id": "w1G8Mmb_z9Zx" }, "id": "w1G8Mmb_z9Zx", - "execution_count": null, + "execution_count": 47, "outputs": [] }, { "cell_type": "markdown", "source": [ - "### Step 4: Send a batch of data rows to the project\n" + "## Step 4: Send a batch of data rows to the project\n" ], "metadata": { "id": "sz1Ulh0n6OJQ" @@ -733,7 +727,7 @@ "source": [ "batch = project.create_batch(\n", " \"Initial batch2\", # name of the batch\n", - " [data_row.uid], # list of Data Rows\n", + " dataset.export_data_rows(), # list of Data Rows\n", " 1 # priority between 1-5\n", ")\n", "print(\"Batch\", batch)" @@ -743,10 +737,10 @@ "base_uri": "https://localhost:8080/" }, "id": "eP8N9JOX6QZ_", - "outputId": "1b7a1a17-fe21-4fe5-f5e5-35ede3724ae7" + "outputId": "b1b66425-a276-4599-fd73-4c168588efe3" }, "id": "eP8N9JOX6QZ_", - "execution_count": null, + "execution_count": 48, "outputs": [ { "output_type": "stream", @@ -754,11 +748,11 @@ "text": [ "Batch \n" ] } @@ -767,7 +761,7 @@ { "cell_type": "markdown", "source": [ - "### Step 5: Create the annotations payload\n", + "## Step 5: Create the annotations payload\n", "\n", "Create the annotations payload using the snippets of code above\n", "\n", @@ -781,9 +775,9 @@ { "cell_type": "markdown", "source": [ - "#### 
Python Annotations\n", + "### Python Annotations\n", "\n", - "Here we create the complete label NDJSON payload of annotations only using Python annotation format. There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1jxU4Lz1KODkI4xjNLzFEL7BbFX0DRgTU#scrollTo=PKUj8iRnLMjs&line=1&uniqifier=1)." + "Here we create the complete label NDJSON payload of annotations only using Python annotation format. There is one annotation for each reference to an annotation that we created above." ], "metadata": { "id": "TgV8phkdecRi" @@ -794,49 +788,41 @@ "cell_type": "code", "source": [ "# create a Label\n", - "label = Label(\n", - " data=ImageData(\n", - " uid=data_row.uid),\n", - " annotations = [\n", - " radio_annotation, \n", - " checklist_annotation, \n", - " text_annotation,\n", - " bbox_annotation, \n", - " bbox_with_radio_subclass_annotation, \n", - " polygon_annotation, \n", - " mask_annotation, \n", - " point_annotation, \n", - " polyline_annotation\n", - " ]\n", + "\n", + "label = []\n", + "for data_row in dataset.export_data_rows():\n", + " label.append(Label(\n", + " data=ImageData(\n", + " uid=data_row.uid),\n", + " annotations = [\n", + " checklist_annotation, \n", + " text_annotation,\n", + " bbox_annotation, \n", + " bbox_with_radio_subclass_annotation, \n", + " polygon_annotation, \n", + " mask_annotation, \n", + " point_annotation, \n", + " polyline_annotation\n", + " ]\n", + " )\n", ")\n", "\n", + "\n", "# Convert our label from a Labelbox class object to the underlying NDJSON format required for upload \n", - "label_ndjson = list(NDJsonConverter.serialize([label]))" + "label_ndjson = list(NDJsonConverter.serialize(label))" ], "metadata": { "id": "gwExeqRHz9eW" }, "id": "gwExeqRHz9eW", - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "label_ndjson" - ], - "metadata": { - "id": "1dFGDHay9ai1" - }, - "id": "1dFGDHay9ai1", - "execution_count": null, + 
"execution_count": 49, "outputs": [] }, { "cell_type": "markdown", "source": [ - "#### NDJSON annotations\n", - "Here we create the complete label NDJSON payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created [above](https://colab.research.google.com/drive/1jxU4Lz1KODkI4xjNLzFEL7BbFX0DRgTU#scrollTo=PKUj8iRnLMjs&line=1&uniqifier=1)." + "### NDJSON annotations\n", + "Here we create the complete label NDJSON payload of annotations only using NDJSON format. There is one annotation for each reference to an annotation that we created above." ], "metadata": { "id": "rwP_YlBVeiex" @@ -867,123 +853,13 @@ "id": "QPBF3n0Cehsq" }, "id": "QPBF3n0Cehsq", - "execution_count": null, + "execution_count": 50, "outputs": [] }, - { - "cell_type": "code", - "source": [ - "label_ndjson_method2" - ], - "metadata": { - "id": "gGtMf6mO6yKv", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "efc16d15-2925-4132-f2da-f894c279204d" - }, - "id": "gGtMf6mO6yKv", - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[{'name': 'radio_question',\n", - " 'answer': {'name': 'second_radio_answer'},\n", - " 'uuid': 'a4437cc6-06cd-4a18-968c-3c5102565734',\n", - " 'dataRow': {'id': 'clbzjs8yb0wnb0718bp4w55kw'}},\n", - " {'name': 'checklist_question',\n", - " 'answer': [{'name': 'first_checklist_answer'},\n", - " {'name': 'second_checklist_answer'}],\n", - " 'uuid': '916ecf0e-904a-465e-8f77-95008c31ee3b',\n", - " 'dataRow': {'id': 'clbzjs8yb0wnb0718bp4w55kw'}},\n", - " {'name': 'free_text',\n", - " 'answer': 'sample text',\n", - " 'uuid': 'ab2efc0d-7791-4ac4-afda-a67b6b3be562',\n", - " 'dataRow': {'id': 'clbzjs8yb0wnb0718bp4w55kw'}},\n", - " {'name': 'bounding_box',\n", - " 'bbox': {'top': 977, 'left': 1690, 'height': 330, 'width': 225},\n", - " 'uuid': '8dc44323-220e-49d7-90aa-83dfe981f985',\n", - " 'dataRow': {'id': 'clbzjs8yb0wnb0718bp4w55kw'}},\n", - 
" {'name': 'bbox_with_radio_subclass',\n", - " 'classifications': [{'name': 'sub_radio_question',\n", - " 'answer': {'name': 'first_sub_radio_answer'}}],\n", - " 'bbox': {'top': 933, 'left': 541, 'height': 191, 'width': 330},\n", - " 'uuid': '1b8a06b9-cf84-438a-ac85-bd9fc783762c',\n", - " 'dataRow': {'id': 'clbzjs8yb0wnb0718bp4w55kw'}},\n", - " {'name': 'polygon',\n", - " 'polygon': [{'x': 1489.581, 'y': 183.934},\n", - " {'x': 2278.306, 'y': 256.885},\n", - " {'x': 2428.197, 'y': 200.437},\n", - " {'x': 2560.0, 'y': 335.419},\n", - " {'x': 2557.386, 'y': 503.165},\n", - " {'x': 2320.596, 'y': 503.103},\n", - " {'x': 2156.083, 'y': 628.943},\n", - " {'x': 2161.111, 'y': 785.519},\n", - " {'x': 2002.115, 'y': 894.647},\n", - " {'x': 1838.456, 'y': 877.874},\n", - " {'x': 1436.53, 'y': 874.636},\n", - " {'x': 1411.403, 'y': 758.579},\n", - " {'x': 1353.853, 'y': 751.74},\n", - " {'x': 1345.264, 'y': 453.461},\n", - " {'x': 1426.011, 'y': 421.129},\n", - " {'x': 1489.581, 'y': 183.934}],\n", - " 'uuid': 'b9fbc7f9-1c95-479d-a9ab-1d8e379d343b',\n", - " 'dataRow': {'id': 'clbzjs8yb0wnb0718bp4w55kw'}},\n", - " {'name': 'mask',\n", - " 'classifications': [],\n", - " 'mask': {'instanceURI': 'https://storage.labelbox.com/cjhfn5y6s0pk507024nz1ocys%2F1d60856c-59b7-3060-2754-83f7e93e0d01-1?Expires=1666901963361&KeyName=labelbox-assets-key-3&Signature=t-2s2DB4YjFuWEFak0wxYqfBfZA',\n", - " 'colorRGB': (0, 0, 0)},\n", - " 'uuid': '7f39d8c7-9d7d-4022-86e0-f2737c54c18d',\n", - " 'dataRow': {'id': 'clbzjs8yb0wnb0718bp4w55kw'}},\n", - " {'name': 'point',\n", - " 'classifications': [],\n", - " 'point': {'x': 1166.606, 'y': 1441.768},\n", - " 'uuid': '697d1750-31e7-483c-bb88-4d53f9482ace',\n", - " 'dataRow': {'id': 'clbzjs8yb0wnb0718bp4w55kw'}},\n", - " {'name': 'polyline',\n", - " 'classifications': [],\n", - " 'line': [{'x': 2534.353, 'y': 249.471},\n", - " {'x': 2429.492, 'y': 182.092},\n", - " {'x': 2294.322, 'y': 221.962},\n", - " {'x': 2224.491, 'y': 180.463},\n", - " {'x': 
2136.123, 'y': 204.716},\n", - " {'x': 1712.247, 'y': 173.949},\n", - " {'x': 1703.838, 'y': 84.438},\n", - " {'x': 1579.772, 'y': 82.61},\n", - " {'x': 1583.442, 'y': 167.552},\n", - " {'x': 1478.869, 'y': 164.903},\n", - " {'x': 1418.941, 'y': 318.149},\n", - " {'x': 1243.128, 'y': 400.815},\n", - " {'x': 1022.067, 'y': 319.007},\n", - " {'x': 892.367, 'y': 379.216},\n", - " {'x': 670.273, 'y': 364.408},\n", - " {'x': 613.114, 'y': 288.16},\n", - " {'x': 377.559, 'y': 238.251},\n", - " {'x': 368.087, 'y': 185.064},\n", - " {'x': 246.557, 'y': 167.286},\n", - " {'x': 236.648, 'y': 285.61},\n", - " {'x': 90.929, 'y': 326.412}],\n", - " 'uuid': 'a0905ab6-6cfb-45ad-88d8-f66e6a7d20b2',\n", - " 'dataRow': {'id': 'clbzjs8yb0wnb0718bp4w55kw'}},\n", - " {'name': 'nested_radio_question',\n", - " 'answer': {'name': 'first_radio_answer'},\n", - " 'classifications': [{'name': 'sub_radio_question',\n", - " 'answer': {'name': 'first_sub_radio_answer'}}],\n", - " 'uuid': '823bc863-0d6c-4ca8-9acd-d4aadeccce6b',\n", - " 'dataRow': {'id': 'clbzjs8yb0wnb0718bp4w55kw'}}]" - ] - }, - "metadata": {}, - "execution_count": 150 - } - ] - }, { "cell_type": "markdown", "source": [ - "### Step 6: Upload annotations to a project as pre-labels or complete labels" + "## Step 6: Upload annotations to a project as pre-labels or complete labels" ], "metadata": { "id": "CwCrhyiOU8am" @@ -993,7 +869,7 @@ { "cell_type": "markdown", "source": [ - "#### Model-Assisted Labeling\n", + "### Model-Assisted Labeling\n", "For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Annotation types). 
Delete the previous labels before uploading labels that use the 2nd method (ndjson)\n" ], "metadata": { @@ -1009,22 +885,35 @@ " client = client, \n", " project_id = project.uid, \n", " name=\"mal_job\"+str(uuid.uuid4()), \n", - " predictions=label_ndjson)\n", + " predictions=label_ndjson_method2)\n", "\n", "print(\"Errors:\", upload_job.errors)\n", "print(\" \")" ], "metadata": { - "id": "wPflwCr3_03e" + "id": "wPflwCr3_03e", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "0672edf2-c841-4f80-cfd9-02100259f694" }, "id": "wPflwCr3_03e", - "execution_count": null, - "outputs": [] + "execution_count": 51, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Errors: []\n", + " \n" + ] + } + ] }, { "cell_type": "markdown", "source": [ - "#### Label Import\n" + "### Label Import\n" ], "metadata": { "id": "_C5wY3ctMCRs" @@ -1049,10 +938,10 @@ "base_uri": "https://localhost:8080/" }, "id": "l2I_dRyXz9i2", - "outputId": "240b7132-5c93-42d6-9ac3-ded484a2ad53" + "outputId": "0507f043-e037-4ef5-ae2c-490739a01267" }, "id": "l2I_dRyXz9i2", - "execution_count": null, + "execution_count": 54, "outputs": [ { "output_type": "stream", @@ -1064,20 +953,9 @@ } ] }, - { - "cell_type": "markdown", - "source": [ - "### Optional deletions for cleanup" - ], - "metadata": { - "id": "4aiUsGnFn5kG" - }, - "id": "4aiUsGnFn5kG" - }, { "cell_type": "code", "source": [ - "#upload_job\n", "# project.delete()\n", "# dataset.delete()" ], @@ -1092,7 +970,8 @@ "metadata": { "colab": { "collapsed_sections": [ - "6f3cdca1-524f-4247-a63b-2d4371b0257d" + "6f3cdca1-524f-4247-a63b-2d4371b0257d", + "OePiibbed0nG" ], "provenance": [] },