diff --git a/examples/model_assisted_labeling/video_mal.ipynb b/examples/model_assisted_labeling/video_mal.ipynb index 8cfc746ff..72cae6bd0 100644 --- a/examples/model_assisted_labeling/video_mal.ipynb +++ b/examples/model_assisted_labeling/video_mal.ipynb @@ -2,534 +2,863 @@ "cells": [ { "cell_type": "markdown", - "id": "db768cda", "metadata": { "id": "db768cda" }, "source": [ "\n", - " \n", + " \n", "" - ] + ], + "id": "db768cda" }, { "cell_type": "markdown", - "id": "cb5611d0", "metadata": { "id": "cb5611d0" }, "source": [ "\n", - "\n", "\n", "\n", "\n", - "\n", - "" - ] + " " + ], + "id": "cb5611d0" }, { "cell_type": "markdown", - "id": "stupid-court", "metadata": { "id": "stupid-court" }, "source": [ - "# Video MAL" - ] + "# Video Annotation Import\n", + "\n", + "* Annotations must be created and uploaded using NDJSON\n", + "* Supported annotations that can be uploaded through the SDK:\n", + " * Bounding box\n", + " * Point\n", + " * Polyline \n", + " * Radio classifications \n", + " * Checklist classifications \n", + "* **NOT** supported:\n", + " * Polygons \n", + " * Segmentation masks\n", + " * Free form text classifications\n", + " * tool nested classification\n", + "\n", + "Please note that this list of unsupported annotations only refers to limitations for importing annotations. For example, when using the Labelbox editor, segmentation masks can be created and edited on video assets." 
+ ], + "id": "stupid-court" }, { "cell_type": "markdown", - "id": "intellectual-idaho", "metadata": { - "id": "intellectual-idaho" + "id": "1zT_5ECvN_cD" }, "source": [ - "* Upload model inferences for video tasks\n", - "* Support types\n", - " * bounding box" - ] + "### Setup" + ], + "id": "1zT_5ECvN_cD" }, { "cell_type": "code", - "execution_count": null, - "id": "voluntary-minister", + "execution_count": 1, "metadata": { - "id": "voluntary-minister" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "voluntary-minister", + "outputId": "fa6395f4-0007-4f19-858f-da429c61d100" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m185.5/185.5 KB\u001b[0m \u001b[31m5.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for pygeotile (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n" + ] + } + ], "source": [ "!pip install -q 'labelbox[data]'" - ] + ], + "id": "voluntary-minister" }, { "cell_type": "code", "execution_count": 2, - "id": "committed-richards", "metadata": { "id": "committed-richards" }, "outputs": [], "source": [ - "import os\n", "import uuid\n", - "from io import BytesIO\n", - "from typing import Dict, Any, Tuple\n", - "\n", "from labelbox import Client, LabelingFrontend, MediaType, MALPredictionImport, LabelImport\n", "from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n", "from labelbox.schema.queue_mode import QueueMode\n", "from labelbox.data.annotation_types import (\n", - " Label, TextData, Checklist, Radio, ObjectAnnotation, TextEntity,\n", - " ClassificationAnnotation, ClassificationAnswer, LabelList\n", + " Label, ObjectAnnotation,\n", + " Rectangle, Point, Line, Radio, Checklist, ClassificationAnnotation, ClassificationAnswer\n", ")" - ] + ], + "id": "committed-richards" }, { "cell_type": "markdown", - "id": "c8c876b7", "metadata": { "id": "c8c876b7" }, "source": [ - "# API Key and Client\n", - "Provide a valid api key below in order to properly connect to the Labelbox Client." 
- ] + "### Replace with your API key \n", + "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" + ], + "id": "c8c876b7" }, { "cell_type": "code", "execution_count": 3, - "id": "affecting-myanmar", "metadata": { "id": "affecting-myanmar" }, "outputs": [], "source": [ "# Add your api key\n", - "API_KEY=None\n", + "API_KEY = None\n", "client = Client(api_key=API_KEY)" - ] + ], + "id": "affecting-myanmar" }, { "cell_type": "markdown", - "id": "blessed-venture", "metadata": { "id": "blessed-venture" }, "source": [ - "### Project Setup" - ] + "## Supported annotations for video\n", + "Only NDJSON annotations are supported with video assets" + ], + "id": "blessed-venture" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kLT9P-WYk4Nr" + }, + "source": [ + "### Supported NDJSON annotations" + ], + "id": "kLT9P-WYk4Nr" }, { "cell_type": "code", "execution_count": 4, - "id": "suburban-crowd", "metadata": { - "id": "suburban-crowd", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "5e41aa51-c12f-458b-cad1-fe6e239b15d4" + "id": "suburban-crowd" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ + "######## Bounding box ###########\n", "\n", - "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", - "# Queue mode will be deprecated once dataset mode is deprecated\n", - "mal_project = client.create_project(name=\"video_mal_project_demo\",\n", - " queue_mode=QueueMode.Batch,\n", - " auto_audit_percentage=1,\n", - " auto_audit_number_of_labels=1,\n", - " media_type=MediaType.Video)\n", + "# NDJSON\n", + "bbox_annotation_ndjson = {\n", + " \"name\" : \"bbox_video\",\n", + " \"segments\" : [{\n", + " \"keyframes\" : [\n", + " {\n", + " \"frame\": 13,\n", + " \"bbox\" : {\n", + " \"top\": 146.0,\n", + " \"left\": 98.0,\n", + " \"height\": 382.0,\n", + " \"width\": 341.0\n", + " } \n", + " },\n", 
+ " {\n", + " \"frame\": 14,\n", + " \"bbox\" : {\n", + " \"top\": 146.0,\n", + " \"left\": 98.0,\n", + " \"height\": 382.0,\n", + " \"width\": 341.0\n", + " } \n", + " },\n", + " {\n", + " \"frame\": 15,\n", + " \"bbox\" : {\n", + " \"top\": 146.0,\n", + " \"left\": 98.0,\n", + " \"height\": 382.0,\n", + " \"width\": 341.0\n", + " } \n", + " }\n", + " ]\n", + " }\n", + " ]\n", + "}" + ], + "id": "suburban-crowd" + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "NCkQ1pB6zBne" + }, + "outputs": [], + "source": [ + "######## Point ########\n", "\n", + "#NDJSON\n", + "point_annotation_ndjson = {\n", + " \"name\": \"point_video\", \n", + " \"segments\": [{\n", + " \"keyframes\": [{\n", + " \"frame\": 17,\n", + " \"point\" : {\n", + " \"x\": 660.134 ,\n", + " \"y\": 407.926\n", + " }\n", + " }]\n", + " }] \n", + "}" + ], + "id": "NCkQ1pB6zBne" + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "8xF7QbwZ41Q5" + }, + "outputs": [], + "source": [ + "######## Polyline ########\n", "\n", + "# NDJSON (frame based annotations are supported with NDJSON format)\n", + "polyline_frame_annotation_ndjson = {\n", + " \"name\": \"line_video_frame\", \n", + " \"segments\": [\n", + " {\n", + " \"keyframes\": [\n", + " {\n", + " \"frame\": 5,\n", + " \"line\": [{\n", + " \"x\": 680,\n", + " \"y\": 100\n", + " },{\n", + " \"x\": 100,\n", + " \"y\": 190\n", + " },{\n", + " \"x\": 190,\n", + " \"y\": 220\n", + " }]\n", + " },\n", + " {\n", + " \"frame\": 12,\n", + " \"line\": [{\n", + " \"x\": 680,\n", + " \"y\": 280\n", + " },{\n", + " \"x\": 300,\n", + " \"y\": 380\n", + " },{\n", + " \"x\": 400,\n", + " \"y\": 460\n", + " }]\n", + " },\n", + " {\n", + " \"frame\": 20,\n", + " \"line\": [{\n", + " \"x\": 680,\n", + " \"y\": 180\n", + " },{\n", + " \"x\": 100,\n", + " \"y\": 200\n", + " },{\n", + " \"x\": 200,\n", + " \"y\": 260\n", + " }]\n", + " }\n", + " ]\n", + " },\n", + " {\n", + " \"keyframes\": [\n", + " {\n", + " 
\"frame\": 24,\n", + " \"line\": [{\n", + " \"x\": 300,\n", + " \"y\": 310\n", + " },{\n", + " \"x\": 330,\n", + " \"y\": 430\n", + " }]\n", + " },\n", + " {\n", + " \"frame\": 45,\n", + " \"line\": [{\n", + " \"x\": 600,\n", + " \"y\": 810\n", + " },{\n", + " \"x\": 900,\n", + " \"y\": 930\n", + " }]\n", + " }\n", + " ]\n", + " }\n", + " ]\n", + "}" + ], + "id": "8xF7QbwZ41Q5" + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "hm-zJTtE_APt" + }, + "outputs": [], + "source": [ + "######## Checklist classification ########\n", "\n", - "li_project = client.create_project(name=\"video_label_import_project_demo\",\n", - " queue_mode=QueueMode.Batch,\n", - " auto_audit_percentage=1,\n", - " auto_audit_number_of_labels=1,\n", - " media_type=MediaType.Video)\n", + "## NDJSON\n", "\n", - "# # Create one Labelbox dataset\n", - "dataset = client.create_dataset(name=\"video_annotation_import_demo_dataset\")\n", - "upload = {\n", - " \"row_data\":\"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\",\n", - " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1()\n", + "## frame specific\n", + "frame_checklist_classification_ndjson = {\n", + " \"name\": \"checklist_class\", \n", + " \"answer\": [\n", + " { \"name\": \"first_checklist_answer\" , \"frames\": [{\"start\": 29, \"end\": 35 }, {\"start\": 48, \"end\": 65}]},\n", + " { \"name\": \"second_checklist_answer\", \"frames\": [{\"start\": 29, \"end\": 35 }, {\"start\": 48, \"end\": 65}]} \n", + " ] \n", "}\n", "\n", - "data_row = dataset.create_data_row(upload)\n", - "print(data_row)\n", - "\n", - "\n", - "######################### DATASET CONSENSUS OPTION ########################\n", - "#Note that dataset base projects will be deprecated in the near future.\n", - "\n", - "#To use Datasets/Consensus instead of Batches/Benchmarks use the following query: \n", - "#In this case, 10% of all data rows need to be annotated by three labelers.\n", - "\n", - "# dataset_project 
= client.create_project(name=\"dataset-test-project\",\n", - "# description=\"a description\",\n", - "# media_type=MediaType.Video,\n", - "# auto_audit_percentage=0.1,\n", - "# auto_audit_number_of_labels=3,\n", - "# queue_mode=QueueMode.Dataset)\n", - "\n", - "# dataset_project.datasets.connect(dataset)\n" - ] + "# Global \n", + "global_radio_classification_ndjson = {\n", + " \"name\": \"radio_class_global\", \n", + " \"answer\": { \"name\": \"first_radio_answer\" }\n", + "}\n", + "\n" + ], + "id": "hm-zJTtE_APt" }, { "cell_type": "code", "source": [ - "# We need the data row ID to create a batch\n", - "batch_datarows = [dr.uid for dr in dataset.export_data_rows()]\n", - "\n", - "# Create a batch to send to your MAL project\n", - "batch_mal = mal_project.create_batch(\n", - " \"video-batch-MAL-demo\", # Each batch in a project must have a unique name\n", - " batch_datarows, # A list of data rows or data row ids\n", - " 5 # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "# Create a batch to send to you LIM project\n", - "batch_li = li_project.create_batch(\n", - " \"video-batch-LIM-demo\", # Each batch in a project must have a unique name\n", - " batch_datarows, # A list of data rows or data row ids\n", - " 5 # priority between 1(Highest) - 5(lowest)\n", - ")\n", - "\n", - "# Setup your ontology / labeling editor\n", - "ontology_builder = OntologyBuilder(\n", - " tools=[Tool(tool=Tool.Type.BBOX, name=\"jellyfish\")])\n", - "\n", - "editor = next(client.get_labeling_frontends(where=LabelingFrontend.name == \"Editor\")) # Unless using a custom editor,\n", - "# Connect your ontology and editor to your MAL and LI project\n", - "mal_project.setup(editor, ontology_builder.asdict())\n", - "li_project.setup(editor, ontology_builder.asdict())\n", + "########## Nested Global Classification ########### \n", "\n", - "print(\"Batch Li: \", batch_li)\n", - "print(\"Batch Mal: \", batch_mal)" + "nested_classification = {\n", + " 'name': 
'radio_question_nested',\n", + " 'answer': {'name': 'first_radio_question'},\n", + " 'classifications' : [\n", + " {'name': 'sub_question_radio', 'answer': {'name': 'sub_answer'}}\n", + " ]\n", + "}" ], "metadata": { - "id": "oRJF1G7jleSy", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "0b18ce4a-1390-4a2e-c52c-4a7ba5c3674c" + "id": "N5ibW4frr5rw" }, - "id": "oRJF1G7jleSy", - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Batch Li: \n", - "Batch Mal: \n" - ] - } - ] + "id": "N5ibW4frr5rw", + "execution_count": 8, + "outputs": [] }, { "cell_type": "markdown", - "id": "portable-grenada", "metadata": { - "id": "portable-grenada" + "id": "Zpj79CySHpbW" }, "source": [ - "#### Grab featureSchemaIds" - ] + "## Upload Annotations - putting it all together" + ], + "id": "Zpj79CySHpbW" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t-5DTORrHtbo" + }, + "source": [ + "### Step 1: Import data rows into Catalog" + ], + "id": "t-5DTORrHtbo" }, { "cell_type": "code", - "execution_count": 6, - "id": "abstract-fifteen", + "execution_count": 9, "metadata": { - "id": "abstract-fifteen", "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "c8aec626-d528-4438-ca67-86684a0f3dd9" + "id": "ysLRIWM3HGFv", + "outputId": "071bee00-7e26-40be-adfc-daeb90ac5342" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "{'jellyfish': 'cl9sk6g3nd33j07xy52vz1noq'}\n", - "{'jellyfish': 'cl9sk6ehad32w07xyhm8f96xw'}\n" + "clchwj68q01ya072w3d2oahs6\n", + "\n" ] } ], "source": [ - "# When we created a project with the ontology defined above, all of the ids were assigned.\n", - "# So lets reconstruct the ontology builder with all of the ids.\n", - "ontology_li = ontology_builder.from_project(li_project)\n", - "ontology_mal = ontology_builder.from_project(mal_project)\n", - "# # We want all of the feature schemas to be easily accessible by name.\n", - "schema_lookup_li = 
{tool.name: tool.feature_schema_id for tool in ontology_li.tools}\n", - "schema_lookup_mal = {tool.name: tool.feature_schema_id for tool in ontology_mal.tools}\n", + "# `uuid` and `client` are created in the setup cells at the top of the notebook.\n", + "# A fresh UUID keeps the global key unique every time this cell is re-run.\n", "\n", - "print(schema_lookup_li)\n", - "print(schema_lookup_mal)" - ] + "asset = {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\", \n", + " \"global_key\": str(uuid.uuid4()),\n", + " \"media_type\": \"VIDEO\"\n", + "}\n", + "\n", + "dataset = client.create_dataset(name=\"video_demo_dataset\")\n", + "data_row = dataset.create_data_row(asset)\n", + "print(data_row.uid)\n", + "print(data_row)" + ], + "id": "ysLRIWM3HGFv" }, { "cell_type": "markdown", - "id": "portuguese-arthur", "metadata": { - "id": "portuguese-arthur" + "id": "AXopoqTqKOvW" }, "source": [ - "## Import Format\n", + "### Step 2: Create/select an ontology\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched.\n", "\n", - "**segments**: A segment represents a continuous section where an object is visible. If an instance disappears then the segment ends. If it re-appears, a new segment is created.\n", + "For example, when we created the bounding box annotation [above](https://colab.research.google.com/drive/1S3HBa10jwfFFNB71Uid-crUDdhfEGnXh#scrollTo=suburban-crowd&line=10&uniqifier=1), we provided the `name` as `bbox_video`. Now, when we set up our ontology, we must ensure that the name of our bounding box tool is also `bbox_video`. The same alignment must hold true for the other tools and classifications we create in our ontology.\n", "\n", - "**keyframes**: Key frames identify the location of an instance. 
Between keyframes, the location of the instance is interpolated.\n", "\n", - "**bbox**: The coordinates of the bounding box" - ] + "[Documentation for reference](https://docs.labelbox.com/reference/import-video-annotations)" + ], + "id": "AXopoqTqKOvW" }, { "cell_type": "code", - "execution_count": 7, - "id": "5fc417c5", + "execution_count": 10, "metadata": { - "id": "5fc417c5" + "id": "ZCjNJBTSKONt" }, "outputs": [], "source": [ - "segments = [{\n", - " \"keyframes\": [{\n", - " \"frame\": 1,\n", - " \"bbox\": {\n", - " \"top\": 80,\n", - " \"left\": 80,\n", - " \"height\": 80,\n", - " \"width\": 80\n", - " }\n", - " }, {\n", - " \"frame\": 20,\n", - " \"bbox\": {\n", - " \"top\": 125,\n", - " \"left\": 125,\n", - " \"height\": 200,\n", - " \"width\": 300\n", - " }\n", - " }]\n", - "}, {\n", - " \"keyframes\": [{\n", - " \"frame\": 27,\n", - " \"bbox\": {\n", - " \"top\": 80,\n", - " \"left\": 50,\n", - " \"height\": 80,\n", - " \"width\": 50\n", - " }\n", - " }]\n", - "}]" - ] + "ontology_builder = OntologyBuilder(\n", + " tools=[\n", + " Tool(tool=Tool.Type.BBOX, name=\"bbox_video\"),\n", + " Tool(tool=Tool.Type.POINT, name=\"point_video\"),\n", + " Tool(tool=Tool.Type.LINE, name=\"line_video_frame\")\n", + " ],\n", + " classifications=[ \n", + " Classification(\n", + " class_type=Classification.Type.CHECKLIST, \n", + " instructions=\"checklist_class\",\n", + " scope = Classification.Scope.INDEX, ## Need to define scope for frame classifications \n", + " options=[ \n", + " Option(value=\"first_checklist_answer\"),\n", + " Option(value=\"second_checklist_answer\")\n", + " ]\n", + " ),\n", + " Classification(\n", + " class_type=Classification.Type.RADIO, \n", + " instructions=\"radio_class_global\",\n", + " options=[ \n", + " Option(value=\"first_radio_answer\"),\n", + " Option(value=\"second_radio_answer\")\n", + " ]\n", + " ),\n", + " Classification(\n", + " class_type=Classification.Type.RADIO, \n", + " instructions=\"radio_question_nested\",\n", + " 
options=[\n", + " Option(\"first_radio_question\",\n", + " options=[\n", + " Classification(\n", + " class_type=Classification.Type.RADIO,\n", + " instructions=\"sub_question_radio\",\n", + " options=[Option(\"sub_answer\")]\n", + " )\n", + " ]\n", + " )\n", + " ] \n", + " ) \n", + " ] \n", + ")\n", + "\n", + "ontology = client.create_ontology(\"Ontology Video Annotations\", ontology_builder.asdict())" + ], + "id": "ZCjNJBTSKONt" }, { "cell_type": "markdown", - "id": "convertible-entry", "metadata": { - "id": "convertible-entry" + "id": "portable-grenada" }, "source": [ - "##### Create helper functions to make this much easier" - ] + "### Step 3: Create a labeling project \n", + "Connect the ontology to the labeling project." + ], + "id": "portable-grenada" }, { "cell_type": "code", - "execution_count": 9, - "id": "developing-beauty", + "execution_count": 11, "metadata": { - "id": "developing-beauty" + "id": "wPLSHXrQPfHh" }, "outputs": [], "source": [ - "def create_video_bbox_ndjson(datarow_id: str, schema_id: str,\n", - " segments: Dict[str, Any]) -> Dict[str, Any]:\n", - " return {\n", - " \"uuid\": str(uuid.uuid4()),\n", - " \"schemaId\": schema_id,\n", - " \"dataRow\": {\n", - " \"id\": datarow_id\n", - " },\n", - " \"segments\": segments\n", - " }" - ] + "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", + "# Queue mode will be deprecated once dataset mode is deprecated\n", + "\n", + "project = client.create_project(name=\"video_project_demo\",\n", + " queue_mode=QueueMode.Batch,\n", + " media_type=MediaType.Video)\n", + "\n", + "## connect ontology to your project\n", + "project.setup_editor(ontology)\n", + "\n", + "######################### DATASET CONSENSUS OPTION ########################\n", + "# Note that dataset base projects will be deprecated in the near future.\n", + "\n", + "# To use Datasets/Consensus instead of Batches/Benchmarks use the following query: \n", + "# In this case, 10% of all data 
rows need to be annotated by three labelers.\n", + "\n", + "# dataset_project = client.create_project(name=\"dataset-test-project\",\n", + "# description=\"a description\",\n", + "# media_type=MediaType.Video,\n", + "# auto_audit_percentage=0.1,\n", + "# auto_audit_number_of_labels=3,\n", + "# queue_mode=QueueMode.Dataset)\n", + "\n", + "# dataset_project.datasets.connect(dataset)" + ], + "id": "wPLSHXrQPfHh" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GVVPuZVSX6wi" + }, + "source": [ + "### Step 4: Send a batch of data rows to the project" + ], + "id": "GVVPuZVSX6wi" }, { "cell_type": "code", - "execution_count": 10, - "id": "asian-savings", + "execution_count": 12, "metadata": { - "id": "asian-savings" + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UhqBD3gAYCAg", + "outputId": "8f8c0306-a196-4fba-f322-a28f020d090a" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Batch: \n" + ] + } + ], "source": [ - "uploads_li = []\n", - "uploads_mal = []\n", + "# Create batches\n", "\n", + "# Create a batch to send to your project\n", + "batch = project.create_batch(\n", + " \"first-batch-video-demo2\", # Each batch in a project must have a unique name\n", + " dataset.export_data_rows(), # A paginated collection of data row objects\n", + " 5 # priority between 1(Highest) - 5(lowest)\n", + ")\n", "\n", - "for data_row in dataset.data_rows():\n", - " uploads_li.append(\n", - " create_video_bbox_ndjson(data_row.uid, schema_lookup_li['jellyfish'],\n", - " segments))\n", - " \n", - "for data_row in dataset.data_rows():\n", - " uploads_mal.append(\n", - " create_video_bbox_ndjson(data_row.uid, schema_lookup_mal['jellyfish'],\n", - " segments))" - ] + "print(\"Batch: \", batch)" + ], + "id": "UhqBD3gAYCAg" }, { "cell_type": "markdown", - "id": "perfect-seafood", "metadata": { - "id": "perfect-seafood" + "id": "gKVEw7AMYIc1" }, "source": [ - "### Upload the annotations with MAL import" - ] + 
"### Step 5: Create the annotations payload \n", + "Create the annotations payload using the snippets of code above.\n", + "\n", + "Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types. However, for video assets, only NDJSON format is supported." + ], + "id": "gKVEw7AMYIc1" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ew0UVWtGZzTO" + }, + "source": [ + "#### NDJSON annotations\n", + "Here we create the complete `label_ndjson` payload of annotations. There is one annotation for each *reference to an annotation* that we created [above](https://colab.research.google.com/drive/1S3HBa10jwfFFNB71Uid-crUDdhfEGnXh#scrollTo=kLT9P-WYk4Nr&line=1&uniqifier=1)." + ], + "id": "ew0UVWtGZzTO" }, { "cell_type": "code", - "execution_count": 11, - "id": "entire-community", + "execution_count": 13, "metadata": { - "id": "entire-community" + "id": "qnFTqR6eZ3mE" }, "outputs": [], "source": [ - "# Let's upload!\n", - "# Validate must be set to false for video bounding boxes\n", - "upload_job = MALPredictionImport.create_from_objects(\n", - " client = client, \n", - " project_id = mal_project.uid, \n", - " name=\"MAL_upload_label_import_job_demo\", \n", - " predictions=uploads_mal\n", - ")" - ] + "label_ndjson = []\n", + "\n", + "for annotation in [point_annotation_ndjson,\n", + " bbox_annotation_ndjson,\n", + " polyline_frame_annotation_ndjson, \n", + " frame_checklist_classification_ndjson, \n", + " global_radio_classification_ndjson,\n", + " nested_classification\n", + " ]: \n", + "    # Build a fresh dict per annotation so the templates above are not mutated, and\n", + "    # reuse the data row created in Step 1 instead of re-exporting the dataset each time.\n", + "    label_ndjson.append({\n", + "        **annotation,\n", + "        'uuid': str(uuid.uuid4()),\n", + "        'dataRow': {'id': data_row.uid}\n", + "    })\n" + ], + "id": "qnFTqR6eZ3mE" }, { "cell_type": "code", - "execution_count": 12, - "id": "hollywood-faculty", + "execution_count": 14, 
"metadata": { - "id": "hollywood-faculty", - "outputId": "f7f01a91-0fbc-4d7c-903d-a699734f05f0", "colab": { "base_uri": 
"https://localhost:8080/" - } + }, + "id": "YlQUVJ17Ow0-", + "outputId": "8e9266f9-8f9d-450d-a4d8-cdef0bd596d7" }, "outputs": [ { - "output_type": "stream", - "name": "stdout", - "text": [ - "Errors: []\n" - ] + "output_type": "execute_result", + "data": { + "text/plain": [ + "[{'name': 'point_video',\n", + " 'segments': [{'keyframes': [{'frame': 17,\n", + " 'point': {'x': 660.134, 'y': 407.926}}]}],\n", + " 'uuid': 'aec84981-60a0-4c4e-8b8b-0f9dcca65ab3',\n", + " 'dataRow': {'id': 'clchwj68q01ya072w3d2oahs6'}},\n", + " {'name': 'bbox_video',\n", + " 'segments': [{'keyframes': [{'frame': 13,\n", + " 'bbox': {'top': 146.0, 'left': 98.0, 'height': 382.0, 'width': 341.0}},\n", + " {'frame': 14,\n", + " 'bbox': {'top': 146.0, 'left': 98.0, 'height': 382.0, 'width': 341.0}},\n", + " {'frame': 15,\n", + " 'bbox': {'top': 146.0,\n", + " 'left': 98.0,\n", + " 'height': 382.0,\n", + " 'width': 341.0}}]}],\n", + " 'uuid': 'b5d5971a-e418-48d9-a246-4985d3fdd676',\n", + " 'dataRow': {'id': 'clchwj68q01ya072w3d2oahs6'}},\n", + " {'name': 'line_video_frame',\n", + " 'segments': [{'keyframes': [{'frame': 5,\n", + " 'line': [{'x': 680, 'y': 100},\n", + " {'x': 100, 'y': 190},\n", + " {'x': 190, 'y': 220}]},\n", + " {'frame': 12,\n", + " 'line': [{'x': 680, 'y': 280},\n", + " {'x': 300, 'y': 380},\n", + " {'x': 400, 'y': 460}]},\n", + " {'frame': 20,\n", + " 'line': [{'x': 680, 'y': 180},\n", + " {'x': 100, 'y': 200},\n", + " {'x': 200, 'y': 260}]}]},\n", + " {'keyframes': [{'frame': 24,\n", + " 'line': [{'x': 300, 'y': 310}, {'x': 330, 'y': 430}]},\n", + " {'frame': 45, 'line': [{'x': 600, 'y': 810}, {'x': 900, 'y': 930}]}]}],\n", + " 'uuid': 'abf71049-68c0-435c-abcd-03933efc2bba',\n", + " 'dataRow': {'id': 'clchwj68q01ya072w3d2oahs6'}},\n", + " {'name': 'checklist_class',\n", + " 'answer': [{'name': 'first_checklist_answer',\n", + " 'frames': [{'start': 29, 'end': 35}, {'start': 48, 'end': 65}]},\n", + " {'name': 'second_checklist_answer',\n", + " 'frames': [{'start': 29, 'end': 
35}, {'start': 48, 'end': 65}]}],\n", + " 'uuid': 'd1a12851-8d17-44db-93c7-1962aa5f2f77',\n", + " 'dataRow': {'id': 'clchwj68q01ya072w3d2oahs6'}},\n", + " {'name': 'radio_class_global',\n", + " 'answer': {'name': 'first_radio_answer'},\n", + " 'uuid': '1cdc6d84-3760-4b27-8c1a-4a067fb436b1',\n", + " 'dataRow': {'id': 'clchwj68q01ya072w3d2oahs6'}},\n", + " {'name': 'radio_question_nested',\n", + " 'answer': {'name': 'first_radio_question'},\n", + " 'classifications': [{'name': 'sub_question_radio',\n", + " 'answer': {'name': 'sub_answer'}}],\n", + " 'uuid': '95d15a9f-1718-4464-b1ac-a3ab2641c3d7',\n", + " 'dataRow': {'id': 'clchwj68q01ya072w3d2oahs6'}}]" + ] + }, + "metadata": {}, + "execution_count": 14 } ], "source": [ - "# Wait for upload to finish (Will take up to five minutes)\n", - "upload_job.wait_until_done()\n", - "# Review the upload status\n", - "print(\"Errors: \",upload_job.errors)" - ] + " label_ndjson" + ], + "id": "YlQUVJ17Ow0-" }, { "cell_type": "markdown", + "metadata": { + "id": "perfect-seafood" + }, "source": [ - "### Upload the annotations with LabelImport" + "### Step 6: Upload annotations to a project as pre-labels or completed labels\n", + "For the purpose of this tutorial only run one of the label imports at once, otherwise the previous import might get overwritten." 
], + "id": "perfect-seafood" + }, + { + "cell_type": "markdown", "metadata": { - "id": "GOgi7lwRFU8Q" + "id": "duR8GYczNCmy" }, - "id": "GOgi7lwRFU8Q" + "source": [ + "#### Model-Assisted Labeling (MAL)" + ], + "id": "duR8GYczNCmy" }, { "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "entire-community", + "outputId": "f8c50a5b-fb07-4782-d8a3-f23177c94e07" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Errors: []\n", + " \n" + ] + } + ], "source": [ - "upload_job_li = LabelImport.create_from_objects(\n", - " client = client,\n", - " project_id = li_project.uid, \n", - " name = \"LI_upload_label_import_job_demo\",\n", - " labels=uploads_li\n", - ")" + "# Upload MAL label for this data row in project\n", + "upload_job_mal = MALPredictionImport.create_from_objects(\n", + " client = client, \n", + " project_id = project.uid, \n", + " name=\"mal_import_job-\" + str(uuid.uuid4()), \n", + " predictions=label_ndjson)\n", + "\n", + "upload_job_mal.wait_until_done();\n", + "print(\"Errors:\", upload_job_mal.errors)\n", + "print(\" \")" ], + "id": "entire-community" + }, + { + "cell_type": "markdown", "metadata": { - "id": "s1iFk1qOm66L" + "id": "OCilDEz_wxpb" }, - "id": "s1iFk1qOm66L", - "execution_count": 13, - "outputs": [] + "source": [ + "#### Label Import" + ], + "id": "OCilDEz_wxpb" }, { "cell_type": "code", - "source": [ - "# Wait for upload to finish (Will take up to five minutes)\n", - "upload_job_li.wait_until_done()\n", - "# Review the upload status\n", - "print(\"Errors: \",upload_job_li.errors)" - ], + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, - "id": "UjNPwt8qFS3A", - "outputId": "37fa8f2c-6cb3-46d1-c456-5d0de7605b51" + "id": "si-6kQ4mwUBO", + "outputId": "97d68575-3113-4129-d983-61a7b8ea3e87" }, - "id": "UjNPwt8qFS3A", - "execution_count": 14, "outputs": [ { "output_type": "stream", "name": 
"stdout", "text": [ - "Errors: []\n" + "Errors: []\n", + " \n" ] } - ] + ], + "source": [ + "upload_job_label_import = LabelImport.create_from_objects(\n", + " client = client,\n", + " project_id = project.uid, \n", + " name = \"label_import_job-\" + str(uuid.uuid4()),\n", + " labels=label_ndjson\n", + ")\n", + "\n", + "upload_job_label_import.wait_until_done();\n", + "print(\"Errors:\", upload_job_label_import.errors)\n", + "print(\" \")" + ], + "id": "si-6kQ4mwUBO" }, { "cell_type": "markdown", - "source": [ - "## Cleanup" - ], "metadata": { - "id": "I4bW74Yd4_Za" + "id": "jdMmQxoVNP6q" }, - "id": "I4bW74Yd4_Za" + "source": [ + "### Optional deletions for cleanup" + ], + "id": "jdMmQxoVNP6q" }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yPZaLM74LFhB" + }, + "outputs": [], "source": [ - "# li_project.delete()\n", - "# mal_project.delete()\n", + "# Delete Project\n", + "# project.delete()\n", "# dataset.delete()" ], - "metadata": { - "id": "4wVPvcXc42jQ" - }, - "id": "4wVPvcXc42jQ", - "execution_count": 15, - "outputs": [] + "id": "yPZaLM74LFhB" } ], "metadata": { + "colab": { + "provenance": [] + }, "kernelspec": { "display_name": "Python 3", "language": "python", @@ -546,10 +875,6 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" - }, - "colab": { - "provenance": [], - "collapsed_sections": [] } }, "nbformat": 4,