diff --git a/examples/model_assisted_labeling/video_mal.ipynb b/examples/model_assisted_labeling/video_mal.ipynb index 80a79dc6e..8cfc746ff 100644 --- a/examples/model_assisted_labeling/video_mal.ipynb +++ b/examples/model_assisted_labeling/video_mal.ipynb @@ -1,346 +1,557 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "db768cda", - "metadata": {}, - "source": [ - "\n", - " \n", - "" - ] - }, - { - "cell_type": "markdown", - "id": "cb5611d0", - "metadata": {}, - "source": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "id": "stupid-court", - "metadata": {}, - "source": [ - "# Video MAL" - ] - }, - { - "cell_type": "markdown", - "id": "intellectual-idaho", - "metadata": {}, - "source": [ - "* Upload model inferences for video tasks\n", - "* Support types\n", - " * bounding box" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "voluntary-minister", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install -q labelbox" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "committed-richards", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import uuid\n", - "from io import BytesIO\n", - "from typing import Dict, Any, Tuple\n", - "\n", - "from labelbox import Client, LabelingFrontend\n", - "from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n", - "from labelbox.schema.media_type import MediaType" - ] - }, - { - "cell_type": "markdown", - "id": "c8c876b7", - "metadata": {}, - "source": [ - "# API Key and Client\n", - "Provide a valid api key below in order to properly connect to the Labelbox Client." 
- ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "affecting-myanmar", - "metadata": {}, - "outputs": [], - "source": [ - "# Add your api key\n", - "API_KEY = None\n", - "client = Client(api_key=API_KEY)" - ] - }, - { - "cell_type": "markdown", - "id": "blessed-venture", - "metadata": {}, - "source": [ - "### Project Setup" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "suburban-crowd", - "metadata": {}, - "outputs": [], - "source": [ - "# We want to try out a few different tools here.\n", - "ontology_builder = OntologyBuilder(\n", - " tools=[Tool(tool=Tool.Type.BBOX, name=\"jellyfish\")])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "modern-program", - "metadata": {}, - "outputs": [], - "source": [ - "# Lets setup a project to label\n", - "# Note see Ontology, Project, and Project_setup notebooks for more information on this section.\n", - "project = client.create_project(name=\"video_mal_project\", media_type=MediaType.Video)\n", - "dataset = client.create_dataset(name=\"video_mal_dataset\")\n", - "dataset.create_data_row(\n", - " row_data=\n", - " \"https://storage.labelbox.com/cjhfn5y6s0pk507024nz1ocys%2Fb8837f3b-b071-98d9-645e-2e2c0302393b-jellyfish2-100-110.mp4\"\n", - ")\n", - "editor = next(\n", - " client.get_labeling_frontends(where=LabelingFrontend.name == \"Editor\"))\n", - "project.setup(editor, ontology_builder.asdict())\n", - "project.datasets.connect(dataset)" - ] - }, - { - "cell_type": "markdown", - "id": "portable-grenada", - "metadata": {}, - "source": [ - "#### Grab featureSchemaIds" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "abstract-fifteen", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'jellyfish': 'cky3dt2lja37d0z9t26wf3qo5'}\n" - ] - } - ], - "source": [ - "# When we created a project with the ontology defined above, all of the ids were assigned.\n", - "# So lets reconstruct the ontology builder 
with all of the ids.\n", - "ontology = ontology_builder.from_project(project)\n", - "# We want all of the feature schemas to be easily accessible by name.\n", - "schema_lookup = {tool.name: tool.feature_schema_id for tool in ontology.tools}\n", - "print(schema_lookup)" - ] - }, - { - "cell_type": "markdown", - "id": "portuguese-arthur", - "metadata": {}, - "source": [ - "## Import Format\n", - "\n", - "* [Documentation](https://docs.labelbox.com/docs/bounding-box-json)\n", - "\n", - "\n", - "```\n", - "Each row of the import is a unique instance\n", - "\n", - "schemaId: \n", - "dataRow:\n", - " id: \n", - "Instance:\n", - " [Segments]:\n", - " [KeyFrames]:\n", - " frame:\n", - " bbox:\n", - " top:\n", - " bottom:\n", - " height:\n", - " width:\n", - "```\n", - "\n", - "**segments**: A segment represents a continuous section where an object is visible. If an instance disappears then the segment ends. If it re-appears, a new segment is created.\n", - "\n", - "**keyframes**: Key frames identify the location of an instance. 
Between keyframes, the location of the instance is interpolated.\n", - "\n", - "**bbox**: The coordinates of the bounding box" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "5fc417c5", - "metadata": {}, - "outputs": [], - "source": [ - "segments = [{\n", - " \"keyframes\": [{\n", - " \"frame\": 1,\n", - " \"bbox\": {\n", - " \"top\": 80,\n", - " \"left\": 80,\n", - " \"height\": 80,\n", - " \"width\": 80\n", - " }\n", - " }, {\n", - " \"frame\": 20,\n", - " \"bbox\": {\n", - " \"top\": 125,\n", - " \"left\": 125,\n", - " \"height\": 200,\n", - " \"width\": 300\n", - " }\n", - " }]\n", - "}, {\n", - " \"keyframes\": [{\n", - " \"frame\": 27,\n", - " \"bbox\": {\n", - " \"top\": 80,\n", - " \"left\": 50,\n", - " \"height\": 80,\n", - " \"width\": 50\n", - " }\n", - " }]\n", - "}]" - ] - }, - { - "cell_type": "markdown", - "id": "convertible-entry", - "metadata": {}, - "source": [ - "##### Create helper functions to make this much easier" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "developing-beauty", - "metadata": {}, - "outputs": [], - "source": [ - "def create_video_bbox_ndjson(datarow_id: str, schema_id: str,\n", - " segments: Dict[str, Any]) -> Dict[str, Any]:\n", - " return {\n", - " \"uuid\": str(uuid.uuid4()),\n", - " \"schemaId\": schema_id,\n", - " \"dataRow\": {\n", - " \"id\": datarow_id\n", - " },\n", - " \"segments\": segments\n", - " }" - ] + "cells": [ + { + "cell_type": "markdown", + "id": "db768cda", + "metadata": { + "id": "db768cda" + }, + "source": [ + "\n", + " \n", + "" + ] + }, + { + "cell_type": "markdown", + "id": "cb5611d0", + "metadata": { + "id": "cb5611d0" + }, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "id": "stupid-court", + "metadata": { + "id": "stupid-court" + }, + "source": [ + "# Video MAL" + ] + }, + { + "cell_type": "markdown", + "id": "intellectual-idaho", + "metadata": { + "id": "intellectual-idaho" + }, + "source": 
[ + "* Upload model inferences for video tasks\n", + "* Supported types\n", + " * bounding box" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "voluntary-minister", + "metadata": { + "id": "voluntary-minister" + }, + "outputs": [], + "source": [ + "!pip install -q 'labelbox[data]'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "committed-richards", + "metadata": { + "id": "committed-richards" + }, + "outputs": [], + "source": [ + "import os\n", + "import uuid\n", + "from io import BytesIO\n", + "from typing import Dict, Any, Tuple\n", + "\n", + "from labelbox import Client, LabelingFrontend, MediaType, MALPredictionImport, LabelImport\n", + "from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n", + "from labelbox.schema.queue_mode import QueueMode\n", + "from labelbox.data.annotation_types import (\n", + " Label, TextData, Checklist, Radio, ObjectAnnotation, TextEntity,\n", + " ClassificationAnnotation, ClassificationAnswer, LabelList\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "c8c876b7", + "metadata": { + "id": "c8c876b7" + }, + "source": [ + "# API Key and Client\n", + "Provide a valid API key below in order to properly connect to the Labelbox Client."
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "affecting-myanmar", + "metadata": { + "id": "affecting-myanmar" + }, + "outputs": [], + "source": [ + "# Add your api key\n", + "API_KEY=None\n", + "client = Client(api_key=API_KEY)" + ] + }, + { + "cell_type": "markdown", + "id": "blessed-venture", + "metadata": { + "id": "blessed-venture" + }, + "source": [ + "### Project Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "suburban-crowd", + "metadata": { + "id": "suburban-crowd", + "colab": { + "base_uri": "https://localhost:8080/" }, + "outputId": "5e41aa51-c12f-458b-cad1-fe6e239b15d4" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 11, - "id": "asian-savings", - "metadata": {}, - "outputs": [], - "source": [ - "uploads = []\n", - "\n", - "for data_row in dataset.data_rows():\n", - " uploads.append(\n", - " create_video_bbox_ndjson(data_row.uid, schema_lookup['jellyfish'],\n", - " segments))" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ], + "source": [ + "\n", + "# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n", + "# Queue mode will be deprecated once dataset mode is deprecated\n", + "mal_project = client.create_project(name=\"video_mal_project_demo\",\n", + " queue_mode=QueueMode.Batch,\n", + " auto_audit_percentage=1,\n", + " auto_audit_number_of_labels=1,\n", + " media_type=MediaType.Video)\n", + "\n", + "\n", + "\n", + "li_project = client.create_project(name=\"video_label_import_project_demo\",\n", + " queue_mode=QueueMode.Batch,\n", + " auto_audit_percentage=1,\n", + " auto_audit_number_of_labels=1,\n", + " media_type=MediaType.Video)\n", + "\n", + "# # Create one Labelbox dataset\n", + "dataset = client.create_dataset(name=\"video_annotation_import_demo_dataset\")\n", + "upload = {\n", + " \"row_data\":\"https://storage.googleapis.com/labelbox-datasets/video-sample-data/sample-video-2.mp4\",\n", + " 
\"global_key\": \"TEST-ID-%id\" % uuid.uuid1()\n", + "}\n", + "\n", + "data_row = dataset.create_data_row(upload)\n", + "print(data_row)\n", + "\n", + "\n", + "######################### DATASET CONSENSUS OPTION ########################\n", + "# Note that dataset-based projects will be deprecated in the near future.\n", + "\n", + "# To use Datasets/Consensus instead of Batches/Benchmarks, use the following code:\n", + "# In this case, 10% of all data rows need to be annotated by three labelers.\n", + "\n", + "# dataset_project = client.create_project(name=\"dataset-test-project\",\n", + "# description=\"a description\",\n", + "# media_type=MediaType.Video,\n", + "# auto_audit_percentage=0.1,\n", + "# auto_audit_number_of_labels=3,\n", + "# queue_mode=QueueMode.Dataset)\n", + "\n", + "# dataset_project.datasets.connect(dataset)\n" + ] + }, + { + "cell_type": "code", + "source": [ + "# We need the data row ID to create a batch\n", + "batch_datarows = [dr.uid for dr in dataset.export_data_rows()]\n", + "\n", + "# Create a batch to send to your MAL project\n", + "batch_mal = mal_project.create_batch(\n", + " \"video-batch-MAL-demo\", # Each batch in a project must have a unique name\n", + " batch_datarows, # A list of data rows or data row ids\n", + " 5 # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "# Create a batch to send to your LI (label import) project\n", + "batch_li = li_project.create_batch(\n", + " \"video-batch-LIM-demo\", # Each batch in a project must have a unique name\n", + " batch_datarows, # A list of data rows or data row ids\n", + " 5 # priority between 1(Highest) - 5(lowest)\n", + ")\n", + "\n", + "# Set up your ontology / labeling editor\n", + "ontology_builder = OntologyBuilder(\n", + " tools=[Tool(tool=Tool.Type.BBOX, name=\"jellyfish\")])\n", + "\n", + "editor = next(client.get_labeling_frontends(where=LabelingFrontend.name == \"Editor\")) # Unless using a custom editor, select the frontend named \"Editor\"\n", + "# Connect your ontology and editor to your MAL and LI projects\n", +
"mal_project.setup(editor, ontology_builder.asdict())\n", + "li_project.setup(editor, ontology_builder.asdict())\n", + "\n", + "print(\"Batch Li: \", batch_li)\n", + "print(\"Batch Mal: \", batch_mal)" + ], + "metadata": { + "id": "oRJF1G7jleSy", + "colab": { + "base_uri": "https://localhost:8080/" }, + "outputId": "0b18ce4a-1390-4a2e-c52c-4a7ba5c3674c" + }, + "id": "oRJF1G7jleSy", + "execution_count": 5, + "outputs": [ { - "cell_type": "markdown", - "id": "perfect-seafood", - "metadata": {}, - "source": [ - "### Upload the annotations" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Batch Li: \n", + "Batch Mal: \n" + ] + } + ] + }, + { + "cell_type": "markdown", + "id": "portable-grenada", + "metadata": { + "id": "portable-grenada" + }, + "source": [ + "#### Grab featureSchemaIds" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "abstract-fifteen", + "metadata": { + "id": "abstract-fifteen", + "colab": { + "base_uri": "https://localhost:8080/" }, + "outputId": "c8aec626-d528-4438-ca67-86684a0f3dd9" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 12, - "id": "entire-community", - "metadata": {}, - "outputs": [], - "source": [ - "# Let's upload!\n", - "# Validate must be set to false for video bounding boxes\n", - "upload_task = project.upload_annotations(name=f\"upload-job-{uuid.uuid4()}\",\n", - " annotations=uploads,\n", - " validate=False)" - ] - }, + "output_type": "stream", + "name": "stdout", + "text": [ + "{'jellyfish': 'cl9sk6g3nd33j07xy52vz1noq'}\n", + "{'jellyfish': 'cl9sk6ehad32w07xyhm8f96xw'}\n" + ] + } + ], + "source": [ + "# When we created a project with the ontology defined above, all of the ids were assigned.\n", + "# So lets reconstruct the ontology builder with all of the ids.\n", + "ontology_li = ontology_builder.from_project(li_project)\n", + "ontology_mal = ontology_builder.from_project(mal_project)\n", + "# # We want all of the feature schemas to be easily accessible by name.\n", + 
"schema_lookup_li = {tool.name: tool.feature_schema_id for tool in ontology_li.tools}\n", + "schema_lookup_mal = {tool.name: tool.feature_schema_id for tool in ontology_mal.tools}\n", + "\n", + "print(schema_lookup_li)\n", + "print(schema_lookup_mal)" + ] + }, + { + "cell_type": "markdown", + "id": "portuguese-arthur", + "metadata": { + "id": "portuguese-arthur" + }, + "source": [ + "## Import Format\n", + "\n", + "**segments**: A segment represents a continuous section where an object is visible. If an instance disappears then the segment ends. If it re-appears, a new segment is created.\n", + "\n", + "**keyframes**: Key frames identify the location of an instance. Between keyframes, the location of the instance is interpolated.\n", + "\n", + "**bbox**: The coordinates of the bounding box" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5fc417c5", + "metadata": { + "id": "5fc417c5" + }, + "outputs": [], + "source": [ + "segments = [{\n", + " \"keyframes\": [{\n", + " \"frame\": 1,\n", + " \"bbox\": {\n", + " \"top\": 80,\n", + " \"left\": 80,\n", + " \"height\": 80,\n", + " \"width\": 80\n", + " }\n", + " }, {\n", + " \"frame\": 20,\n", + " \"bbox\": {\n", + " \"top\": 125,\n", + " \"left\": 125,\n", + " \"height\": 200,\n", + " \"width\": 300\n", + " }\n", + " }]\n", + "}, {\n", + " \"keyframes\": [{\n", + " \"frame\": 27,\n", + " \"bbox\": {\n", + " \"top\": 80,\n", + " \"left\": 50,\n", + " \"height\": 80,\n", + " \"width\": 50\n", + " }\n", + " }]\n", + "}]" + ] + }, + { + "cell_type": "markdown", + "id": "convertible-entry", + "metadata": { + "id": "convertible-entry" + }, + "source": [ + "##### Create helper functions to make this much easier" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "developing-beauty", + "metadata": { + "id": "developing-beauty" + }, + "outputs": [], + "source": [ + "def create_video_bbox_ndjson(datarow_id: str, schema_id: str,\n", + " segments: Dict[str, Any]) -> Dict[str, Any]:\n", + " 
return {\n", + " \"uuid\": str(uuid.uuid4()),\n", + " \"schemaId\": schema_id,\n", + " \"dataRow\": {\n", + " \"id\": datarow_id\n", + " },\n", + " \"segments\": segments\n", + " }" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "asian-savings", + "metadata": { + "id": "asian-savings" + }, + "outputs": [], + "source": [ + "uploads_li = []\n", + "uploads_mal = []\n", + "\n", + "\n", + "for data_row in dataset.data_rows():\n", + " uploads_li.append(\n", + " create_video_bbox_ndjson(data_row.uid, schema_lookup_li['jellyfish'],\n", + " segments))\n", + " \n", + "for data_row in dataset.data_rows():\n", + " uploads_mal.append(\n", + " create_video_bbox_ndjson(data_row.uid, schema_lookup_mal['jellyfish'],\n", + " segments))" + ] + }, + { + "cell_type": "markdown", + "id": "perfect-seafood", + "metadata": { + "id": "perfect-seafood" + }, + "source": [ + "### Upload the annotations with MAL import" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "entire-community", + "metadata": { + "id": "entire-community" + }, + "outputs": [], + "source": [ + "# Let's upload!\n", + "# The predictions are submitted to the MAL project as an import job\n", + "upload_job = MALPredictionImport.create_from_objects(\n", + " client = client, \n", + " project_id = mal_project.uid, \n", + " name=\"MAL_upload_label_import_job_demo\", \n", + " predictions=uploads_mal\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "hollywood-faculty", + "metadata": { + "id": "hollywood-faculty", + "outputId": "f7f01a91-0fbc-4d7c-903d-a699734f05f0", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 13, - "id": "hollywood-faculty", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[]\n" - ] - } - ], - "source": [ - "# Wait for upload to finish (Will take up to five minutes)\n", - "upload_task.wait_until_done()\n", - "# Review the upload
status\n", - "print(upload_task.errors)" - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Errors: []\n" + ] } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + ], + "source": [ + "# Wait for upload to finish (Will take up to five minutes)\n", + "upload_job.wait_until_done()\n", + "# Review the upload status\n", + "print(\"Errors: \",upload_job.errors)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Upload the annotations with LabelImport" + ], + "metadata": { + "id": "GOgi7lwRFU8Q" + }, + "id": "GOgi7lwRFU8Q" + }, + { + "cell_type": "code", + "source": [ + "upload_job_li = LabelImport.create_from_objects(\n", + " client = client,\n", + " project_id = li_project.uid, \n", + " name = \"LI_upload_label_import_job_demo\",\n", + " labels=uploads_li\n", + ")" + ], + "metadata": { + "id": "s1iFk1qOm66L" + }, + "id": "s1iFk1qOm66L", + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Wait for upload to finish (Will take up to five minutes)\n", + "upload_job_li.wait_until_done()\n", + "# Review the upload status\n", + "print(\"Errors: \",upload_job_li.errors)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.8" + "id": "UjNPwt8qFS3A", + "outputId": "37fa8f2c-6cb3-46d1-c456-5d0de7605b51" + }, + "id": "UjNPwt8qFS3A", + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Errors: []\n" + ] } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Cleanup" + ], + "metadata": { + "id": "I4bW74Yd4_Za" + }, + "id": "I4bW74Yd4_Za" + }, + { + "cell_type": "code", + "source": [ + "# li_project.delete()\n", + "# 
mal_project.delete()\n", + "# dataset.delete()" + ], + "metadata": { + "id": "4wVPvcXc42jQ" + }, + "id": "4wVPvcXc42jQ", + "execution_count": 15, + "outputs": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" }, - "nbformat": 4, - "nbformat_minor": 5 + "colab": { + "provenance": [], + "collapsed_sections": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 }