From 19b87737f2a4a36bb50281cd99bd693346960683 Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Tue, 24 Jan 2023 10:11:45 -0500
Subject: [PATCH 1/6] Standardized format updates
---
.../prediction_upload/text_predictions.ipynb | 1319 ++++++++---------
1 file changed, 601 insertions(+), 718 deletions(-)
diff --git a/examples/prediction_upload/text_predictions.ipynb b/examples/prediction_upload/text_predictions.ipynb
index 4285a43e5..609561385 100644
--- a/examples/prediction_upload/text_predictions.ipynb
+++ b/examples/prediction_upload/text_predictions.ipynb
@@ -1,1020 +1,903 @@
{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "collapsed_sections": [
+ "RgBYFUxa-VGT",
+ "FTGAI730UlZ3"
+ ]
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
"cells": [
{
"cell_type": "markdown",
- "id": "a6a048e8-b5fe-418b-aec4-829b5b6802e5",
"metadata": {
"id": "a6a048e8-b5fe-418b-aec4-829b5b6802e5"
},
"source": [
"
\n",
- " \n",
+ " \n",
" | "
]
},
{
"cell_type": "markdown",
- "id": "51cf1362-1cde-4749-aac7-5fb94473baa7",
"metadata": {
"id": "51cf1362-1cde-4749-aac7-5fb94473baa7"
},
"source": [
"\n",
- "![]() \n",
" | \n",
"\n",
"\n",
- "![]() \n",
" | "
]
},
{
"cell_type": "markdown",
- "id": "339795d3-e36c-4470-8605-62bfdd5eea29",
- "metadata": {
- "id": "339795d3-e36c-4470-8605-62bfdd5eea29"
- },
"source": [
"# Text Prediction Import\n",
"* This notebook walks you through the process of uploading model predictions to a Model Run. This notebook provides an example for each supported prediction type for text assets. \n",
"\n",
- "A Model Run is a container for the predictions, annotations and metrics of a specific experiment in your ML model development cycle."
- ]
- },
- {
- "cell_type": "markdown",
- "id": "e76f007b-9465-4acd-9008-20e25e4a4b98",
+ "Supported annotations that can be uploaded through the SDK: \n",
+ "\n",
+ "* Entity\n",
+ "* Classification radio \n",
+ "* Classification checklist \n",
+ "* Classification free-form text \n",
+ "\n",
+ "**Not** supported:\n",
+ "* Segmentation mask\n",
+ "* Polygon\n",
+ "* Bounding box \n",
+ "* Polyline\n",
+ "* Point \n",
+ "\n",
+ "\n",
+ "A Model Run is a container for the predictions, annotations and metrics of a specific experiment in your ML model development cycle.\n",
+ "\n"
+ ],
"metadata": {
- "id": "e76f007b-9465-4acd-9008-20e25e4a4b98"
- },
- "source": [
- "* For information on what types of predictions are supported per data type, refer to this documentation:\n",
- " * https://docs.labelbox.com/docs/upload-model-predictions#step-6-create-the-predictions-payload"
- ]
+ "id": "9znxMjDYGi0Y"
+ }
},
{
"cell_type": "markdown",
- "id": "f53d50fc-8d3c-452b-9aaf-f6170aaa5576",
- "metadata": {
- "id": "f53d50fc-8d3c-452b-9aaf-f6170aaa5576"
- },
"source": [
- "* Notes:\n",
- " * If you are importing more than 1,000 mask predictions at a time, consider submitting separate jobs, as they can take longer than other prediction types to import.\n",
- " * After the execution of this notebook a complete Model Run with predictions will be created in your organization. "
- ]
- },
- {
- "cell_type": "markdown",
- "id": "5effdaa3-e701-4804-aa33-bbbaed99eb92",
+ "## Setup"
+ ],
"metadata": {
- "id": "5effdaa3-e701-4804-aa33-bbbaed99eb92"
- },
- "source": [
- "# Installs"
- ]
+ "id": "UtJHIuE8HDRI"
+ }
},
{
"cell_type": "code",
- "execution_count": 1,
- "id": "4d63074b-2379-48af-b9d6-2a66190f03c4",
- "metadata": {
- "id": "4d63074b-2379-48af-b9d6-2a66190f03c4"
- },
- "outputs": [],
"source": [
"!pip install -q 'labelbox[data]'"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "6f3cdca1-524f-4247-a63b-2d4371b0257d",
+ ],
"metadata": {
- "id": "6f3cdca1-524f-4247-a63b-2d4371b0257d"
+ "id": "cm8xMaLbGb7v"
},
- "source": [
- "# Imports"
- ]
+ "execution_count": null,
+ "outputs": []
},
{
"cell_type": "code",
- "execution_count": 2,
- "id": "01fca8c9-0680-4a9c-a11e-1b49f31e9121",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "01fca8c9-0680-4a9c-a11e-1b49f31e9121",
- "outputId": "192cbd39-3d13-40bc-c585-f4c5bfd600e0"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "3.33.1\n"
- ]
- }
- ],
"source": [
- "import labelbox\n",
"from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n",
- "from labelbox.schema.queue_mode import QueueMode\n",
- "from labelbox import Client, LabelingFrontend, LabelImport, MediaType\n",
+ "from labelbox import Client, MALPredictionImport, LabelImport\n",
+ "from labelbox.data.serialization import NDJsonConverter\n",
+ "from labelbox.schema.media_type import MediaType\n",
"from labelbox.data.annotation_types import (\n",
- " Label, TextData, ObjectAnnotation, TextEntity,\n",
- " Radio, Checklist, Text,\n",
- " ClassificationAnnotation, ClassificationAnswer\n",
+ " Label, TextData, Checklist, Radio, ObjectAnnotation, TextEntity,\n",
+ " ClassificationAnnotation, ClassificationAnswer, LabelList, Text, ImageData\n",
")\n",
- "from labelbox.data.serialization import NDJsonConverter\n",
- "import json\n",
"import uuid\n",
- "import copy\n",
"import numpy as np\n",
- "print(labelbox.__version__)"
- ]
+ "from labelbox.schema.queue_mode import QueueMode"
+ ],
+ "metadata": {
+ "id": "NIq-6M9kHKSs"
+ },
+ "execution_count": 2,
+ "outputs": []
},
{
"cell_type": "markdown",
- "id": "a72d96e8-33ce-434d-b330-393e1c31702a",
- "metadata": {
- "id": "a72d96e8-33ce-434d-b330-393e1c31702a"
- },
"source": [
- "# API Key and Client\n",
- "Provide a valid api key below in order to properly connect to the Labelbox Client."
- ]
+ "## Replace with your API Key \n",
+ "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)"
+ ],
+ "metadata": {
+ "id": "pZ2rBqY8HQoe"
+ }
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "86003724-4807-4281-95c1-5284a6f9609f",
- "metadata": {
- "id": "86003724-4807-4281-95c1-5284a6f9609f"
- },
- "outputs": [],
"source": [
- "# Add your api key as a string\n",
- "API_KEY = \"\"\n",
- "client = Client(api_key=API_KEY)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "960998ac-bde4-4184-8b7d-26d8e019cc7f",
+ "API_KEY = None\n",
+ "client = Client(API_KEY)"
+ ],
"metadata": {
- "id": "960998ac-bde4-4184-8b7d-26d8e019cc7f"
+ "id": "z7ZLKLYLHP__"
},
- "source": [
- "---- \n",
- "### Steps\n",
- "1. Make sure project is setup\n",
- "2. Collect annotations\n",
- "3. Upload"
- ]
+ "execution_count": 3,
+ "outputs": []
},
{
"cell_type": "markdown",
- "id": "82a7381b-2409-4ed3-9d25-881a1e1d8ca6",
- "metadata": {
- "id": "82a7381b-2409-4ed3-9d25-881a1e1d8ca6"
- },
"source": [
- "### Create a Model Run (for predictions) and a Project (for annotations)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "d51fd2e1-bf5f-4c61-bca1-929f43b076ed",
+ "## Supported Predictions"
+ ],
"metadata": {
- "id": "d51fd2e1-bf5f-4c61-bca1-929f43b076ed"
- },
- "source": [
- "We will be creating \n",
- "- a Model and a Model Run to contain model predictions\n",
- "- a project to contain annotations"
- ]
+ "id": "RgBYFUxa-VGT"
+ }
},
{
- "cell_type": "markdown",
- "id": "49i_juOUr6av",
- "metadata": {
- "id": "49i_juOUr6av"
- },
+ "cell_type": "code",
"source": [
- "First, we create an ontology with all the possible tools and classifications supported for text. The official list of supported predictions and annotations that can be uploaded can be found:\n",
- "- [predictions that can be uploaded to a Model Run](https://docs.labelbox.com/docs/upload-model-predictions#step-6-create-the-predictions-payload)\n",
- "- [annotations that can be imported in a project as ground-truths](https://docs.labelbox.com/docs/import-ground-truth)\n",
+ "########## Entities ##########\n",
"\n",
- "Note: the ontology of the Model Run does not need to match the ontology of the project. However, only the features present in the Model Run ontology can be uploaded as predictions and annotations to the Model Run."
- ]
+ "# Python annotation\n",
+ "named_entity = TextEntity(start=10, end=20)\n",
+ "entities_prediction = ObjectAnnotation(value=named_entity, name = \"named_entity\", confidence=0.5)\n",
+ "\n",
+ "\n",
+ "# NDJSON\n",
+ "entities_prediction_ndjson = { \n",
+ " \"name\": \"named_entity\",\n",
+ " \"confidence\": 0.5, \n",
+ " \"location\": { \n",
+ " \"start\": 67, \n",
+ " \"end\": 128 \n",
+ " }\n",
+ "}"
+ ],
+ "metadata": {
+ "id": "FJhCAqeR-cUJ"
+ },
+ "execution_count": 4,
+ "outputs": []
},
{
"cell_type": "code",
- "execution_count": 4,
- "id": "f9f9287c-aad7-4914-bc87-1453fb8bce81",
- "metadata": {
- "id": "f9f9287c-aad7-4914-bc87-1453fb8bce81"
- },
- "outputs": [],
"source": [
- "ontology_builder = OntologyBuilder(\n",
- " tools=[\n",
- " Tool(tool=Tool.Type.NER, name=\"named_entity\")\n",
- " ],\n",
- " classifications=[ # List of Classification objects\n",
- " Classification( # Text classification given the name \"text\"\n",
- " class_type=Classification.Type.TEXT,\n",
- " instructions=\"text\"), \n",
- " Classification( # Checklist classification given the name \"text\" with two options: \"first_checklist_answer\" and \"second_checklist_answer\"\n",
- " class_type=Classification.Type.CHECKLIST, \n",
- " instructions=\"checklist\", \n",
- " options=[\n",
- " Option(value=\"first_checklist_answer\"),\n",
- " Option(value=\"second_checklist_answer\") \n",
- " ]\n",
- " ), \n",
- " Classification( # Radio classification given the name \"text\" with two options: \"first_radio_answer\" and \"second_radio_answer\"\n",
- " class_type=Classification.Type.RADIO, \n",
- " instructions=\"radio\", \n",
- " options=[\n",
- " Option(value=\"first_radio_answer\"),\n",
- " Option(value=\"second_radio_answer\")\n",
- " ]\n",
+ "########## Classification - Radio (single choice ) ##########\n",
+ "\n",
+ "# Python annotation \n",
+ "radio_prediction = ClassificationAnnotation(\n",
+ " name=\"radio_question\",\n",
+ " value=Radio(answer = \n",
+ " ClassificationAnswer(name = \"first_radio_answer\", confidence=0.5)\n",
" )\n",
- " ]\n",
")\n",
"\n",
- "ontology = client.create_ontology(\"Ontology Text\", ontology_builder.asdict())"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1GdimALBuzRU",
+ "\n",
+ "# NDJSON\n",
+ "radio_prediction_ndjson = {\n",
+ " 'name': 'radio_question',\n",
+ " 'confidence': 0.5,\n",
+ " 'answer': {'name': 'first_radio_answer', 'confidence': 0.5}\n",
+ "} "
+ ],
"metadata": {
- "id": "1GdimALBuzRU"
+ "id": "SOOR0zCBKef_"
},
- "source": [
- "We create a Model and a Model Run, to contain the predictions. "
- ]
+ "execution_count": 5,
+ "outputs": []
},
{
"cell_type": "code",
- "execution_count": 6,
- "id": "hANaXddn5Krs",
+ "source": [
+ "#### Nested Classifications ######\n",
+ "\n",
+ "# Python annotation\n",
+ "radio_prediction_nested = ClassificationAnnotation(\n",
+ " name=\"radio_question_sub\", \n",
+ " value=Radio(answer = ClassificationAnswer(name = \"first_radio_answer\", confidence=0.5)),\n",
+ " classifications=[\n",
+ " \tClassificationAnnotation(\n",
+ " \tname=\"sub_radio_question\",\n",
+ " \t\tvalue=Radio(answer=ClassificationAnswer(name=\"first_sub_radio_answer\", confidence=0.5))\n",
+ " )\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "# NDJSON\n",
+ "nested_radio_prediction_ndjson = {\n",
+ " 'name': 'radio_question_sub',\n",
+ " 'answer': {\n",
+ " 'name': 'first_radio_answer',\n",
+ " \"confidence\": 0.5,\n",
+ " 'classifications': [{\n",
+ " 'name':'sub_radio_question',\n",
+ " 'answer': { 'name' : 'first_sub_radio_answer', 'confidence': 0.5 }\n",
+ " }]\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "# Nested classification for checklist is only supported with NDJSON tools\n",
+ "nested_checklist_prediction_ndjson = {\n",
+ " \"name\": \"nested_checklist_question\",\n",
+ " \"answer\": [{\n",
+ " \"name\": \"first_checklist_answer\", \n",
+ " \"confidence\": 0.5,\n",
+ " \"classifications\" : [\n",
+ " {\n",
+ " \"name\": \"sub_checklist_question\", \n",
+ " \"answer\": {\"name\": \"first_sub_checklist_answer\", \"confidence\": 0.5 }\n",
+ " } \n",
+ " ] \n",
+ " }]\n",
+ "}"
+ ],
"metadata": {
- "id": "hANaXddn5Krs"
+ "id": "cXKGv0BeNXQi"
},
- "outputs": [],
- "source": [
- "# create Model\n",
- "model = client.create_model(name=\"text_model_run\", \n",
- " ontology_id=ontology.uid)\n",
- "# create Model Run\n",
- "model_run = model.create_model_run(\"iteration 1\")"
- ]
+ "execution_count": 6,
+ "outputs": []
},
{
- "cell_type": "markdown",
- "id": "EC_D3TFX5LBo",
+ "cell_type": "code",
+ "source": [
+ "########## Checklist ##########\n",
+ "\n",
+ "# Python annotation\n",
+ "checklist_prediction = ClassificationAnnotation(\n",
+ " name=\"checklist_question\",\n",
+ " value=Checklist(\n",
+ " answer = [\n",
+ " ClassificationAnswer(\n",
+ " name = \"first_checklist_answer\",\n",
+ " confidence=0.5\n",
+ " ),\n",
+ " ClassificationAnswer(\n",
+ " name = \"second_checklist_answer\", \n",
+ " confidence=0.5\n",
+ " ),\n",
+ " ClassificationAnswer(\n",
+ " name = \"third_checklist_answer\", \n",
+ " confidence=0.5\n",
+ " )\n",
+ " ])\n",
+ " )\n",
+ "\n",
+ "\n",
+ "# NDJSON\n",
+ "checklist_prediction_ndjson = {\n",
+ " 'name': 'checklist_question',\n",
+ " 'confidence': 0.5,\n",
+ " 'answer': [\n",
+ " {'name': 'first_checklist_answer', 'confidence': 0.5}\n",
+ " ]\n",
+ "}\n",
+ "\n",
+ "\n"
+ ],
"metadata": {
- "id": "EC_D3TFX5LBo"
+ "id": "vdwPTRkP6JIx"
},
- "source": [
- "We create a project, to contain the annotations."
- ]
+ "execution_count": 7,
+ "outputs": []
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "044e9194-d21d-403e-b64c-047c1063b0fe",
+ "source": [
+ "########## Classification Free-Form text ##########\n",
+ "\n",
+ "# Python annotation\n",
+ "text_prediction = ClassificationAnnotation(\n",
+ " name = \"free_text\", \n",
+ " value = Text(answer=\"sample text\")\n",
+ ")\n",
+ "\n",
+ "# NDJSON\n",
+ "text_prediction_ndjson = {\n",
+ " 'name': 'free_text',\n",
+ " 'answer': 'sample text'\n",
+ "}"
+ ],
"metadata": {
- "id": "044e9194-d21d-403e-b64c-047c1063b0fe"
+ "id": "ie2Zni_d6MI8"
},
- "outputs": [],
- "source": [
- "# Create a Labelbox project\n",
- "project = client.create_project(name=\"text_project\", \n",
- " queue_mode=QueueMode.Batch,\n",
- " # Quality Settings setup \n",
- " auto_audit_percentage=1,\n",
- " auto_audit_number_of_labels=1,\n",
- " media_type=MediaType.Text)\n",
- "project.setup_editor(ontology)"
- ]
+ "execution_count": 8,
+ "outputs": []
},
{
"cell_type": "markdown",
- "id": "o9JbDSBH5fsF",
- "metadata": {
- "id": "o9JbDSBH5fsF"
- },
"source": [
- "### Create a dataset with a data row\n",
- "We will upload predictions and annotations on this data row. "
- ]
+ "## Step 1: Import data rows into Catalog"
+ ],
+ "metadata": {
+ "id": "U-o15yu9IPDo"
+ }
},
{
"cell_type": "code",
- "execution_count": 8,
- "id": "WCFSlblL5gDc",
+ "source": [
+ "# send a sample text data row as a batch to the project\n",
+ "test_img_url = {\n",
+ " \"row_data\": \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n",
+ " \"global_key\": str(uuid.uuid4())\n",
+ "}\n",
+ "dataset = client.create_dataset(name=\"text_prediction_import\")\n",
+ "data_row = dataset.create_data_row(test_img_url)\n",
+ "print(data_row)"
+ ],
"metadata": {
+ "id": "HjH9gTV8IBG9",
"colab": {
"base_uri": "https://localhost:8080/"
},
- "id": "WCFSlblL5gDc",
- "outputId": "07b79f3d-07a1-4eb8-fd0a-c8ab7dfdaf13"
+ "outputId": "c7ac76d1-3e11-48ed-f012-6bf8038856c3"
},
+ "execution_count": 9,
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
"\n"
]
}
- ],
- "source": [
- "# # Create one Labelbox dataset\n",
- "dataset = client.create_dataset(name=\"text_prediction_import_demo_dataset\")\n",
- "# Grab an example text and create a Labelbox data row in the dataset\n",
- "uploads = {\n",
- " \"row_data\": \"https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt\",\n",
- " # To learn more about Global Keys : https://docs.labelbox.com/docs/global-keys\n",
- " \"global_key\": \"TEST-ID-%id\" % uuid.uuid1()\n",
- " }\n",
- "data_row = dataset.create_data_row(uploads)\n",
- "print(data_row)"
]
},
{
"cell_type": "markdown",
- "id": "8eRGvN8ynJD6",
- "metadata": {
- "id": "8eRGvN8ynJD6"
- },
"source": [
- "### Send the data row to the Model Run and to the project"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "U-yBDwZuBn_M",
+ "## Step 2: Create/select an Ontology for your model predictions\n",
+ "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n"
+ ],
"metadata": {
- "id": "U-yBDwZuBn_M"
- },
- "source": [
- "Get the data row IDs that we just uploaded"
- ]
+ "id": "oy0umzuNIceP"
+ }
},
{
"cell_type": "code",
- "execution_count": 9,
- "id": "nphpP2OmBnGQ",
+ "source": [
+ "## Setup the ontology and link the tools created above.\n",
+ "\n",
+ "ontology_builder = OntologyBuilder(\n",
+ " classifications=[ # List of Classification objects\n",
+ " Classification( \n",
+ " class_type=Classification.Type.RADIO, \n",
+ " instructions=\"radio_question\", \n",
+ " options=[Option(value=\"first_radio_answer\")]\n",
+ " ),\n",
+ " Classification( \n",
+ " class_type=Classification.Type.RADIO, \n",
+ " instructions=\"radio_question_sub\", \n",
+ " options=[\n",
+ " Option(value=\"first_radio_answer\",\n",
+ " options=[\n",
+ " Classification(\n",
+ " class_type=Classification.Type.RADIO,\n",
+ " instructions=\"sub_radio_question\",\n",
+ " options=[\n",
+ " Option(value=\"first_sub_radio_answer\")\n",
+ " ]\n",
+ " ),\n",
+ " ]\n",
+ " )\n",
+ " ],\n",
+ " ),\n",
+ " Classification( \n",
+ " class_type=Classification.Type.CHECKLIST, \n",
+ " instructions=\"checklist_question\", \n",
+ " options=[\n",
+ " Option(value=\"first_checklist_answer\"),\n",
+ " Option(value=\"second_checklist_answer\"), \n",
+ " Option(value=\"third_checklist_answer\") \n",
+ " ]\n",
+ " ), \n",
+ " Classification( \n",
+ " class_type=Classification.Type.TEXT,\n",
+ " instructions=\"free_text\"\n",
+ " ),\n",
+ " Classification(\n",
+ " class_type=Classification.Type.CHECKLIST, \n",
+ " instructions=\"nested_checklist_question\",\n",
+ " options=[\n",
+ " Option(\"first_checklist_answer\",\n",
+ " options=[\n",
+ " Classification(\n",
+ " class_type=Classification.Type.CHECKLIST, \n",
+ " instructions=\"sub_checklist_question\", \n",
+ " options=[Option(\"first_sub_checklist_answer\")]\n",
+ " )\n",
+ " ]\n",
+ " )\n",
+ " ]\n",
+ " )\n",
+ " ],\n",
+ " tools=[ # List of Tool objects\n",
+ " Tool(tool=Tool.Type.NER, \n",
+ " name=\"named_entity\")\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "ontology = client.create_ontology(\"Ontology Text Predictions\", ontology_builder.asdict())\n"
+ ],
"metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "nphpP2OmBnGQ",
- "outputId": "fed77309-8e3a-47c9-b92c-d00ddf305b44"
+ "id": "Kt4XWWqgIiWk"
},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "datarow_ids: ['clbz312d2212e07wldd4zf01r']\n"
- ]
- }
- ],
- "source": [
- "# Data row ID(s) to send to the Model Run and to the project.\n",
- "datarow_ids = [dr.uid for dr in list(dataset.export_data_rows())]\n",
- "print(\"datarow_ids: \",datarow_ids)"
- ]
+ "execution_count": 10,
+ "outputs": []
},
{
"cell_type": "markdown",
- "id": "38FLeQKMBF9z",
- "metadata": {
- "id": "38FLeQKMBF9z"
- },
"source": [
- "Send the data row to the Model Run"
- ]
- },
- {
+ "## Step 3: Create a Model and Model Run"
+ ],
+ "metadata": {
+ "id": "ZjN8jxHvIvHP"
+ }
+ },
+ {
"cell_type": "code",
- "execution_count": 10,
- "id": "T1vk_EvzBI3u",
+ "source": [
+ "# create Model\n",
+ "model = client.create_model(name=\"text_model_run_\"+ str(uuid.uuid4()), \n",
+ " ontology_id=ontology.uid)\n",
+ "# create Model Run\n",
+ "model_run = model.create_model_run(\"iteration 1\")"
+ ],
"metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "T1vk_EvzBI3u",
- "outputId": "4a4a6d46-a041-43bf-d8bd-c7e7aea44db5"
+ "id": "8n-AvzdiOR6d"
},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "model_run.upsert_data_rows(datarow_ids)"
- ]
+ "execution_count": 11,
+ "outputs": []
},
{
"cell_type": "markdown",
- "id": "5D7wBx41BJa9",
- "metadata": {
- "id": "5D7wBx41BJa9"
- },
"source": [
- "Send the data row to the project"
- ]
+ "## Step 4: Send data rows to the Model Run"
+ ],
+ "metadata": {
+ "id": "NX6L0axRJN5J"
+ }
},
{
"cell_type": "code",
- "execution_count": 11,
- "id": "yfNPsINLnPcO",
+ "source": [
+ "model_run.upsert_data_rows([data_row.uid])"
+ ],
"metadata": {
+ "id": "6sngCgIwJSae",
"colab": {
"base_uri": "https://localhost:8080/"
},
- "id": "yfNPsINLnPcO",
- "outputId": "ddd45aea-ddfa-4f3d-e249-850234288c29"
+ "outputId": "d38454d2-cad9-425d-9070-fc8ca4ea9de5"
},
+ "execution_count": 12,
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
- ""
+ "True"
]
},
- "execution_count": 11,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 12
}
- ],
- "source": [
- "project.create_batch(\n",
- " \"first-batch\", # Each batch in a project must have a unique name\n",
- " datarow_ids, # A list of data rows or data row ids\n",
- " 5 # priority between 1(Highest) - 5(lowest)\n",
- ")"
]
},
{
"cell_type": "markdown",
- "id": "8da94c48-72a5-4535-ab66-6d14b0b79aed",
- "metadata": {
- "id": "8da94c48-72a5-4535-ab66-6d14b0b79aed"
- },
"source": [
- "### Create the predictions payload\n",
- "We will upload it to the Model Run.\n",
+ "## Step 5. Create the predictions payload\n",
"\n",
+ "Create the annotations payload using the snippets of [code here](https://docs.labelbox.com/reference/import-image-annotations#supported-annotations).\n",
"\n",
- "It is recommended to use the Python SDK's annotation types when importing labels into Labelbox."
- ]
- },
- {
- "cell_type": "markdown",
- "id": "ee9b9191-6c0d-4cba-859b-e2e9a1b887c8",
+ "Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n",
+ "\n",
+ "The resulting label_ndjson should have exactly the same content for annotations that are supported by both (with the exception of the uuid strings that are generated)"
+ ],
"metadata": {
- "id": "ee9b9191-6c0d-4cba-859b-e2e9a1b887c8"
- },
- "source": [
- "Object predictions"
- ]
+ "id": "6FZyvnrqSGuc"
+ }
},
{
"cell_type": "code",
- "execution_count": 12,
- "id": "qzBqhV4Pv3yp",
- "metadata": {
- "id": "qzBqhV4Pv3yp"
- },
- "outputs": [],
"source": [
- "# Confidence scores are optional.\n",
- "# If no confidence is provided, \n",
- "# the prediction will be treated as if the confidence score equals 1\n",
+ "# Create a Label for predictions\n",
+ "label_prediction = Label(\n",
+ " data=ImageData(uid=data_row.uid),\n",
+ " annotations = [\n",
+ " entities_prediction, \n",
+ " radio_prediction, \n",
+ " radio_prediction_nested,\n",
+ " checklist_prediction,\n",
+ " text_prediction\n",
+ " ]\n",
+ ")\n",
"\n",
- "named_entity = TextEntity(start=12,end=22)\n",
- "named_entity_prediction = ObjectAnnotation(value=named_entity, name=\"named_entity\", confidence=0.5)"
- ]
+ "# Create a label list \n",
+ "label_list_prediction = [label_prediction]\n",
+ "\n",
+ "# Convert the prediction label from a Labelbox class object to the underlying NDJSON format required for upload - uploads can be directly built in this syntax as well\n",
+ "ndjson_prediction = list(NDJsonConverter.serialize(label_list_prediction))"
+ ],
+ "metadata": {
+ "id": "zv2OLTXKSGWv"
+ },
+ "execution_count": 13,
+ "outputs": []
},
{
"cell_type": "markdown",
- "id": "291f9c97-37ba-42f5-b8f0-e118bdc5c848",
- "metadata": {
- "id": "291f9c97-37ba-42f5-b8f0-e118bdc5c848"
- },
"source": [
- "Classification predictions"
- ]
+ "If using NDJSON: "
+ ],
+ "metadata": {
+ "id": "YXdXkDcIV4bs"
+ }
},
{
"cell_type": "code",
- "execution_count": 13,
- "id": "f2RtQQPCymOB",
- "metadata": {
- "id": "f2RtQQPCymOB"
- },
- "outputs": [],
"source": [
- "# Confidence scores are optional.\n",
- "# If no confidence is provided, \n",
- "# the prediction will be treated as if the confidence score equals 1\n",
"\n",
- "checklist_prediction=ClassificationAnnotation(\n",
- " value=Checklist(\n",
- " answer=[ # List of the checklist answers in your ontology\n",
- " ClassificationAnswer(\n",
- " name=\"first_checklist_answer\",\n",
- " confidence=0.5\n",
- " ),\n",
- " ClassificationAnswer(\n",
- " name=\"second_checklist_answer\",\n",
- " confidence=0.5\n",
- " )\n",
- " ]\n",
- " ), \n",
- " name=\"checklist\" # Name of the classification in your ontology\n",
- ")\n",
- "\n",
- "radio_prediction=ClassificationAnnotation(\n",
- " value=Radio(\n",
- " answer=ClassificationAnswer(\n",
- " name=\"first_radio_answer\", # Name of the radio answer in your ontology\n",
- " confidence=0.5\n",
- " )\n",
- " ), \n",
- " name=\"radio\" # Name of the classification in your ontology\n",
- ")\n",
- "\n",
- "# Confidence is not supported for text prediction\n",
- "text_prediction=ClassificationAnnotation(\n",
- " value=Text( # String value for the text annotation\n",
- " answer=\"the answer to the text question\",\n",
- " ), \n",
- " name=\"text\" # Name of the classification in your ontology\n",
- ")\n"
- ]
+ "ndjson_prediction_method2 = []\n",
+ "for annot in [\n",
+ " entities_prediction_ndjson, \n",
+ " radio_prediction_ndjson, \n",
+ " nested_radio_prediction_ndjson,\n",
+ " checklist_prediction_ndjson,\n",
+ " text_prediction_ndjson, \n",
+ " nested_checklist_prediction_ndjson\n",
+ " ]:\n",
+ " annot.update({\n",
+ " 'uuid': str(uuid.uuid4()),\n",
+ " 'dataRow': {'id': data_row.uid},\n",
+ " })\n",
+ " ndjson_prediction_method2.append(annot)"
+ ],
+ "metadata": {
+ "id": "F-Y7sSyAV3tn"
+ },
+ "execution_count": 14,
+ "outputs": []
},
{
"cell_type": "markdown",
- "id": "15bd593b-509d-4114-af95-ae0be081c42d",
- "metadata": {
- "id": "15bd593b-509d-4114-af95-ae0be081c42d"
- },
"source": [
- "Create a Label object with all of the predictions created previously."
- ]
+ "## Step 6. Upload the predictions payload to the Model Run "
+ ],
+ "metadata": {
+ "id": "viFHCnBeTD1Y"
+ }
},
{
"cell_type": "code",
- "execution_count": 14,
- "id": "6d72fe25-ff7e-4e0a-94cf-095e4df73da0",
+ "source": [
+ "# Upload the prediction label to the Model Run\n",
+ "upload_job_prediction = model_run.add_predictions(\n",
+ " name=\"prediction_upload_job\"+str(uuid.uuid4()),\n",
+ " predictions= ndjson_prediction_method2)\n",
+ "\n",
+ "# Errors will appear for annotation uploads that failed.\n",
+ "print(\"Errors:\", upload_job_prediction.errors)\n",
+ "print(\" \")"
+ ],
"metadata": {
+ "id": "uCI8pLTITQNG",
"colab": {
"base_uri": "https://localhost:8080/"
},
- "id": "6d72fe25-ff7e-4e0a-94cf-095e4df73da0",
- "outputId": "6bb7469b-172b-4c07-9b12-8179dd825779"
+ "outputId": "c6de12a6-1575-4c1e-d6a3-310d55f29996"
},
+ "execution_count": 15,
"outputs": [
{
- "data": {
- "text/plain": [
- "{'uid': None,\n",
- " 'data': TextData(file_path=None,text=None,url=None),\n",
- " 'annotations': [ObjectAnnotation(confidence=None, name='named_entity', feature_schema_id=None, extra={}, value=TextEntity(start=12, end=22, extra={}), classifications=[]),\n",
- " ClassificationAnnotation(name='text', feature_schema_id=None, extra={}, value=Text(answer='the answer to the text question')),\n",
- " ClassificationAnnotation(name='checklist', feature_schema_id=None, extra={}, value=Checklist(name='checklist', answer=[ClassificationAnswer(confidence=0.5, name='first_checklist_answer', feature_schema_id=None, extra={}, keyframe=None), ClassificationAnswer(confidence=0.5, name='second_checklist_answer', feature_schema_id=None, extra={}, keyframe=None)])),\n",
- " ClassificationAnnotation(name='radio', feature_schema_id=None, extra={}, value=Radio(answer=ClassificationAnswer(confidence=0.5, name='first_radio_answer', feature_schema_id=None, extra={}, keyframe=None)))],\n",
- " 'extra': {}}"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Errors: []\n",
+ " \n"
+ ]
}
- ],
- "source": [
- "# Create a Label object by identifying the applicavle data row in Labelbox and providing a list of annotations\n",
- "label_prediction = Label(\n",
- " data=TextData(\n",
- " uid=data_row.uid),\n",
- " annotations = [\n",
- " named_entity_prediction,\n",
- " text_prediction, checklist_prediction, radio_prediction,\n",
- " ]\n",
- ")\n",
- "\n",
- "# Create urls to mask data for upload\n",
- "def signing_function(obj_bytes: bytes) -> str:\n",
- " url = client.upload_data(content=obj_bytes, sign=True)\n",
- " return url\n",
- "\n",
- "label_prediction.add_url_to_masks(signing_function)\n",
- "\n",
- "label_prediction.__dict__"
]
},
{
"cell_type": "markdown",
- "id": "gAva__YCCzjL",
- "metadata": {
- "id": "gAva__YCCzjL"
- },
"source": [
- "### Create the annotations payload\n",
- "We will upload it to the project.\n",
- "\n",
- "It is recommended to use the Python SDK's annotation types when importing labels into Labelbox."
- ]
+ "## Step 7: Send annotations to the Model Run \n",
+ "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run."
+ ],
+ "metadata": {
+ "id": "T-ZHWWI3JgmX"
+ }
},
{
"cell_type": "markdown",
- "id": "wbhzltpNCzjL",
- "metadata": {
- "id": "wbhzltpNCzjL"
- },
"source": [
- "Object annotations"
- ]
"##### 7.1. Create a Labelbox project"
+ ],
+ "metadata": {
+ "id": "CYRiqHr2O_aL"
+ }
},
{
"cell_type": "code",
- "execution_count": 15,
- "id": "apatq9StCzjM",
- "metadata": {
- "id": "apatq9StCzjM"
- },
- "outputs": [],
"source": [
- "named_entity = TextEntity(start=10,end=20)\n",
- "named_entity_annotation = ObjectAnnotation(value=named_entity, name=\"named_entity\")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "aqSYAaBiCzjN",
+ "# Create a Labelbox project\n",
+ "project = client.create_project(name=\"Text Prediction Import\", \n",
+ " queue_mode=QueueMode.Batch,\n",
+ " # Quality Settings setup \n",
+ " auto_audit_percentage=1,\n",
+ " auto_audit_number_of_labels=1,\n",
+ " media_type=MediaType.Text)\n",
+ "project.setup_editor(ontology)"
+ ],
"metadata": {
- "id": "aqSYAaBiCzjN"
+ "id": "jEtoDiDrPFvI"
},
- "source": [
- "Classification annotations"
- ]
- },
- {
- "cell_type": "code",
"execution_count": 16,
- "id": "9NAdIp6OCzjN",
- "metadata": {
- "id": "9NAdIp6OCzjN"
- },
- "outputs": [],
- "source": [
- "text_annotation=ClassificationAnnotation(\n",
- " value=Text( # String value for the text annotation\n",
- " answer=\"the answer to the text question\" \n",
- " ), \n",
- " name=\"text\" # Name of the classification in your ontology\n",
- ")\n",
- "\n",
- "checklist_annotation=ClassificationAnnotation(\n",
- " value=Checklist(\n",
- " answer=[ # List of the checklist answers in your ontology\n",
- " ClassificationAnswer(name=\"first_checklist_answer\"),\n",
- " ClassificationAnswer(name=\"second_checklist_answer\")\n",
- " ]\n",
- " ), \n",
- " name=\"checklist\" # Name of the classification in your ontology\n",
- ")\n",
- "\n",
- "radio_annotation=ClassificationAnnotation(\n",
- " value=Radio(\n",
- " answer=ClassificationAnswer(\n",
- " name=\"second_radio_answer\" # Name of the radio answer in your ontology\n",
- " )\n",
- " ), \n",
- " name=\"radio\" # Name of the classification in your ontology\n",
- ")"
- ]
+ "outputs": []
},
{
"cell_type": "markdown",
- "id": "zMcYJxpMCzjN",
- "metadata": {
- "id": "zMcYJxpMCzjN"
- },
"source": [
- "Create a Label object with all of the annotations created previously."
- ]
"##### 7.2. Create a batch to send to the project "
+ ],
+ "metadata": {
+ "id": "7FEyC-nBPPuD"
+ }
},
{
"cell_type": "code",
- "execution_count": 17,
- "id": "OHofSRa1CzjN",
+ "source": [
+ "project.create_batch(\n",
+ " \"batch_text_prediction_demo\", # Each batch in a project must have a unique name\n",
+ " dataset.export_data_rows(), # A list of data rows or data row ids\n",
+      "  5 # priority between 1 (highest) - 5 (lowest)\n",
+ ")"
+ ],
"metadata": {
+ "id": "WRr5tdVEPXXy",
"colab": {
"base_uri": "https://localhost:8080/"
},
- "id": "OHofSRa1CzjN",
- "outputId": "2313ab66-5266-4979-817f-44bb604d6ad6"
+ "outputId": "8cebf3ee-42ee-4389-e5a1-8c3495c198b5"
},
+ "execution_count": 17,
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
- "{'uid': None,\n",
- " 'data': TextData(file_path=None,text=None,url=None),\n",
- " 'annotations': [ObjectAnnotation(confidence=None, name='named_entity', feature_schema_id=None, extra={}, value=TextEntity(start=10, end=20, extra={}), classifications=[]),\n",
- " ClassificationAnnotation(name='text', feature_schema_id=None, extra={}, value=Text(answer='the answer to the text question')),\n",
- " ClassificationAnnotation(name='checklist', feature_schema_id=None, extra={}, value=Checklist(name='checklist', answer=[ClassificationAnswer(confidence=None, name='first_checklist_answer', feature_schema_id=None, extra={}, keyframe=None), ClassificationAnswer(confidence=None, name='second_checklist_answer', feature_schema_id=None, extra={}, keyframe=None)])),\n",
- " ClassificationAnnotation(name='radio', feature_schema_id=None, extra={}, value=Radio(answer=ClassificationAnswer(confidence=None, name='second_radio_answer', feature_schema_id=None, extra={}, keyframe=None)))],\n",
- " 'extra': {}}"
+ ""
]
},
- "execution_count": 17,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 17
}
- ],
- "source": [
- "# Create a Label object by identifying the applicavle data row in Labelbox and providing a list of annotations\n",
- "label_annotation = Label(\n",
- " data=TextData(\n",
- " uid=data_row.uid),\n",
- " annotations = [\n",
- " named_entity_annotation,\n",
- " text_annotation, checklist_annotation, radio_annotation\n",
- " ]\n",
- ")\n",
- "\n",
- "# Create urls to mask data for upload\n",
- "def signing_function(obj_bytes: bytes) -> str:\n",
- " url = client.upload_data(content=obj_bytes, sign=True)\n",
- " return url\n",
- "\n",
- "label_annotation.add_url_to_masks(signing_function)\n",
- "\n",
- "label_annotation.__dict__"
]
},
{
"cell_type": "markdown",
- "id": "KIEvdVzTGL09",
- "metadata": {
- "id": "KIEvdVzTGL09"
- },
"source": [
- "### Import the annotations payload in the project"
- ]
+ "##### 5.3 Create the annotations payload"
+ ],
+ "metadata": {
+ "id": "FTGAI730UlZ3"
+ }
},
{
"cell_type": "code",
- "execution_count": 18,
- "id": "d_95gQMAGJXq",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "d_95gQMAGJXq",
- "outputId": "a78b56c6-f80e-4561-b4fe-1641ddf828a6"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Errors: []\n"
- ]
- }
- ],
"source": [
- "## Create a label list \n",
- "label_list_annotation = [label_annotation]\n",
+ "entities_ndjson = { \n",
+ " \"name\": \"named_entity\",\n",
+ " \"location\": { \n",
+ " \"start\": 67, \n",
+ " \"end\": 128 \n",
+ " }\n",
+ "}\n",
"\n",
- "# Convert the annotation label from a Labelbox class object to the underlying NDJSON format required for upload - uploads can be directly built in this syntax as well\n",
- "ndjson_annotation = list(NDJsonConverter.serialize(label_list_annotation))\n",
+ "radio_annotation_ndjson = {\n",
+ " \"name\": \"radio_question\",\n",
+ " \"answer\": {\"name\": \"first_radio_answer\"}\n",
+ "} \n",
"\n",
- "# Upload the annotation label to the project using Label Import\n",
- "upload_job_annotation = LabelImport.create_from_objects(\n",
- " client = client,\n",
- " project_id = project.uid,\n",
- " name=\"annotation_import_job\",\n",
- " labels=ndjson_annotation)\n",
+ "radio_annotation_ndjson_with_subclass = {\n",
+ " \"name\": \"radio_question_sub\",\n",
+ " \"answer\": {\n",
+ " \"name\": \"first_radio_answer\",\n",
+ " \"classifications\": [{\n",
+ " \"name\":\"sub_radio_question\",\n",
+ " \"answer\": { \"name\" : \"first_sub_radio_answer\"}\n",
+ " }]\n",
+ " }\n",
+ "}\n",
"\n",
- "# This will provide information only after the upload_job is complete, so we do not need to worry about having to rerun\n",
- "upload_job_annotation.wait_until_done()\n",
- "# Errors will appear for annotation uploads that failed.\n",
- "print(\"Errors:\", upload_job_annotation.errors)"
- ]
+ "checklist_annotation_ndjson = {\n",
+ " \"name\": \"checklist_question\",\n",
+ " \"answer\": [\n",
+ " {\"name\": \"first_checklist_answer\"},\n",
+ " {\"name\": \"second_checklist_answer\"},\n",
+ " {\"name\": \"third_checklist_answer\"},\n",
+ " ]\n",
+ "}\n",
+ "\n",
+ "text_annotation_ndjson = {\n",
+ " \"name\": \"free_text\",\n",
+ " \"answer\": \"sample text\",\n",
+ "}\n",
+ "\n",
+ "nested_checklist_prediction_ndjson = {\n",
+ " \"name\": \"nested_checklist_question\",\n",
+ " \"answer\": [{\n",
+ " \"name\": \"first_checklist_answer\", \n",
+ " \"classifications\" : [\n",
+ " {\n",
+ " \"name\": \"sub_checklist_question\", \n",
+ " \"answer\": {\"name\": \"first_sub_checklist_answer\"}\n",
+ " } \n",
+ " ] \n",
+ " }]\n",
+ "}"
+ ],
+ "metadata": {
+ "id": "A8_HVvu9Uvfl"
+ },
+ "execution_count": 18,
+ "outputs": []
},
{
"cell_type": "markdown",
- "id": "tcQpab5_GR72",
- "metadata": {
- "id": "tcQpab5_GR72"
- },
"source": [
- "### Send the annotations to the Model Run"
- ]
+ "##### 5.3. Create the label object"
+ ],
+ "metadata": {
+ "id": "8QwmguFvPltl"
+ }
},
{
- "cell_type": "markdown",
- "id": "5I45AW4OHJvq",
+ "cell_type": "code",
+ "source": [
+ "ndjson_annotation = []\n",
+ "for annot in [\n",
+ " entities_ndjson, \n",
+ " radio_annotation_ndjson, \n",
+ " radio_annotation_ndjson_with_subclass,\n",
+ " checklist_annotation_ndjson,\n",
+ " text_annotation_ndjson,\n",
+ " nested_checklist_prediction_ndjson \n",
+ " ]:\n",
+ " annot.update({\n",
+ " 'uuid': str(uuid.uuid4()),\n",
+ " 'dataRow': {'id': data_row.uid},\n",
+ " })\n",
+ " ndjson_annotation.append(annot)"
+ ],
"metadata": {
- "id": "5I45AW4OHJvq"
+ "id": "9gD_alThQA3G"
},
+ "execution_count": 19,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
"source": [
- "Get the label IDs that we just uploaded\n"
- ]
+ "##### 5.4. Upload annotations to the project using Label Import"
+ ],
+ "metadata": {
+ "id": "nGVNQlvPQ-kF"
+ }
},
{
"cell_type": "code",
- "execution_count": 19,
- "id": "wGwk8s0SHiIg",
+ "source": [
+ "upload_job_annotation = LabelImport.create_from_objects(\n",
+ " client = client,\n",
+ " project_id = project.uid,\n",
+ " name=\"text_label_import_job\"+ str(uuid.uuid4()),\n",
+ " labels=ndjson_annotation)\n",
+ "\n",
+ "upload_job_annotation.wait_until_done()\n",
+ "# Errors will appear for annotation uploads that failed.\n",
+ "print(\"Errors:\", upload_job_annotation.errors)\n"
+ ],
"metadata": {
+ "id": "HYh9AzrlRYX-",
"colab": {
"base_uri": "https://localhost:8080/"
},
- "id": "wGwk8s0SHiIg",
- "outputId": "06c5c0eb-c5e0-4167-ed74-9d7df704a382"
+ "outputId": "b4e547d6-f212-4451-dcdb-4fc6559e5e7a"
},
+ "execution_count": 20,
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
- "label_ids: ['clbz31a0f07k90g3ehh4pgghf']\n"
+ "Errors: []\n"
]
}
- ],
- "source": [
- "# get the labels id from the project\n",
- "label_ids = [x['ID'] for x in project.export_labels(download=True)]\n",
- "print(\"label_ids: \",label_ids)"
]
},
+ {
+ "cell_type": "markdown",
+ "source": [
+ "##### 5.5 Send the annotations to the Model Run"
+ ],
+ "metadata": {
+ "id": "Y3rgM-5cRrxM"
+ }
+ },
{
"cell_type": "code",
- "execution_count": 20,
- "id": "nZVuxM5yGR73",
+ "source": [
+ "# get the labels id from the project\n",
+ "label_ids = [x['ID'] for x in project.export_labels(download=True)]\n",
+ "model_run.upsert_labels(label_ids)"
+ ],
"metadata": {
+ "id": "i2BrS8CcSBzo",
"colab": {
"base_uri": "https://localhost:8080/"
},
- "id": "nZVuxM5yGR73",
- "outputId": "75d32863-89b3-4855-a6e7-8aeeb3677ed8"
+ "outputId": "427f64fb-1fbb-4a2f-e822-2f7907f044aa"
},
+ "execution_count": 21,
"outputs": [
{
+ "output_type": "execute_result",
"data": {
"text/plain": [
"True"
]
},
- "execution_count": 20,
"metadata": {},
- "output_type": "execute_result"
+ "execution_count": 21
}
- ],
- "source": [
- "model_run.upsert_labels(label_ids)"
]
},
{
"cell_type": "markdown",
- "id": "mFlJY439GSHl",
- "metadata": {
- "id": "mFlJY439GSHl"
- },
"source": [
- "### Upload the predictions payload to the Model Run"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "id": "HFgB6qaSGSHm",
- "metadata": {
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "id": "HFgB6qaSGSHm",
- "outputId": "90670294-c0df-4f3b-99c1-a435632ec62c"
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Errors: []\n"
- ]
- }
+ "## Optional deletions for cleanup \n"
],
- "source": [
- "## Create a label list \n",
- "label_list_prediction = [label_prediction]\n",
- "\n",
- "# Convert the prediction label from a Labelbox class object to the underlying NDJSON format required for upload - uploads can be directly built in this syntax as well\n",
- "ndjson_prediction = list(NDJsonConverter.serialize(label_list_prediction))\n",
- "\n",
- "# Upload the prediction label to the Model Run\n",
- "upload_job_prediction = model_run.add_predictions(\n",
- " name=\"prediction_upload_job\"+str(uuid.uuid4()),\n",
- " predictions=ndjson_prediction)\n",
- "\n",
- "# Errors will appear for annotation uploads that failed.\n",
- "print(\"Errors:\", upload_job_prediction.errors)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "OhgYk6byutP4",
"metadata": {
- "id": "OhgYk6byutP4"
- },
- "source": [
- "## Cleanup "
- ]
+ "id": "DMtOfWWDWFbJ"
+ }
},
{
"cell_type": "code",
- "execution_count": 22,
- "id": "_9FDSkrhur2q",
- "metadata": {
- "id": "_9FDSkrhur2q"
- },
- "outputs": [],
"source": [
- "# mal_project.delete()\n",
- "# li_project.delete()\n",
+ "#upload_job\n",
+ "# project.delete()\n",
"# dataset.delete()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "id": "3PQniVD44Qxa",
+ ],
"metadata": {
- "id": "3PQniVD44Qxa"
+ "id": "aAhkyvJlWK1p"
},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "colab": {
- "provenance": []
- },
- "kernelspec": {
- "display_name": ".venv",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.9 (main, Dec 15 2022, 10:44:50) [Clang 14.0.0 (clang-1400.0.29.202)]"
- },
- "vscode": {
- "interpreter": {
- "hash": "412aed463adf326d9764d14c60182bced0ba84bd239456f5212a9555c18f2937"
- }
+ "execution_count": 22,
+ "outputs": []
}
- },
- "nbformat": 4,
- "nbformat_minor": 5
+ ]
}
From 36f502d2368327c4afbd8d7550d4ed439c6cff6f Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Tue, 24 Jan 2023 10:15:49 -0500
Subject: [PATCH 2/6] Updated text for sub list step # 7
---
examples/prediction_upload/text_predictions.ipynb | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/examples/prediction_upload/text_predictions.ipynb b/examples/prediction_upload/text_predictions.ipynb
index 609561385..001d4b80d 100644
--- a/examples/prediction_upload/text_predictions.ipynb
+++ b/examples/prediction_upload/text_predictions.ipynb
@@ -5,8 +5,7 @@
"colab": {
"provenance": [],
"collapsed_sections": [
- "RgBYFUxa-VGT",
- "FTGAI730UlZ3"
+ "RgBYFUxa-VGT"
]
},
"kernelspec": {
@@ -635,7 +634,7 @@
{
"cell_type": "markdown",
"source": [
- "##### 5.1. Create a labelbox project"
+ "##### 7.1. Create a labelbox project"
],
"metadata": {
"id": "CYRiqHr2O_aL"
@@ -662,7 +661,7 @@
{
"cell_type": "markdown",
"source": [
- "##### 5.2. Create a batch to send to the project "
+ "##### 7.2. Create a batch to send to the project "
],
"metadata": {
"id": "7FEyC-nBPPuD"
@@ -701,7 +700,7 @@
{
"cell_type": "markdown",
"source": [
- "##### 5.3 Create the annotations payload"
+ "##### 7.3 Create the annotations payload"
],
"metadata": {
"id": "FTGAI730UlZ3"
@@ -770,7 +769,7 @@
{
"cell_type": "markdown",
"source": [
- "##### 5.3. Create the label object"
+ "##### 7.4. Create the label object"
],
"metadata": {
"id": "8QwmguFvPltl"
@@ -803,7 +802,7 @@
{
"cell_type": "markdown",
"source": [
- "##### 5.4. Upload annotations to the project using Label Import"
+ "##### 7.5. Upload annotations to the project using Label Import"
],
"metadata": {
"id": "nGVNQlvPQ-kF"
@@ -843,7 +842,7 @@
{
"cell_type": "markdown",
"source": [
- "##### 5.5 Send the annotations to the Model Run"
+ "##### 7.6 Send the annotations to the Model Run"
],
"metadata": {
"id": "Y3rgM-5cRrxM"
From c43bf024f50908f3727b7d451a884e26ec29a807 Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Tue, 24 Jan 2023 16:35:38 -0500
Subject: [PATCH 3/6] Added media type to ontology
---
examples/prediction_upload/text_predictions.ipynb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/prediction_upload/text_predictions.ipynb b/examples/prediction_upload/text_predictions.ipynb
index 001d4b80d..723e4d8dd 100644
--- a/examples/prediction_upload/text_predictions.ipynb
+++ b/examples/prediction_upload/text_predictions.ipynb
@@ -438,7 +438,7 @@
" ]\n",
")\n",
"\n",
- "ontology = client.create_ontology(\"Ontology Text Predictions\", ontology_builder.asdict())\n"
+ "ontology = client.create_ontology(\"Ontology Text Predictions\", ontology_builder.asdict(), media_type=MediaType.Text)\n"
],
"metadata": {
"id": "Kt4XWWqgIiWk"
From dd01f49b3527426daee1ecc5c816ae659f1c5c61 Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Wed, 25 Jan 2023 13:16:49 -0500
Subject: [PATCH 4/6] Updated text
---
examples/prediction_upload/text_predictions.ipynb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/prediction_upload/text_predictions.ipynb b/examples/prediction_upload/text_predictions.ipynb
index 723e4d8dd..974ad2f9c 100644
--- a/examples/prediction_upload/text_predictions.ipynb
+++ b/examples/prediction_upload/text_predictions.ipynb
@@ -510,7 +510,7 @@
"source": [
"## Step 5. Create the predictions payload\n",
"\n",
- "Create the annotations payload using the snippets of [code here](https://https://docs.labelbox.com/reference/import-image-annotations#supported-annotations).\n",
+ "Create the annotations payload using the snippets of code in the ***Supported Predictions*** section.\n",
"\n",
"Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n",
"\n",
From c9e4507278222530ba13cbdd51cb56a90467fc2b Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Thu, 26 Jan 2023 10:49:14 -0500
Subject: [PATCH 5/6] Removed unsupported python annotation
---
.../prediction_upload/text_predictions.ipynb | 104 +++++++++---------
1 file changed, 53 insertions(+), 51 deletions(-)
diff --git a/examples/prediction_upload/text_predictions.ipynb b/examples/prediction_upload/text_predictions.ipynb
index 974ad2f9c..649c5d62a 100644
--- a/examples/prediction_upload/text_predictions.ipynb
+++ b/examples/prediction_upload/text_predictions.ipynb
@@ -88,10 +88,25 @@
"!pip install -q 'labelbox[data]'"
],
"metadata": {
- "id": "cm8xMaLbGb7v"
+ "id": "cm8xMaLbGb7v",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "a524ede8-eb09-4e90-eb8c-1de6ceeb7598"
},
- "execution_count": null,
- "outputs": []
+ "execution_count": 1,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m185.5/185.5 KB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m82.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Building wheel for pygeotile (setup.py) ... \u001b[?25l\u001b[?25hdone\n"
+ ]
+ }
+ ]
},
{
"cell_type": "code",
@@ -133,7 +148,7 @@
"metadata": {
"id": "z7ZLKLYLHP__"
},
- "execution_count": 3,
+ "execution_count": null,
"outputs": []
},
{
@@ -201,21 +216,8 @@
{
"cell_type": "code",
"source": [
- "#### Nested Classifications ######\n",
- "\n",
- "# Python annotation\n",
- "radio_prediction_nested = ClassificationAnnotation(\n",
- " name=\"radio_question_sub\", \n",
- " value=Radio(answer = ClassificationAnswer(name = \"first_radio_answer\", confidence=0.5)),\n",
- " classifications=[\n",
- " \tClassificationAnnotation(\n",
- " \tname=\"sub_radio_question\",\n",
- " \t\tvalue=Radio(answer=ClassificationAnswer(name=\"first_sub_radio_answer\", confidence=0.5))\n",
- " )\n",
- " ]\n",
- ")\n",
+ "#### Nested Classifications only supported with NDJSON tools ######\n",
"\n",
- "# NDJSON\n",
"nested_radio_prediction_ndjson = {\n",
" 'name': 'radio_question_sub',\n",
" 'answer': {\n",
@@ -228,16 +230,16 @@
" }\n",
"}\n",
"\n",
- "# Nested classification for checklits is only supported with NDJSON tools\n",
"nested_checklist_prediction_ndjson = {\n",
" \"name\": \"nested_checklist_question\",\n",
+ " \"confidence\": 0.01,\n",
" \"answer\": [{\n",
" \"name\": \"first_checklist_answer\", \n",
- " \"confidence\": 0.5,\n",
+ " \"confidence\": 0.01,\n",
" \"classifications\" : [\n",
" {\n",
" \"name\": \"sub_checklist_question\", \n",
- " \"answer\": {\"name\": \"first_sub_checklist_answer\", \"confidence\": 0.5 }\n",
+ " \"answer\": {\"name\": \"first_sub_checklist_answer\", \"confidence\": 0.01 }\n",
" } \n",
" ] \n",
" }]\n",
@@ -341,7 +343,7 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "c7ac76d1-3e11-48ed-f012-6bf8038856c3"
+ "outputId": "125b5130-5dd5-4e7f-8c53-f6da76359752"
},
"execution_count": 9,
"outputs": [
@@ -350,15 +352,15 @@
"name": "stdout",
"text": [
"\n"
]
}
@@ -438,7 +440,7 @@
" ]\n",
")\n",
"\n",
- "ontology = client.create_ontology(\"Ontology Text Predictions\", ontology_builder.asdict(), media_type=MediaType.Text)\n"
+ "ontology = client.create_ontology(\"Ontology Text Predictions\", ontology_builder.asdict() , media_type=MediaType.Text)\n"
],
"metadata": {
"id": "Kt4XWWqgIiWk"
@@ -489,9 +491,9 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "d38454d2-cad9-425d-9070-fc8ca4ea9de5"
+ "outputId": "d2a3efb9-f848-4041-e930-0c8ce8dcdc01"
},
- "execution_count": 12,
+ "execution_count": 20,
"outputs": [
{
"output_type": "execute_result",
@@ -501,7 +503,7 @@
]
},
"metadata": {},
- "execution_count": 12
+ "execution_count": 20
}
]
},
@@ -510,7 +512,7 @@
"source": [
"## Step 5. Create the predictions payload\n",
"\n",
- "Create the annotations payload using the snippets of code in the ***Supported Predictions*** section.\n",
+      "Create the prediction payload using the snippets of code in the **Supported Predictions** section\n",
"\n",
"Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n",
"\n",
@@ -529,7 +531,6 @@
" annotations = [\n",
" entities_prediction, \n",
" radio_prediction, \n",
- " radio_prediction_nested,\n",
" checklist_prediction,\n",
" text_prediction\n",
" ]\n",
@@ -544,7 +545,7 @@
"metadata": {
"id": "zv2OLTXKSGWv"
},
- "execution_count": 13,
+ "execution_count": 21,
"outputs": []
},
{
@@ -578,7 +579,7 @@
"metadata": {
"id": "F-Y7sSyAV3tn"
},
- "execution_count": 14,
+ "execution_count": 22,
"outputs": []
},
{
@@ -596,7 +597,7 @@
"# Upload the prediction label to the Model Run\n",
"upload_job_prediction = model_run.add_predictions(\n",
" name=\"prediction_upload_job\"+str(uuid.uuid4()),\n",
- " predictions= ndjson_prediction_method2)\n",
+ " predictions=ndjson_prediction_method2)\n",
"\n",
"# Errors will appear for annotation uploads that failed.\n",
"print(\"Errors:\", upload_job_prediction.errors)\n",
@@ -607,9 +608,9 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "c6de12a6-1575-4c1e-d6a3-310d55f29996"
+ "outputId": "3829ae1e-3cd1-4619-9a25-73d7c2c1952c"
},
- "execution_count": 15,
+ "execution_count": 23,
"outputs": [
{
"output_type": "stream",
@@ -655,7 +656,7 @@
"metadata": {
"id": "jEtoDiDrPFvI"
},
- "execution_count": 16,
+ "execution_count": 14,
"outputs": []
},
{
@@ -681,19 +682,19 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "8cebf3ee-42ee-4389-e5a1-8c3495c198b5"
+ "outputId": "cf36bc23-3f88-4820-a682-8cd3d152d426"
},
- "execution_count": 17,
+ "execution_count": 15,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
- ""
+ ""
]
},
"metadata": {},
- "execution_count": 17
+ "execution_count": 15
}
]
},
@@ -763,7 +764,7 @@
"metadata": {
"id": "A8_HVvu9Uvfl"
},
- "execution_count": 18,
+ "execution_count": 16,
"outputs": []
},
{
@@ -778,6 +779,7 @@
{
"cell_type": "code",
"source": [
+ "# Create a Label object by identifying the applicable data row in Labelbox and providing a list of annotations\n",
"ndjson_annotation = []\n",
"for annot in [\n",
" entities_ndjson, \n",
@@ -796,7 +798,7 @@
"metadata": {
"id": "9gD_alThQA3G"
},
- "execution_count": 19,
+ "execution_count": 17,
"outputs": []
},
{
@@ -826,9 +828,9 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "b4e547d6-f212-4451-dcdb-4fc6559e5e7a"
+ "outputId": "df994b08-d6c9-4699-bf3c-bd28ab6372e5"
},
- "execution_count": 20,
+ "execution_count": 18,
"outputs": [
{
"output_type": "stream",
@@ -860,9 +862,9 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "427f64fb-1fbb-4a2f-e822-2f7907f044aa"
+ "outputId": "3440e9fc-2c51-48d6-f17b-095406abe49f"
},
- "execution_count": 21,
+ "execution_count": 19,
"outputs": [
{
"output_type": "execute_result",
@@ -872,7 +874,7 @@
]
},
"metadata": {},
- "execution_count": 21
+ "execution_count": 19
}
]
},
@@ -888,14 +890,14 @@
{
"cell_type": "code",
"source": [
- "#upload_job\n",
+ "\n",
"# project.delete()\n",
"# dataset.delete()"
],
"metadata": {
"id": "aAhkyvJlWK1p"
},
- "execution_count": 22,
+ "execution_count": 53,
"outputs": []
}
]
From f99375edab2001a626862d3415e3471a37c7c06d Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Thu, 26 Jan 2023 10:52:15 -0500
Subject: [PATCH 6/6] Update data type in label prediction creation
---
examples/prediction_upload/text_predictions.ipynb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/prediction_upload/text_predictions.ipynb b/examples/prediction_upload/text_predictions.ipynb
index 649c5d62a..6db5b16d4 100644
--- a/examples/prediction_upload/text_predictions.ipynb
+++ b/examples/prediction_upload/text_predictions.ipynb
@@ -527,7 +527,7 @@
"source": [
"# Create a Label for predictions\n",
"label_prediction = Label(\n",
- " data=ImageData(uid=data_row.uid),\n",
+ " data=TextData(uid=data_row.uid),\n",
" annotations = [\n",
" entities_prediction, \n",
" radio_prediction, \n",