From 4b39040d5da7b86be2ab995bc2fe2009f986c15d Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Tue, 28 Feb 2023 11:09:43 -0500
Subject: [PATCH 1/3] Updates - annotations and label payload generation
1. Removed signing function from label payload generation step
2. Added examples of nested classifications
3. Removed UUID
---
examples/annotation_import/text.ipynb | 383 +++++++++++++++++++-------
1 file changed, 280 insertions(+), 103 deletions(-)
diff --git a/examples/annotation_import/text.ipynb b/examples/annotation_import/text.ipynb
index c2a63b846..4fd82a3ec 100644
--- a/examples/annotation_import/text.ipynb
+++ b/examples/annotation_import/text.ipynb
@@ -1,19 +1,36 @@
{
"nbformat": 4,
"nbformat_minor": 5,
- "metadata": {},
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "gpuClass": "standard"
+ },
"cells": [
{
- "metadata": {},
+ "metadata": {
+ "id": "QzuLCy_RnFui"
+ },
"source": [
"
\n",
" \n",
" | "
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "QzuLCy_RnFui"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "UgWyn9wFnFuj"
+ },
"source": [
"\n",
" \n",
" | "
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "UgWyn9wFnFuj"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "gukkkk6qnFuj"
+ },
"source": [
"# Text Annotation Import\n",
"* This notebook will provide examples of each supported annotation type for text assets, and also cover MAL and Label Import methods.\n",
@@ -54,35 +74,47 @@
"\n",
"For information on what types of annotations are supported per data type, refer to the Import text annotations [documentation](https://docs.labelbox.com/reference/import-text-annotations)."
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "gukkkk6qnFuj"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "lQbH0pWgnFuj"
+ },
"source": [
"Notes:\n",
" * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly.\n",
" * You may need to refresh your browser in order to see the results of the import job."
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "lQbH0pWgnFuj"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "ldFsO8mdnFuk"
+ },
"source": [
"### Setup\n"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "ldFsO8mdnFuk"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "2t9sAheYnFuk"
+ },
"source": [
"!pip install -q 'labelbox[data]'"
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 264,
+ "id": "2t9sAheYnFuk"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "RqjPuljsnFuk"
+ },
"source": [
"import labelbox as lb\n",
"import labelbox.types as lb_types\n",
@@ -91,43 +123,58 @@
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 265,
+ "id": "RqjPuljsnFuk"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "4VTWuozgnFuk"
+ },
"source": [
"### Replace with your API key\n",
"Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "4VTWuozgnFuk"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "qPdscvwDnFuk"
+ },
"source": [
"# Add your api key\n",
- "API_KEY = None\n",
+ "API_KEY=None\n",
"client = lb.Client(API_KEY)"
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 266,
+ "id": "qPdscvwDnFuk"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "p5qLIkoDnFuk"
+ },
"source": [
"## Supported annotations for text"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "p5qLIkoDnFuk"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "CDcx5qR4nFuk"
+ },
"source": [
"### Supported Python annotation types and NDJSON"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "CDcx5qR4nFuk"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "zKamWpc1nFuk"
+ },
"source": [
"########## Entities ##########\n",
"\n",
@@ -147,10 +194,13 @@
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 267,
+ "id": "zKamWpc1nFuk"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "97R_u85znFul"
+ },
"source": [
"########## Classification - Radio (single choice ) ##########\n",
"\n",
@@ -171,16 +221,19 @@
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 268,
+ "id": "97R_u85znFul"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "CmakTYIMnFul"
+ },
"source": [
"########## Classification - Radio (with subclassifcations) is only suppported with NDJSON tools ##########\n",
"\n",
"# NDJSON\n",
- "radio_annotation_ndjson_with_subclass = {\n",
- " 'name': 'radio_question_sub',\n",
+ "nested_radio_annotation_ndjson= {\n",
+ " 'name': 'nested_radio_question',\n",
" 'answer': {\n",
" 'name': 'first_radio_answer',\n",
" 'classifications': [{\n",
@@ -188,14 +241,30 @@
" 'answer': { 'name' : 'first_sub_radio_answer'}\n",
" }]\n",
" }\n",
+ "}\n",
+ "\n",
+ "nested_checklist_annotation_ndjson = {\n",
+ " \"name\": \"nested_checklist_question\",\n",
+ " \"answer\": [{\n",
+ " \"name\": \"first_checklist_answer\", \n",
+ " \"classifications\" : [\n",
+ " {\n",
+ " \"name\": \"sub_checklist_question\", \n",
+ " \"answer\": {\"name\": \"first_sub_checklist_answer\"}\n",
+ " } \n",
+ " ] \n",
+ " }]\n",
"}"
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 269,
+ "id": "CmakTYIMnFul"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "4eWMZey8nFul"
+ },
"source": [
"########## Classification - Checklist (Multi-choice) ##########\n",
"\n",
@@ -222,10 +291,13 @@
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 270,
+ "id": "4eWMZey8nFul"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "r2zGMd6XnFul"
+ },
"source": [
"########## Classification Free-Form text ##########\n",
"\n",
@@ -243,24 +315,37 @@
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 271,
+ "id": "r2zGMd6XnFul"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "ggNCgkFtnFul"
+ },
"source": [
"## Upload Annoations - putting it all together "
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "ggNCgkFtnFul"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "ENc3zDwznFul"
+ },
"source": [
"### Step 1: Import data rows into Catalog"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "ENc3zDwznFul"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "5GOSI_yunFul",
+ "outputId": "0082435a-0533-469e-b790-8cc93d96f084",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
"source": [
"# You can now include ohter fields like attachments, media type and metadata in the data row creation step: https://docs.labelbox.com/reference/text-file \n",
"text_asset = {\n",
@@ -278,28 +363,31 @@
"cell_type": "code",
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
"\n",
- "cldj4gyf60fy207xh3z1y2g1h\n"
+ "cleofyn3r0phe07x8aj6ibxnr\n"
]
}
],
- "execution_count": null
+ "execution_count": 272,
+ "id": "5GOSI_yunFul"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "GXm6hivmnFum"
+ },
"source": [
"### Step 2: Create/select an ontology\n",
"Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the `name`/`instructions` fields in your annotations to ensure the correct feature schemas are matched.\n",
@@ -308,10 +396,17 @@
"\n",
"[Documentation for reference ](https://docs.labelbox.com/reference/import-text-annotations)"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "GXm6hivmnFum"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "p8dR-eegnFum",
+ "outputId": "f8933a3d-d1c6-4b61-a73d-760c44bc1d6b",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
"source": [
"## Setup the ontology and link the tools created above.\n",
"\n",
@@ -324,7 +419,7 @@
" ),\n",
" lb.Classification( \n",
" class_type=lb.Classification.Type.RADIO, \n",
- " instructions=\"radio_question_sub\", \n",
+ " instructions=\"nested_radio_question\", \n",
" options=[\n",
" lb.Option(value=\"first_radio_answer\",\n",
" options=[\n",
@@ -336,8 +431,23 @@
" ]\n",
" ),\n",
" ]\n",
+ " ),\n",
+ " ], \n",
+ " ),\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.CHECKLIST,\n",
+ " name=\"nested_checklist_question\",\n",
+ " options=[\n",
+ " lb.Option(\"first_checklist_answer\",\n",
+ " options=[\n",
+ " lb.Classification(\n",
+ " class_type=lb.Classification.Type.CHECKLIST,\n",
+ " name=\"sub_checklist_question\", \n",
+ " options=[lb.Option(\"first_sub_checklist_answer\")]\n",
+ " )\n",
+ " ]\n",
" )\n",
- " ],\n",
+ " ]\n",
" ),\n",
" lb.Classification( \n",
" class_type=lb.Classification.Type.CHECKLIST, \n",
@@ -362,19 +472,34 @@
"ontology = client.create_ontology(\"Ontology Text Annotations\", ontology_builder.asdict())\n"
],
"cell_type": "code",
- "outputs": [],
- "execution_count": null
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.8/dist-packages/labelbox/schema/ontology.py:159: UserWarning: When creating the Classification feature, please use “name” for the classification schema name, which will be used when creating annotation payload for Model-Assisted Labeling Import and Label Import. “instructions” is no longer supported to specify classification schema name.\n",
+ " warnings.warn(msg)\n"
+ ]
+ }
+ ],
+ "execution_count": 273,
+ "id": "p8dR-eegnFum"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "6RI4kJFwnFum"
+ },
"source": [
"### Step 3: Create a labeling project \n",
"Connect the ontology to the labeling project "
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "6RI4kJFwnFum"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "F75j3jvVnFum"
+ },
"source": [
"# Project defaults to batch mode with benchmark quality settings if this argument is not provided\n",
"# Queue mode will be deprecated once dataset mode is deprecated\n",
@@ -403,17 +528,27 @@
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 274,
+ "id": "F75j3jvVnFum"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "xea2VK0_nFum"
+ },
"source": [
"### Step 4: Send a batch of data rows to the project "
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "xea2VK0_nFum"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "Pxpuz4CPnFum",
+ "outputId": "1ef538fb-f99a-4a9a-9e8e-eff9857d219b",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
"source": [
"# Setup Batches and Ontology\n",
"\n",
@@ -429,24 +564,27 @@
"cell_type": "code",
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
"Batch: \n"
]
}
],
- "execution_count": null
+ "execution_count": 275,
+ "id": "Pxpuz4CPnFum"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "F3PJkrCznFum"
+ },
"source": [
"### Step 5: Create the annotations payload\n",
"\n",
@@ -454,21 +592,27 @@
"\n",
"Labelbox support two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below. If you are using Python Annotation types, compose your annotations into Labels attached to the data rows."
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "F3PJkrCznFum"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "l_YL-KztnFum"
+ },
"source": [
"#### Python annotations"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "l_YL-KztnFum"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "hepY4bjbnFun"
+ },
"source": [
"# Create a Label\n",
"label = lb_types.Label(\n",
- " data=lb_types.ImageData(\n",
+ " data=lb_types.TextData(\n",
" uid=data_row.uid),\n",
" annotations = [\n",
" named_entitity_annotation, \n",
@@ -479,33 +623,36 @@
")\n",
"\n",
"\n",
- "# Create urls to mask data for upload\n",
- "def signing_function(obj_bytes: bytes) -> str:\n",
- " url = client.upload_data(content=obj_bytes, sign=True)\n",
- " return url \n",
- "\n",
- "label.add_url_to_masks(signing_function)"
+ "label_list = [label]"
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 276,
+ "id": "hepY4bjbnFun"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "Vl89akLnnFun"
+ },
"source": [
"#### NDJSON annotations"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "Vl89akLnnFun"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "n82GAx3UnFun"
+ },
"source": [
"label_ndjson_method2 = []\n",
"for annotations in [entities_ndjson, \n",
" radio_annotation_ndjson, \n",
- " radio_annotation_ndjson_with_subclass,\n",
" checklist_annotation_ndjson,\n",
- " text_annotation_ndjson] :\n",
+ " text_annotation_ndjson,\n",
+ " nested_radio_annotation_ndjson,\n",
+ " nested_checklist_annotation_ndjson\n",
+ " ] :\n",
" annotations.update({\n",
" 'dataRow': {\n",
" 'id': data_row.uid\n",
@@ -515,27 +662,40 @@
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 277,
+ "id": "n82GAx3UnFun"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "kbZumShZnFun"
+ },
"source": [
"### Step 6: Upload annotations to a project as pre-labels or completed labels\n",
"For the purpose of this tutorial only run one of the label_ndjosn annotation type tools at the time (NDJSON or Python Annotation types). \n",
"\n",
"\n"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "kbZumShZnFun"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "QKGLc0IwnFun"
+ },
"source": [
"#### Model-Assisted Labeling (MAL)"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "QKGLc0IwnFun"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "zKdWZm24nFun",
+ "outputId": "7f4fa248-a5d6-471b-c9c3-bb95f1c14a68",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
"source": [
"# Upload MAL label for this data row in project\n",
"upload_job_mal = lb.MALPredictionImport.create_from_objects(\n",
@@ -552,25 +712,35 @@
"cell_type": "code",
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
"Errors: []\n",
" \n"
]
}
],
- "execution_count": null
+ "execution_count": 278,
+ "id": "zKdWZm24nFun"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "Iyi_F5NpnFun"
+ },
"source": [
"#### Label Import "
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "Iyi_F5NpnFun"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "-qZOAF2BnFun",
+ "outputId": "1ff18daf-cb6e-4aa9-e880-fae13e45d84e",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
"source": [
"# Upload label for this data row in project\n",
"upload_job_label_import = lb.LabelImport.create_from_objects(\n",
@@ -585,31 +755,38 @@
"cell_type": "code",
"outputs": [
{
- "name": "stdout",
"output_type": "stream",
+ "name": "stdout",
"text": [
"Errors: []\n"
]
}
],
- "execution_count": null
+ "execution_count": 279,
+ "id": "-qZOAF2BnFun"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "4JBjAocinFun"
+ },
"source": [
"### Optional deletions for cleanup"
],
- "cell_type": "markdown"
+ "cell_type": "markdown",
+ "id": "4JBjAocinFun"
},
{
- "metadata": {},
+ "metadata": {
+ "id": "i7L3u114nFun"
+ },
"source": [
"# project.delete()\n",
"# dataset.delete()"
],
"cell_type": "code",
"outputs": [],
- "execution_count": null
+ "execution_count": 280,
+ "id": "i7L3u114nFun"
}
]
-}
\ No newline at end of file
+}
From 458664ace8eb13988725a536523e4460f8f8c41d Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Tue, 28 Feb 2023 11:13:12 -0500
Subject: [PATCH 2/3] Added media type to ontology
---
examples/annotation_import/text.ipynb | 36 +++++++++++++--------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/examples/annotation_import/text.ipynb b/examples/annotation_import/text.ipynb
index 4fd82a3ec..82c507e05 100644
--- a/examples/annotation_import/text.ipynb
+++ b/examples/annotation_import/text.ipynb
@@ -143,7 +143,7 @@
},
"source": [
"# Add your api key\n",
- "API_KEY=None\n",
+ "API_KEY=API_KEY=None\n",
"client = lb.Client(API_KEY)"
],
"cell_type": "code",
@@ -229,7 +229,7 @@
"id": "CmakTYIMnFul"
},
"source": [
- "########## Classification - Radio (with subclassifcations) is only suppported with NDJSON tools ##########\n",
+ "########## Classification - Radio and Checklist (with subclassifications) are only supported with NDJSON tools ##########\n",
"\n",
"# NDJSON\n",
"nested_radio_annotation_ndjson= {\n",
@@ -340,11 +340,11 @@
},
{
"metadata": {
- "id": "5GOSI_yunFul",
- "outputId": "0082435a-0533-469e-b790-8cc93d96f084",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "5GOSI_yunFul",
+ "outputId": "0082435a-0533-469e-b790-8cc93d96f084"
},
"source": [
"# You can now include ohter fields like attachments, media type and metadata in the data row creation step: https://docs.labelbox.com/reference/text-file \n",
@@ -401,11 +401,11 @@
},
{
"metadata": {
- "id": "p8dR-eegnFum",
- "outputId": "f8933a3d-d1c6-4b61-a73d-760c44bc1d6b",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "p8dR-eegnFum",
+ "outputId": "f8933a3d-d1c6-4b61-a73d-760c44bc1d6b"
},
"source": [
"## Setup the ontology and link the tools created above.\n",
@@ -469,7 +469,7 @@
" ]\n",
")\n",
"\n",
- "ontology = client.create_ontology(\"Ontology Text Annotations\", ontology_builder.asdict())\n"
+ "ontology = client.create_ontology(\"Ontology Text Annotations\", ontology_builder.asdict(), media_type=lb.MediaType.Text)\n"
],
"cell_type": "code",
"outputs": [
@@ -543,11 +543,11 @@
},
{
"metadata": {
- "id": "Pxpuz4CPnFum",
- "outputId": "1ef538fb-f99a-4a9a-9e8e-eff9857d219b",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "Pxpuz4CPnFum",
+ "outputId": "1ef538fb-f99a-4a9a-9e8e-eff9857d219b"
},
"source": [
"# Setup Batches and Ontology\n",
@@ -690,11 +690,11 @@
},
{
"metadata": {
- "id": "zKdWZm24nFun",
- "outputId": "7f4fa248-a5d6-471b-c9c3-bb95f1c14a68",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "zKdWZm24nFun",
+ "outputId": "7f4fa248-a5d6-471b-c9c3-bb95f1c14a68"
},
"source": [
"# Upload MAL label for this data row in project\n",
@@ -735,11 +735,11 @@
},
{
"metadata": {
- "id": "-qZOAF2BnFun",
- "outputId": "1ff18daf-cb6e-4aa9-e880-fae13e45d84e",
"colab": {
"base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "-qZOAF2BnFun",
+ "outputId": "1ff18daf-cb6e-4aa9-e880-fae13e45d84e"
},
"source": [
"# Upload label for this data row in project\n",
From 67d6ae358ceda68773058ece0ae15f7af551dce5 Mon Sep 17 00:00:00 2001
From: Andrea Ovalle <74880762+ovalle15@users.noreply.github.com>
Date: Tue, 28 Feb 2023 11:18:42 -0500
Subject: [PATCH 3/3] Removed typo
---
examples/annotation_import/text.ipynb | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/examples/annotation_import/text.ipynb b/examples/annotation_import/text.ipynb
index 82c507e05..144c585df 100644
--- a/examples/annotation_import/text.ipynb
+++ b/examples/annotation_import/text.ipynb
@@ -143,7 +143,7 @@
},
"source": [
"# Add your api key\n",
- "API_KEY=API_KEY=None\n",
+ "API_KEY=None\n",
"client = lb.Client(API_KEY)"
],
"cell_type": "code",