From 997a8268bb0a2b52282bf98cf3334bf158e3080e Mon Sep 17 00:00:00 2001
From: jtsodapop <67922677+jtsodapop@users.noreply.github.com>
Date: Wed, 12 Oct 2022 11:22:39 -0500
Subject: [PATCH] new conversational notebook
---
.../annotation_import/conversational.ipynb | 388 ++++++++++++++++++
.../{pdf_mal.ipynb => pdf.ipynb} | 2 +-
2 files changed, 389 insertions(+), 1 deletion(-)
create mode 100644 examples/annotation_import/conversational.ipynb
rename examples/annotation_import/{pdf_mal.ipynb => pdf.ipynb} (99%)
diff --git a/examples/annotation_import/conversational.ipynb b/examples/annotation_import/conversational.ipynb
new file mode 100644
index 000000000..f715b4f0f
--- /dev/null
+++ b/examples/annotation_import/conversational.ipynb
@@ -0,0 +1,388 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "26b9c486",
+ "metadata": {},
+ "source": [
+ "
\n",
+ " \n",
+ " | "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "51eb4b54",
+ "metadata": {},
+ "source": [
+ "\n",
+ " \n",
+ " | \n",
+ "\n",
+ "\n",
+ " \n",
+ " | "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "27d147e7",
+ "metadata": {},
+ "source": [
+ "# Conversational Text Annotation Import\n",
+ "* This notebook will provide examples of each supported annotation type for conversational text assets. It will cover the following:\n",
+ " * Model-Assisted Labeling (MAL) - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "19b346e2",
+ "metadata": {},
+ "source": [
+ "* For information on what types of annotations are supported per data type, refer to this documentation:\n",
+ " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f4375aef",
+ "metadata": {},
+ "source": [
+ "* Notes:\n",
+ " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "00ad1e27",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -q 'labelbox[data]'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ccc4c3c3",
+ "metadata": {},
+ "source": [
+ "# Imports"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "f0de1cde",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n",
+ "from labelbox import Client, LabelingFrontend, MALPredictionImport\n",
+ "from labelbox.data.annotation_types import (\n",
+ " Label, ImageData, ObjectAnnotation, \n",
+ " TextEntity,\n",
+ " Radio, Checklist, Text,\n",
+ " ClassificationAnnotation, ClassificationAnswer\n",
+ ")\n",
+ "from labelbox.data.serialization import NDJsonConverter\n",
+ "from labelbox.schema.media_type import MediaType\n",
+ "import uuid\n",
+ "import json"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "54a028dd",
+ "metadata": {},
+ "source": [
+ "# API Key and Client\n",
+ "Provide a valid api key below in order to properly connect to the Labelbox Client."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "4aab38e2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add your api key\n",
+ "API_KEY = \"YOUR API KEY\"\n",
+ "API_KEY = \"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VySWQiOiJja2NjOWZtbXc0aGNkMDczOHFpeWM2YW54Iiwib3JnYW5pemF0aW9uSWQiOiJja2N6NmJ1YnVkeWZpMDg1NW8xZHQxZzlzIiwiYXBpS2V5SWQiOiJja2V2cDF2enAwdDg0MDc1N3I2ZWZldGgzIiwiaWF0IjoxNTk5Njc0NzY0LCJleHAiOjIyMzA4MjY3NjR9.iyqPpEWNpfcjcTid5WVkXLi51g22e_l3FrK-DlFJ2mM\"\n",
+ "client = Client(api_key=API_KEY)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c1763e44",
+ "metadata": {},
+ "source": [
+ "---- \n",
+ "### Steps\n",
+ "1. Make sure project is setup\n",
+ "2. Collect annotations\n",
+ "3. Upload"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d30024a7",
+ "metadata": {},
+ "source": [
+ "First, we create an ontology with all the possible tools and classifications supported for PDF. The official list of supported annotations to import can be found here:\n",
+ "- [Model-Assisted Labeling](https://docs.labelbox.com/docs/model-assisted-labeling) (annotations/labels are not submitted)\n",
+ "- [Conversational Text Annotations](https://docs.labelbox.com/docs/conversational-annotations)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "ae6f0919",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ontology_builder = OntologyBuilder(\n",
+ " tools=[ \n",
+ " Tool( # NER tool given the name \"ner\"\n",
+ " tool=Tool.Type.NER, \n",
+ " name=\"ner\")], \n",
+ " classifications=[ \n",
+ " Classification( # Text classification given the name \"text\"\n",
+ " class_type=Classification.Type.TEXT,\n",
+ " scope=Classification.Scope.INDEX, \n",
+ " instructions=\"text\"), \n",
+ " Classification( # Checklist classification given the name \"text\" with two options: \"first_checklist_answer\" and \"second_checklist_answer\"\n",
+ " class_type=Classification.Type.CHECKLIST, \n",
+ " scope=Classification.Scope.INDEX, \n",
+ " instructions=\"checklist\", \n",
+ " options=[\n",
+ " Option(value=\"first_checklist_answer\"),\n",
+ " Option(value=\"second_checklist_answer\") \n",
+ " ]\n",
+ " ), \n",
+ " Classification( # Radio classification given the name \"text\" with two options: \"first_radio_answer\" and \"second_radio_answer\"\n",
+ " class_type=Classification.Type.RADIO, \n",
+ " instructions=\"radio\", \n",
+ " scope=Classification.Scope.INDEX, \n",
+ " options=[\n",
+ " Option(value=\"first_radio_answer\"),\n",
+ " Option(value=\"second_radio_answer\")\n",
+ " ]\n",
+ " )\n",
+ " ]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "b95935a7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "OntologyBuilder(tools=[Tool(tool=, name='ner', required=False, color=None, classifications=[], schema_id=None, feature_schema_id=None)], classifications=[Classification(class_type=, instructions='text', required=False, options=[], schema_id=None, feature_schema_id=None, scope=), Classification(class_type=, instructions='checklist', required=False, options=[Option(value='first_checklist_answer', label='first_checklist_answer', schema_id=None, feature_schema_id=None, options=[]), Option(value='second_checklist_answer', label='second_checklist_answer', schema_id=None, feature_schema_id=None, options=[])], schema_id=None, feature_schema_id=None, scope=), Classification(class_type=, instructions='radio', required=False, options=[Option(value='first_radio_answer', label='first_radio_answer', schema_id=None, feature_schema_id=None, options=[]), Option(value='second_radio_answer', label='second_radio_answer', schema_id=None, feature_schema_id=None, options=[])], schema_id=None, feature_schema_id=None, scope=)])"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ontology_builder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "6b6403a1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create Labelbox project\n",
+ "mal_project = client.create_project(name=\"conversational_mal_project\", media_type=MediaType.Document)\n",
+ "\n",
+ "# Create one Labelbox dataset\n",
+ "dataset = client.create_dataset(name=\"conversational_annotation_import_demo_dataset\")\n",
+ "\n",
+ "# Grab an example asset and create a Labelbox data row\n",
+ "data_row = dataset.create_data_row(\n",
+ " external_id = \"conversation-1\",\n",
+ " row_data = \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\"\n",
+ ")\n",
+ "\n",
+ "# Setup your ontology / labeling editor\n",
+ "editor = next(client.get_labeling_frontends(where=LabelingFrontend.name == \"Editor\")) # Unless using a custom editor, do not modify this\n",
+ "\n",
+ "mal_project.setup(editor, ontology_builder.asdict()) # Connect your ontology and editor to your MAL project\n",
+ "mal_project.datasets.connect(dataset) # Connect your dataset to your MAL project"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f4d3694e",
+ "metadata": {},
+ "source": [
+ "### Object Annotations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "551ca09a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# message based ner\n",
+ "ner_annotation = { \n",
+ " \"uuid\": str(uuid.uuid4()),\n",
+ " \"name\": \"ner\",\n",
+ " \"dataRow\": {\"id\": data_row.uid},\n",
+ " \"location\": { \n",
+ " \"start\": 0, \n",
+ " \"end\": 8 \n",
+ " },\n",
+ " \"messageId\": \"4\"\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1deaf1f1",
+ "metadata": {},
+ "source": [
+ "### Classification Annotations"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "9c5d93de",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# message based classifications\n",
+ "text_annotation = {\n",
+ " 'name': 'text',\n",
+ " 'answer': 'the answer to the text questions right here',\n",
+ " 'uuid': str(uuid.uuid4()),\n",
+ " \"dataRow\": {\"id\": data_row.uid},\n",
+ " \"messageId\": \"0\",\n",
+ "}\n",
+ "checklist_annotation = {\n",
+ " 'name': 'checklist',\n",
+ " 'uuid': str(uuid.uuid4()),\n",
+ " 'answers': [\n",
+ " {'name': 'first_checklist_answer'},\n",
+ " {'name': 'second_checklist_answer'},\n",
+ " ],\n",
+ " \"dataRow\": {\"id\": data_row.uid},\n",
+ " \"messageId\": \"2\",\n",
+ "}\n",
+ "\n",
+ "radio_annotation = {\n",
+ " 'name': 'radio',\n",
+ " 'uuid': str(uuid.uuid4()), \n",
+ " \"dataRow\": {\"id\": data_row.uid},\n",
+ " 'answer': {\n",
+ " 'name': 'first_radio_answer'\n",
+ " },\n",
+ " \"messageId\": \"0\",\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "id": "762db1d2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "annotations = [\n",
+ " ner_annotation,\n",
+ " text_annotation,\n",
+ " checklist_annotation,\n",
+ " radio_annotation\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "55be64cf",
+ "metadata": {},
+ "source": [
+ "### Model Assisted Labeling "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "10a1f924",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Upload our label using Model-Assisted Labeling\n",
+ "upload_job = MALPredictionImport.create_from_objects(\n",
+ " client = client, \n",
+ " project_id = mal_project.uid, \n",
+ " name=f\"mal_job-{str(uuid.uuid4())}\", \n",
+ " predictions=annotations)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "b17f6ba9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Errors: []\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Errors will appear for each annotation that failed.\n",
+ "# Empty list means that there were no errors\n",
+ "# This will provide information only after the upload_job is complete, so we do not need to worry about having to rerun\n",
+ "print(\"Errors:\", upload_job.errors)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7ee6bc98",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/annotation_import/pdf_mal.ipynb b/examples/annotation_import/pdf.ipynb
similarity index 99%
rename from examples/annotation_import/pdf_mal.ipynb
rename to examples/annotation_import/pdf.ipynb
index 1050f28eb..bb0423598 100644
--- a/examples/annotation_import/pdf_mal.ipynb
+++ b/examples/annotation_import/pdf.ipynb
@@ -74,7 +74,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 1,
"id": "e3522d4b",
"metadata": {},
"outputs": [],