From 997a8268bb0a2b52282bf98cf3334bf158e3080e Mon Sep 17 00:00:00 2001 From: jtsodapop <67922677+jtsodapop@users.noreply.github.com> Date: Wed, 12 Oct 2022 11:22:39 -0500 Subject: [PATCH] new conversational notebook --- .../annotation_import/conversational.ipynb | 388 ++++++++++++++++++ .../{pdf_mal.ipynb => pdf.ipynb} | 2 +- 2 files changed, 389 insertions(+), 1 deletion(-) create mode 100644 examples/annotation_import/conversational.ipynb rename examples/annotation_import/{pdf_mal.ipynb => pdf.ipynb} (99%) diff --git a/examples/annotation_import/conversational.ipynb b/examples/annotation_import/conversational.ipynb new file mode 100644 index 000000000..f715b4f0f --- /dev/null +++ b/examples/annotation_import/conversational.ipynb @@ -0,0 +1,388 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "26b9c486", + "metadata": {}, + "source": [ + "\n", + " \n", + "" + ] + }, + { + "cell_type": "markdown", + "id": "51eb4b54", + "metadata": {}, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "id": "27d147e7", + "metadata": {}, + "source": [ + "# Conversational Text Annotation Import\n", + "* This notebook will provide examples of each supported annotation type for conversational text assets. It will cover the following:\n", + " * Model-Assisted Labeling (MAL) - used to provide pre-annotated data for your labelers. This will enable a reduction in the total amount of time to properly label your assets. Model-assisted labeling does not submit the labels automatically, and will need to be reviewed by a labeler for submission." 
+ ] + }, + { + "cell_type": "markdown", + "id": "19b346e2", + "metadata": {}, + "source": [ + "* For information on what types of annotations are supported per data type, refer to this documentation:\n", + " * https://docs.labelbox.com/docs/model-assisted-labeling#option-1-import-via-python-annotation-types-recommended" + ] + }, + { + "cell_type": "markdown", + "id": "f4375aef", + "metadata": {}, + "source": [ + "* Notes:\n", + " * Wait until the import job is complete before opening the Editor to make sure all annotations are imported properly." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "00ad1e27", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q 'labelbox[data]'" + ] + }, + { + "cell_type": "markdown", + "id": "ccc4c3c3", + "metadata": {}, + "source": [ + "# Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "f0de1cde", + "metadata": {}, + "outputs": [], + "source": [ + "from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n", + "from labelbox import Client, LabelingFrontend, MALPredictionImport\n", + "from labelbox.data.annotation_types import (\n", + " Label, ImageData, ObjectAnnotation, \n", + " TextEntity,\n", + " Radio, Checklist, Text,\n", + " ClassificationAnnotation, ClassificationAnswer\n", + ")\n", + "from labelbox.data.serialization import NDJsonConverter\n", + "from labelbox.schema.media_type import MediaType\n", + "import uuid\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "id": "54a028dd", + "metadata": {}, + "source": [ + "# API Key and Client\n", + "Provide a valid api key below in order to properly connect to the Labelbox Client." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "4aab38e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Add your api key. Never commit a real key: rotate any key that has been pushed to a repository.\n",
+    "# Passing api_key=None makes Client read the LABELBOX_API_KEY environment variable instead.\n",
+    "API_KEY = \"YOUR API KEY\"\n",
+    "client = Client(api_key=API_KEY)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c1763e44",
+   "metadata": {},
+   "source": [
+    "---- \n",
+    "### Steps\n",
+    "1. Make sure project is setup\n",
+    "2. Collect annotations\n",
+    "3. Upload"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d30024a7",
+   "metadata": {},
+   "source": [
+    "First, we create an ontology with all the possible tools and classifications supported for conversational text. The official list of supported annotations to import can be found here:\n",
+    "- [Model-Assisted Labeling](https://docs.labelbox.com/docs/model-assisted-labeling) (annotations/labels are not submitted)\n",
+    "- [Conversational Text Annotations](https://docs.labelbox.com/docs/conversational-annotations)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "ae6f0919",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ontology_builder = OntologyBuilder(\n",
+    "    tools=[\n",
+    "        Tool(  # NER tool given the name \"ner\"\n",
+    "            tool=Tool.Type.NER,\n",
+    "            name=\"ner\")],\n",
+    "    classifications=[\n",
+    "        Classification(  # Text classification given the name \"text\"\n",
+    "            class_type=Classification.Type.TEXT,\n",
+    "            scope=Classification.Scope.INDEX,\n",
+    "            instructions=\"text\"),\n",
+    "        Classification(  # Checklist classification given the name \"checklist\" with two options: \"first_checklist_answer\" and \"second_checklist_answer\"\n",
+    "            class_type=Classification.Type.CHECKLIST,\n",
+    "            scope=Classification.Scope.INDEX,\n",
+    "            instructions=\"checklist\",\n",
+    "            options=[\n",
+    "                Option(value=\"first_checklist_answer\"),\n",
+    "                Option(value=\"second_checklist_answer\")\n",
+    "            ]\n",
+    "        ),\n",
+    "        Classification(  # Radio classification given the name \"radio\" with two options: \"first_radio_answer\" and \"second_radio_answer\"\n",
+    "            class_type=Classification.Type.RADIO,\n",
+    "            instructions=\"radio\",\n",
+    "            scope=Classification.Scope.INDEX,\n",
+    "            options=[\n",
+    "                Option(value=\"first_radio_answer\"),\n",
+    "                Option(value=\"second_radio_answer\")\n",
+    "            ]\n",
+    "        )\n",
+    "    ]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "b95935a7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "OntologyBuilder(tools=[Tool(tool=, name='ner', required=False, color=None, classifications=[], schema_id=None, feature_schema_id=None)], classifications=[Classification(class_type=, instructions='text', required=False, options=[], schema_id=None, feature_schema_id=None, scope=), Classification(class_type=, instructions='checklist', required=False, options=[Option(value='first_checklist_answer', label='first_checklist_answer', schema_id=None, feature_schema_id=None, options=[]), Option(value='second_checklist_answer', label='second_checklist_answer', schema_id=None, feature_schema_id=None, options=[])], schema_id=None, feature_schema_id=None, scope=), Classification(class_type=, instructions='radio', required=False, options=[Option(value='first_radio_answer', label='first_radio_answer', schema_id=None, feature_schema_id=None, options=[]), Option(value='second_radio_answer', label='second_radio_answer', schema_id=None, feature_schema_id=None, options=[])], schema_id=None, feature_schema_id=None, scope=)])"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ontology_builder"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "6b6403a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create Labelbox project (conversational text assets require MediaType.Conversational, not Document/PDF)\n",
+    "mal_project = client.create_project(name=\"conversational_mal_project\", media_type=MediaType.Conversational)\n",
+    "\n",
+    "# Create one Labelbox dataset\n",
+    "dataset = client.create_dataset(name=\"conversational_annotation_import_demo_dataset\")\n",
+    "\n",
+    "# Grab an example asset and create a Labelbox data row\n",
+    "data_row = dataset.create_data_row(\n",
+    "    external_id = \"conversation-1\",\n",
+    "    row_data = \"https://storage.googleapis.com/labelbox-developer-testing-assets/conversational_text/1000-conversations/conversation-1.json\"\n",
+    ")\n",
+    "\n",
+    "# Setup your ontology / labeling editor\n",
+    "editor = next(client.get_labeling_frontends(where=LabelingFrontend.name == \"Editor\")) # Unless using a custom editor, do not modify this\n",
+    "\n",
+    "mal_project.setup(editor, ontology_builder.asdict()) # Connect your ontology and editor to your MAL project\n",
+    "mal_project.datasets.connect(dataset) # Connect your dataset to your MAL project"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4d3694e",
+   "metadata": {},
+   "source": [
+    "### Object Annotations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "551ca09a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# message based ner: the entity span is given as character offsets within the message identified by messageId\n",
+    "ner_annotation = {\n",
+    "    \"uuid\": str(uuid.uuid4()),\n",
+    "    \"name\": \"ner\",\n",
+    "    \"dataRow\": {\"id\": data_row.uid},\n",
+    "    \"location\": {\n",
+    "        \"start\": 0,\n",
+    "        \"end\": 8\n",
+    "    },\n",
+    "    \"messageId\": \"4\"\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1deaf1f1",
+   "metadata": {},
+   "source": [
+    "### Classification Annotations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "9c5d93de",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# message based classifications: each payload targets one message of the conversation via messageId\n",
+    "text_annotation = {\n",
+    "    'name': 'text',\n",
+    "    'answer': 'the answer to the text questions right here',\n",
+    "    'uuid': str(uuid.uuid4()),\n",
+    "    \"dataRow\": {\"id\": data_row.uid},\n",
+    "    \"messageId\": \"0\",\n",
+    "}\n",
+    "checklist_annotation = {\n",
+    "    'name': 'checklist',\n",
+    "    'uuid': str(uuid.uuid4()),\n",
+    "    'answers': [\n",
+    "        {'name': 'first_checklist_answer'},\n",
+    "        {'name': 'second_checklist_answer'},\n",
+    "    ],\n",
+    "    \"dataRow\": {\"id\": data_row.uid},\n",
+    "    \"messageId\": \"2\",\n",
+    "}\n",
+    "\n",
+    "radio_annotation = {\n",
+    "    'name': 'radio',\n",
+    "    'uuid': str(uuid.uuid4()),\n",
+    "    \"dataRow\": {\"id\": data_row.uid},\n",
+    "    'answer': {\n",
+    "        'name': 'first_radio_answer'\n",
+    "    },\n",
+    "    \"messageId\": \"0\",\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "762db1d2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "annotations = [\n",
+    "    ner_annotation,\n",
+    "    text_annotation,\n",
+    "    checklist_annotation,\n",
+    "    radio_annotation\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "55be64cf",
+   "metadata": {},
+   "source": [
+    "### Model Assisted Labeling "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "10a1f924",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Upload our label using Model-Assisted Labeling\n",
+    "upload_job = MALPredictionImport.create_from_objects(\n",
+    "    client = client,\n",
+    "    project_id = mal_project.uid,\n",
+    "    name=f\"mal_job-{uuid.uuid4()}\",\n",
+    "    predictions=annotations)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "b17f6ba9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Errors: []\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Errors will appear for each annotation that failed.\n",
+    "# Empty list means that there were no errors\n",
+    "# This will provide information only after the upload_job is complete, so we do not need to worry about having to rerun\n",
+    "print(\"Errors:\", upload_job.errors)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7ee6bc98",
"metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/annotation_import/pdf_mal.ipynb b/examples/annotation_import/pdf.ipynb similarity index 99% rename from examples/annotation_import/pdf_mal.ipynb rename to examples/annotation_import/pdf.ipynb index 1050f28eb..bb0423598 100644 --- a/examples/annotation_import/pdf_mal.ipynb +++ b/examples/annotation_import/pdf.ipynb @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "e3522d4b", "metadata": {}, "outputs": [],