diff --git a/examples/prediction_upload/html_predictions.ipynb b/examples/prediction_upload/html_predictions.ipynb new file mode 100644 index 000000000..9fa0478ba --- /dev/null +++ b/examples/prediction_upload/html_predictions.ipynb @@ -0,0 +1,871 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "collapsed_sections": [ + "6FZyvnrqSGuc", + "viFHCnBeTD1Y" + ] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "a6a048e8-b5fe-418b-aec4-829b5b6802e5" + }, + "source": [ + "\n", + " \n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "51cf1362-1cde-4749-aac7-5fb94473baa7" + }, + "source": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# HTML Prediction Import\n", + "\n", + "This notebook walks you through the process of uploading model predictions to a Model Run. This notebook provides an example for each supported prediction type for HTML assets.\n", + "\n", + "**Supported predictions**\n", + "- Radio Classification \n", + "- Checklist Classification\n", + "- free-text Classification\n", + "\n", + "**Not supported:**\n", + "- Bounding Box\n", + "- Polygon\n", + "- Point\n", + "- Polyline\n", + "- Masks\n", + "- NER\n", + "\n", + "A Model Run is a container for the predictions, annotations and metrics of a specific experiment in your ML model development cycle." 
+ ], + "metadata": { + "id": "9znxMjDYGi0Y" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Setup" + ], + "metadata": { + "id": "UtJHIuE8HDRI" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install -q 'labelbox[data]'" + ], + "metadata": { + "id": "cm8xMaLbGb7v", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "65ba50e8-4a5d-45c5-90b7-7f8c2c168c62" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m185.5/185.5 KB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Building wheel for pygeotile (setup.py) ... \u001b[?25l\u001b[?25hdone\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option\n", + "from labelbox import Client, MALPredictionImport, LabelImport\n", + "from labelbox.data.serialization import NDJsonConverter\n", + "from labelbox.schema.media_type import MediaType\n", + "from labelbox.data.annotation_types import (\n", + " Label, ImageData, ObjectAnnotation, MaskData,\n", + " Rectangle, Point, Line, Mask, Polygon,\n", + " Radio, Checklist, Text,\n", + " ClassificationAnnotation, ClassificationAnswer\n", + ")\n", + "import uuid\n", + "import numpy as np\n", + "from labelbox.schema.queue_mode import QueueMode" + ], + "metadata": { + "id": "NIq-6M9kHKSs" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Replace with your API Key \n", + "Guides on [Create an API key](https://docs.labelbox.com/docs/create-an-api-key)" + ], + "metadata": { + "id": "pZ2rBqY8HQoe" + } + }, + { 
+ "cell_type": "code", + "source": [ + "API_KEY = None\n", + "client = Client(API_KEY)" + ], + "metadata": { + "id": "z7ZLKLYLHP__" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Supported Predictions" + ], + "metadata": { + "id": "RgBYFUxa-VGT" + } + }, + { + "cell_type": "code", + "source": [ + "########### Radio Classification ###########\n", + "radio_prediction = ClassificationAnnotation(\n", + " name=\"radio_question\", \n", + " value=Radio(answer = ClassificationAnswer(name = \"second_radio_answer\", confidence=0.5))\n", + ")\n", + "\n", + "\n", + "radio_prediction_ndjson = {\n", + " 'name': 'radio_question',\n", + " 'answer': {'name': 'first_radio_answer'}\n", + "}\n", + "\n" + ], + "metadata": { + "id": "FJhCAqeR-cUJ" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#### Nested Classifications ######\n", + "\n", + "# Python annotation\n", + "radio_prediction_nested = ClassificationAnnotation(\n", + " name=\"radio_question_sub\", \n", + " value=Radio(answer = ClassificationAnswer(name = \"first_radio_answer\", confidence=0.5)),\n", + " classifications=[\n", + " \tClassificationAnnotation(\n", + " \tname=\"sub_radio_question\",\n", + " \t\tvalue=Radio(answer=ClassificationAnswer(name=\"first_sub_radio_answer\", confidence=0.5))\n", + " )\n", + " ]\n", + ")\n", + "\n", + "# NDJSON\n", + "nested_radio_prediction_ndjson = {\n", + " 'name': 'radio_question_sub',\n", + " 'answer': {\n", + " 'name': 'first_radio_answer',\n", + " \"confidence\": 0.5,\n", + " 'classifications': [{\n", + " 'name':'sub_radio_question',\n", + " 'answer': { 'name' : 'first_sub_radio_answer', 'confidence': 0.5 }\n", + " }]\n", + " }\n", + "}\n", + "\n", + "# Nested classification for checklists is only supported with NDJSON tools\n", + "nested_checklist_prediction_ndjson = {\n", + " \"name\": \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\": \"first_checklist_answer\", 
\n", + " \"confidence\": 0.5,\n", + " \"classifications\" : [\n", + " {\n", + " \"name\": \"sub_checklist_question\", \n", + " \"answer\": {\"name\": \"first_sub_checklist_answer\", \"confidence\": 0.5 }\n", + " } \n", + " ] \n", + " }]\n", + "}" + ], + "metadata": { + "id": "D_luKHzuY-b3" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "########## Checklist ##########\n", + "\n", + "# Python annotation\n", + "checklist_prediction = ClassificationAnnotation(\n", + " name=\"checklist_question\",\n", + " value=Checklist(\n", + " answer = [\n", + " ClassificationAnswer(\n", + " name = \"first_checklist_answer\",\n", + " confidence=0.5\n", + " ),\n", + " ClassificationAnswer(\n", + " name = \"second_checklist_answer\", \n", + " confidence=0.5\n", + " ),\n", + " ClassificationAnswer(\n", + " name = \"third_checklist_answer\", \n", + " confidence=0.5\n", + " )\n", + " ])\n", + " )\n", + "\n", + "\n", + "# NDJSON\n", + "checklist_prediction_ndjson = {\n", + " 'name': 'checklist_question',\n", + " 'confidence': 0.5,\n", + " 'answer': [\n", + " {'name': 'first_checklist_answer', 'confidence': 0.5}\n", + " ]\n", + "}\n" + ], + "metadata": { + "id": "rjHUJUbGZXgY" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "########## Classification Free-Form text ##########\n", + "## Text classifications do not support confidence values\n", + "# Python annotation\n", + "text_prediction = ClassificationAnnotation(\n", + " name = \"free_text\", \n", + " value = Text(answer=\"sample text\")\n", + ")\n", + "\n", + "# NDJSON\n", + "text_prediction_ndjson = {\n", + " 'name': 'free_text',\n", + " 'answer': 'sample text'\n", + "}" + ], + "metadata": { + "id": "dMpF0LfZZZpg" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Step 1: Import data rows into Catalog" + ], + "metadata": { + "id": "U-o15yu9IPDo" + } + }, + { + "cell_type": "code", + "source": [ + 
"# send a sample image as batch to the project\n", + "test_img_url = {\n", + " \"row_data\": \"https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_2.html\",\n", + " \"global_key\": str(uuid.uuid4())\n", + "}\n", + "dataset = client.create_dataset(name=\"html demo dataset\")\n", + "data_row = dataset.create_data_row(test_img_url)\n", + "print(data_row)" + ], + "metadata": { + "id": "HjH9gTV8IBG9", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e7c7dd14-293e-42e7-d93e-faebdbbab036" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Step 2: Create/select an Ontology for your model predictions\n", + "Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.\n" + ], + "metadata": { + "id": "oy0umzuNIceP" + } + }, + { + "cell_type": "code", + "source": [ + "## Setup the ontology and link the tools created above.\n", + "\n", + "ontology_builder = OntologyBuilder(\n", + " classifications=[ # List of Classification objects\n", + " Classification( \n", + " class_type=Classification.Type.RADIO, \n", + " instructions=\"radio_question\", \n", + " options=[Option(value=\"first_radio_answer\")]\n", + " ),\n", + " Classification( \n", + " class_type=Classification.Type.RADIO, \n", + " instructions=\"radio_question_sub\", \n", + " options=[\n", + " Option(value=\"first_radio_answer\",\n", + " options=[\n", + " Classification(\n", + " class_type=Classification.Type.RADIO,\n", + " instructions=\"sub_radio_question\",\n", + " options=[\n", + " Option(value=\"first_sub_radio_answer\")\n", + " ]\n", + " ),\n", + " ]\n", + " )\n", + " ],\n", + " ),\n", + " Classification( \n", + " 
class_type=Classification.Type.CHECKLIST, \n", + " instructions=\"checklist_question\", \n", + " options=[\n", + " Option(value=\"first_checklist_answer\"),\n", + " Option(value=\"second_checklist_answer\"), \n", + " Option(value=\"third_checklist_answer\") \n", + " ]\n", + " ), \n", + " Classification( \n", + " class_type=Classification.Type.TEXT,\n", + " instructions=\"free_text\"\n", + " ),\n", + " Classification(\n", + " class_type=Classification.Type.CHECKLIST, \n", + " instructions=\"nested_checklist_question\",\n", + " options=[\n", + " Option(\"first_checklist_answer\",\n", + " options=[\n", + " Classification(\n", + " class_type=Classification.Type.CHECKLIST, \n", + " instructions=\"sub_checklist_question\", \n", + " options=[Option(\"first_sub_checklist_answer\")]\n", + " )\n", + " ]\n", + " )\n", + " ]\n", + " )\n", + " ]\n", + ")\n", + "\n", + "ontology = client.create_ontology(\"Ontology Text Predictions\", ontology_builder.asdict(), media_type=MediaType.Html)\n" + ], + "metadata": { + "id": "Kt4XWWqgIiWk" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Step 3: Create a Model and Model Run" + ], + "metadata": { + "id": "ZjN8jxHvIvHP" + } + }, + { + "cell_type": "code", + "source": [ + "# create Model\n", + "model = client.create_model(name=\"HTML_model_run_\" + str(uuid.uuid4()), \n", + " ontology_id=ontology.uid)\n", + "# create Model Run\n", + "model_run = model.create_model_run(\"iteration 1\")" + ], + "metadata": { + "id": "8n-AvzdiOR6d" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Step 4: Send data rows to the Model Run" + ], + "metadata": { + "id": "NX6L0axRJN5J" + } + }, + { + "cell_type": "code", + "source": [ + "model_run.upsert_data_rows([data_row.uid])" + ], + "metadata": { + "id": "6sngCgIwJSae", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "630f3187-5377-4607-af50-5c8250fd431a" + }, + "execution_count": 
11, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Step 5. Create the predictions payload\n", + "\n", + "Create the annotations payload using the snippets of code in the **Supported Predictions** section.\n", + "\n", + "Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.\n", + "\n", + "The resulting label_ndjson should have exactly the same content for annotations that are supported by both (with the exception of the uuid strings that are generated)" + ], + "metadata": { + "id": "6FZyvnrqSGuc" + } + }, + { + "cell_type": "code", + "source": [ + "# Create a Label for predictions\n", + "label_prediction = Label(\n", + " data=ImageData(uid=data_row.uid),\n", + " annotations = [\n", + " radio_prediction, \n", + " radio_prediction_nested,\n", + " checklist_prediction,\n", + " text_prediction\n", + " ]\n", + ")\n", + "\n", + "# Create a label list \n", + "label_list_prediction = [label_prediction]\n", + "\n", + "# Convert the prediction label from a Labelbox class object to the underlying NDJSON format required for upload - uploads can be directly built in this syntax as well\n", + "ndjson_prediction = list(NDJsonConverter.serialize(label_list_prediction))" + ], + "metadata": { + "id": "zv2OLTXKSGWv" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "If using NDJSON: " + ], + "metadata": { + "id": "YXdXkDcIV4bs" + } + }, + { + "cell_type": "code", + "source": [ + "\n", + "ndjson_prediction_method2 = []\n", + "for annot in [\n", + " radio_prediction_ndjson, \n", + " nested_radio_prediction_ndjson,\n", + " checklist_prediction_ndjson,\n", + " text_prediction_ndjson,\n", + " nested_checklist_prediction_ndjson\n", + "]:\n", + " 
annot.update({\n", + " 'uuid': str(uuid.uuid4()),\n", + " 'dataRow': {'id': data_row.uid},\n", + " })\n", + " ndjson_prediction_method2.append(annot)" + ], + "metadata": { + "id": "F-Y7sSyAV3tn" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Step 6. Upload the predictions payload to the Model Run " + ], + "metadata": { + "id": "viFHCnBeTD1Y" + } + }, + { + "cell_type": "code", + "source": [ + "# Upload the prediction label to the Model Run\n", + "upload_job_prediction = model_run.add_predictions(\n", + " name=\"prediction_upload_job\"+str(uuid.uuid4()),\n", + " predictions=ndjson_prediction_method2)\n", + "\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_prediction.errors)" + ], + "metadata": { + "id": "uCI8pLTITQNG", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6b6841ae-cca5-4813-d1ac-1e964ae6c104" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Errors: []\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Step 7: Send annotations to the Model Run \n", + "To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run." + ], + "metadata": { + "id": "T-ZHWWI3JgmX" + } + }, + { + "cell_type": "markdown", + "source": [ + "##### 7.1. 
Create a Labelbox project" + ], + "metadata": { + "id": "CYRiqHr2O_aL" + } + }, + { + "cell_type": "code", + "source": [ + "# Create a Labelbox project\n", + "project = client.create_project(name=\"HTML prediction import demo\", \n", + " queue_mode=QueueMode.Batch,\n", + " # Quality Settings setup \n", + " auto_audit_percentage=1,\n", + " auto_audit_number_of_labels=1,\n", + " media_type=MediaType.Html)\n", + "project.setup_editor(ontology)" + ], + "metadata": { + "id": "jEtoDiDrPFvI" + }, + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "##### 7.2. Create a batch to send to the project " + ], + "metadata": { + "id": "7FEyC-nBPPuD" + } + }, + { + "cell_type": "code", + "source": [ + "project.create_batch(\n", + " \"batch_prediction_html\", # Each batch in a project must have a unique name\n", + " dataset.export_data_rows(), # A list of data rows or data row ids\n", + " 5 # priority between 1(Highest) - 5(lowest)\n", + ")" + ], + "metadata": { + "id": "WRr5tdVEPXXy", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ff468996-5876-4001-99e8-73b597bf5083" + }, + "execution_count": 16, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "##### 7.3. Create the annotations payload" + ], + "metadata": { + "id": "FTGAI730UlZ3" + } + }, + { + "cell_type": "code", + "source": [ + "###### Annotations ###### \n", + "\n", + "\n", + "radio_annotation_ndjson = {\n", + " 'name': 'radio_question',\n", + " 'answer': {'name': 'first_radio_answer'}\n", + "}\n", + "\n", + "\n", + "nested_radio_annotation_ndjson = {\n", + " 'name': 'radio_question_sub',\n", + " 'answer': {\n", + " 'name': 'first_radio_answer',\n", + " 'classifications': [{\n", + " 'name':'sub_radio_question',\n", + " 'answer': { 'name' : 'first_sub_radio_answer'}\n", + " }]\n", + " }\n", + "}\n", + "\n", + 
"\n", + "nested_checklist_annotation_ndjson = {\n", + " \"name\": \"nested_checklist_question\",\n", + " \"answer\": [{\n", + " \"name\": \"first_checklist_answer\", \n", + " \"classifications\" : [\n", + " {\n", + " \"name\": \"sub_checklist_question\", \n", + " \"answer\": {\"name\": \"first_sub_checklist_answer\" }\n", + " } \n", + " ] \n", + " }]\n", + "}\n", + "\n", + "checklist_annotation_ndjson = {\n", + " 'name': 'checklist_question',\n", + " 'answer': [\n", + " {'name': 'first_checklist_answer', }\n", + " ]\n", + "}\n", + "\n", + "\n", + "text_annotation_ndjson = {\n", + " 'name': 'free_text',\n", + " 'answer': 'sample text'\n", + "}\n" + ], + "metadata": { + "id": "A8_HVvu9Uvfl" + }, + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "##### 7.4. Create the label object" + ], + "metadata": { + "id": "8QwmguFvPltl" + } + }, + { + "cell_type": "code", + "source": [ + "ndjson_annotation = []\n", + "for annot in [\n", + " radio_annotation_ndjson,\n", + " nested_radio_annotation_ndjson,\n", + " nested_checklist_annotation_ndjson ,\n", + " checklist_annotation_ndjson,\n", + " text_annotation_ndjson\n", + " ]:\n", + " annot.update({\n", + " 'uuid': str(uuid.uuid4()),\n", + " 'dataRow': {'id': data_row.uid},\n", + " })\n", + " ndjson_annotation.append(annot)" + ], + "metadata": { + "id": "9gD_alThQA3G" + }, + "execution_count": 18, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "##### 7.5. 
Upload annotations to the project using Label Import" + ], + "metadata": { + "id": "nGVNQlvPQ-kF" + } + }, + { + "cell_type": "code", + "source": [ + "upload_job_annotation = LabelImport.create_from_objects(\n", + " client = client,\n", + " project_id = project.uid,\n", + " name=\"html_annotation_import\" + str(uuid.uuid4()),\n", + " labels=ndjson_annotation)\n", + "\n", + "upload_job_annotation.wait_until_done()\n", + "# Errors will appear for annotation uploads that failed.\n", + "print(\"Errors:\", upload_job_annotation.errors)\n" + ], + "metadata": { + "id": "HYh9AzrlRYX-", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ef8c5932-35df-4feb-9879-81fd89c5897b" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Errors: []\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "##### 7.6 Send the annotations to the Model Run" + ], + "metadata": { + "id": "Y3rgM-5cRrxM" + } + }, + { + "cell_type": "code", + "source": [ + "# get the labels id from the project\n", + "label_ids = [x['ID'] for x in project.export_labels(download=True)]\n", + "model_run.upsert_labels(label_ids)" + ], + "metadata": { + "id": "i2BrS8CcSBzo", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "9234850e-fc29-427e-e75e-d16237b3ef08" + }, + "execution_count": 20, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Optional deletions for cleanup \n" + ], + "metadata": { + "id": "DMtOfWWDWFbJ" + } + }, + { + "cell_type": "code", + "source": [ + "# project.delete()\n", + "# dataset.delete()" + ], + "metadata": { + "id": "aAhkyvJlWK1p" + }, + "execution_count": 21, + "outputs": [] + } + ] +}