<td>
   <a target="_blank" href="https://labelbox.com" ><img src="https://labelbox.com/blog/content/images/2021/02/logo-v4.svg" width=256/></a>
</td>

<td>
<a href="https://colab.research.google.com/github/Labelbox/labelbox-python/blob/develop/examples/prediction_upload/html_predictions.ipynb" target="_blank"><img
src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
</td>

<td>
<a href="https://github.com/Labelbox/labelbox-python/blob/develop/examples/prediction_upload/html_predictions.ipynb" target="_blank"><img
src="https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white" alt="GitHub"></a>
</td>

# HTML Prediction Import

This notebook walks you through the process of uploading model predictions to a Model Run. This notebook provides an example for each supported prediction type for HTML assets.

**Supported predictions**
- Radio Classification 
- Checklist Classification
- Free-text Classification

**Not supported:**
- Bounding Box
- Polygon
- Point
- Polyline
- Masks
- NER

A Model Run is a container for the predictions, annotations and metrics of a specific experiment in your ML model development cycle.

## Setup

In [1]:
!pip install -q 'labelbox[data]'

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m185.5/185.5 KB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for pygeotile (setup.py) ... [?25l[?25hdone


In [2]:
from labelbox.schema.ontology import OntologyBuilder, Tool, Classification, Option
from labelbox import Client, MALPredictionImport, LabelImport
from labelbox.data.serialization import NDJsonConverter
from labelbox.schema.media_type import MediaType
from labelbox.data.annotation_types import (
    Label, ImageData, ObjectAnnotation, MaskData,
    Rectangle, Point, Line, Mask, Polygon,
    Radio, Checklist, Text,
    ClassificationAnnotation, ClassificationAnswer
)
import uuid
import numpy as np
from labelbox.schema.queue_mode import QueueMode

## Replace with your API Key 
See the guide on how to [create an API key](https://docs.labelbox.com/docs/create-an-api-key).

In [3]:
# Put your Labelbox API key here.
# NOTE(review): with None the SDK presumably falls back to an environment
# variable for the key — confirm against the installed SDK version.
API_KEY = None
client = Client(api_key=API_KEY)

## Supported Predictions

In [4]:
########### Radio Classification ###########

# Python annotation
# The answer name has to be an option that the ontology declares for
# "radio_question"; the only option defined there is "first_radio_answer".
radio_prediction = ClassificationAnnotation(
    name="radio_question",
    value=Radio(
        answer=ClassificationAnswer(name="first_radio_answer", confidence=0.5)
    ),
)

# NDJSON — carries the same answer and confidence as the Python annotation
radio_prediction_ndjson = {
    'name': 'radio_question',
    'answer': {'name': 'first_radio_answer', 'confidence': 0.5}
}



In [5]:
#### Nested Classifications ######

# Python annotation: a radio answer that carries a nested radio sub-question
radio_prediction_nested = ClassificationAnnotation(
    name="radio_question_sub",
    value=Radio(
        answer=ClassificationAnswer(name="first_radio_answer", confidence=0.5)
    ),
    classifications=[
        ClassificationAnnotation(
            name="sub_radio_question",
            value=Radio(
                answer=ClassificationAnswer(
                    name="first_sub_radio_answer", confidence=0.5
                )
            ),
        )
    ],
)

# NDJSON: the same nested radio prediction expressed as a plain dict
nested_radio_prediction_ndjson = {
    "name": "radio_question_sub",
    "answer": {
        "name": "first_radio_answer",
        "confidence": 0.5,
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {"name": "first_sub_radio_answer", "confidence": 0.5},
            }
        ],
    },
}

# Nested classification for checklists is only supported with NDJSON tools
nested_checklist_prediction_ndjson = {
    "name": "nested_checklist_question",
    "answer": [
        {
            "name": "first_checklist_answer",
            "confidence": 0.5,
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": {
                        "name": "first_sub_checklist_answer",
                        "confidence": 0.5,
                    },
                }
            ],
        }
    ],
}

In [6]:
########## Checklist ##########

# Python annotation: three selected answers, each with confidence 0.5
checklist_prediction = ClassificationAnnotation(
    name="checklist_question",
    value=Checklist(answer=[
        ClassificationAnswer(name=answer_name, confidence=0.5)
        for answer_name in (
            "first_checklist_answer",
            "second_checklist_answer",
            "third_checklist_answer",
        )
    ]),
)

# NDJSON
# NOTE(review): this payload selects only the first answer and also sets a
# top-level 'confidence' next to the per-answer one — confirm both are intended.
checklist_prediction_ndjson = {
    "name": "checklist_question",
    "confidence": 0.5,
    "answer": [
        {"name": "first_checklist_answer", "confidence": 0.5},
    ],
}


In [7]:
########## Classification Free-Form text  ##########
## Text classifications do not support confidence values

# Python annotation
text_prediction = ClassificationAnnotation(
    name="free_text",
    value=Text(answer="sample text"),
)

# NDJSON
text_prediction_ndjson = {"name": "free_text", "answer": "sample text"}

## Step 1: Import data rows into Catalog

In [8]:
# Create a dataset and add one sample HTML asset to it as a data row.
# A random global key is generated for the data row.
html_asset = {
    "row_data": "https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_2.html",
    "global_key": str(uuid.uuid4()),
}
dataset = client.create_dataset(name="html demo dataset")
data_row = dataset.create_data_row(html_asset)
print(data_row)

<DataRow {
    "created_at": "2023-01-24 22:36:16+00:00",
    "external_id": null,
    "global_key": "c23d513c-57f1-48ad-a233-53ff99a795c4",
    "media_attributes": {},
    "metadata": [],
    "metadata_fields": [],
    "row_data": "https://storage.googleapis.com/labelbox-datasets/html_sample_data/sample_html_2.html",
    "uid": "cldatgxbz33qk07y0cknjgnsf",
    "updated_at": "2023-01-24 22:36:16+00:00"
}>


## Step 2: Create/select an Ontology for your model predictions
Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.


In [9]:
## Set up the ontology. Each classification's `instructions` value is the
## `name` used by the corresponding prediction/annotation payload.

# Plain radio question
radio_classification = Classification(
    class_type=Classification.Type.RADIO,
    instructions="radio_question",
    options=[Option(value="first_radio_answer")],
)

# Radio question whose answer carries a nested radio sub-question
nested_radio_classification = Classification(
    class_type=Classification.Type.RADIO,
    instructions="radio_question_sub",
    options=[
        Option(
            value="first_radio_answer",
            options=[
                Classification(
                    class_type=Classification.Type.RADIO,
                    instructions="sub_radio_question",
                    options=[Option(value="first_sub_radio_answer")],
                ),
            ],
        )
    ],
)

# Plain checklist question
checklist_classification = Classification(
    class_type=Classification.Type.CHECKLIST,
    instructions="checklist_question",
    options=[
        Option(value="first_checklist_answer"),
        Option(value="second_checklist_answer"),
        Option(value="third_checklist_answer"),
    ],
)

# Free-form text question
text_classification = Classification(
    class_type=Classification.Type.TEXT,
    instructions="free_text",
)

# Checklist question whose answer carries a nested checklist sub-question
nested_checklist_classification = Classification(
    class_type=Classification.Type.CHECKLIST,
    instructions="nested_checklist_question",
    options=[
        Option(
            value="first_checklist_answer",
            options=[
                Classification(
                    class_type=Classification.Type.CHECKLIST,
                    instructions="sub_checklist_question",
                    options=[Option(value="first_sub_checklist_answer")],
                )
            ],
        )
    ],
)

ontology_builder = OntologyBuilder(
    classifications=[  # List of Classification objects
        radio_classification,
        nested_radio_classification,
        checklist_classification,
        text_classification,
        nested_checklist_classification,
    ]
)

ontology = client.create_ontology(
    "Ontology Text Predictions",
    ontology_builder.asdict(),
    media_type=MediaType.Html,
)


## Step 3: Create a Model and Model Run

In [10]:
# Create a Model with a unique name, tied to the ontology created above
model_name = f"HTML_model_run_{uuid.uuid4()}"
model = client.create_model(name=model_name, ontology_id=ontology.uid)

# Create a Model Run under that Model
model_run = model.create_model_run("iteration 1")

## Step 4: Send data rows to the Model Run

In [11]:
# Add the data row created in Step 1 to the Model Run, referenced by its id.
model_run.upsert_data_rows([data_row.uid])

True

## Step 5. Create the predictions payload

Create the annotations payload using the snippets of code in the **Supported Predictions** section.

Labelbox supports two formats for the annotations payload: NDJSON and Python annotation types. Both are described below to show how to compose your annotations into Labels attached to the data rows.

The resulting NDJSON payloads should have exactly the same content for annotations that are supported by both formats (with the exception of the generated uuid strings).

In [12]:
# Gather the Python-annotation predictions defined in "Supported Predictions"
prediction_annotations = [
    radio_prediction,
    radio_prediction_nested,
    checklist_prediction,
    text_prediction,
]

# Create a Label for predictions, bound to the data row by its id.
# NOTE(review): the asset is HTML but the label is wrapped in ImageData —
# confirm this is the intended data type for the installed SDK version.
label_prediction = Label(
    data=ImageData(uid=data_row.uid),
    annotations=prediction_annotations,
)

# Create a label list
label_list_prediction = [label_prediction]

# Serialize the Labelbox Label objects into the NDJSON format required for
# upload - uploads can be built directly in that syntax as well
ndjson_prediction = list(NDJsonConverter.serialize(label_list_prediction))

If using NDJSON: 

In [13]:

# Build upload-ready NDJSON predictions: every entry gets a fresh uuid and a
# reference to the target data row. Each entry is a copy, so the template
# dicts defined in earlier cells are not modified by running this cell.
ndjson_prediction_method2 = [
    {
        **prediction,
        'uuid': str(uuid.uuid4()),
        'dataRow': {'id': data_row.uid},
    }
    for prediction in [
        radio_prediction_ndjson,
        nested_radio_prediction_ndjson,
        checklist_prediction_ndjson,
        text_prediction_ndjson,
        nested_checklist_prediction_ndjson,
    ]
]

## Step 6. Upload the predictions payload to the Model Run 

In [14]:
# Upload the NDJSON predictions to the Model Run under a unique job name
upload_job_prediction = model_run.add_predictions(
    name=f"prediction_upload_job{uuid.uuid4()}",
    predictions=ndjson_prediction_method2,
)

# Explicitly wait for the import job to finish before reporting, the same way
# the label import cell does.
upload_job_prediction.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_prediction.errors)

Errors: []


## Step 7: Send annotations to the Model Run 
To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run.

##### 7.1. Create a Labelbox project

In [15]:
# Create a Labelbox project for HTML assets and attach the ontology to it
project_settings = dict(
    name="HTML prediction import demo",
    queue_mode=QueueMode.Batch,
    # Quality Settings setup
    auto_audit_percentage=1,
    auto_audit_number_of_labels=1,
    media_type=MediaType.Html,
)
project = client.create_project(**project_settings)
project.setup_editor(ontology)

##### 7.2. Create a batch to send to the project 

In [16]:
# Send every data row exported from the demo dataset to the project as a batch
project.create_batch(
    name="batch_prediction_html",  # Each batch in a project must have a unique name
    data_rows=dataset.export_data_rows(),  # Data rows (or data row ids) to include
    priority=5,  # priority between 1(Highest) - 5(lowest)
)

<Batch ID: 8e97f0d0-9c37-11ed-8a1b-5b30e16bddf7>

##### 7.3 Create the annotations payload

In [17]:
######  Annotations ######
# Ground-truth annotations in NDJSON form. They use the same classification
# names as the predictions but carry no confidence values.

radio_annotation_ndjson = {
    "name": "radio_question",
    "answer": {"name": "first_radio_answer"},
}

nested_radio_annotation_ndjson = {
    "name": "radio_question_sub",
    "answer": {
        "name": "first_radio_answer",
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {"name": "first_sub_radio_answer"},
            }
        ],
    },
}

nested_checklist_annotation_ndjson = {
    "name": "nested_checklist_question",
    "answer": [
        {
            "name": "first_checklist_answer",
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": {"name": "first_sub_checklist_answer"},
                }
            ],
        }
    ],
}

checklist_annotation_ndjson = {
    "name": "checklist_question",
    "answer": [
        {"name": "first_checklist_answer"},
    ],
}

text_annotation_ndjson = {
    "name": "free_text",
    "answer": "sample text",
}


##### 7.4. Create the label object

In [18]:
# Build upload-ready NDJSON annotations: every entry gets a fresh uuid and a
# reference to the target data row. Each entry is a copy, so the template
# dicts defined in the previous cell are not modified by running this cell.
ndjson_annotation = [
    {
        **annotation,
        'uuid': str(uuid.uuid4()),
        'dataRow': {'id': data_row.uid},
    }
    for annotation in [
        radio_annotation_ndjson,
        nested_radio_annotation_ndjson,
        nested_checklist_annotation_ndjson,
        checklist_annotation_ndjson,
        text_annotation_ndjson,
    ]
]

##### 7.5. Upload annotations to the project using Label Import

In [19]:
# Import the NDJSON annotations into the project as labels, under a unique job name
annotation_import_name = f"html_annotation_import{uuid.uuid4()}"
upload_job_annotation = LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=annotation_import_name,
    labels=ndjson_annotation,
)

# Block until the import job has finished
upload_job_annotation.wait_until_done()
# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)


Errors: []


##### 7.6 Send the annotations to the Model Run

In [20]:
# Export the project's labels, collect their ids and attach them to the Model Run
exported_labels = project.export_labels(download=True)
label_ids = [exported_label['ID'] for exported_label in exported_labels]
model_run.upsert_labels(label_ids)

True

## Optional deletions for cleanup 


In [21]:
# Uncomment the lines below to delete the demo project and dataset created by this notebook.
# project.delete()
# dataset.delete()