<td>
   <a target="_blank" href="https://labelbox.com" ><img src="https://labelbox.com/blog/content/images/2021/02/logo-v4.svg" width=256/></a>
</td>

<td>
<a href="https://colab.research.google.com/github/Labelbox/labelbox-python/blob/master/examples/prediction_upload/text_predictions.ipynb" target="_blank"><img
src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
</td>

<td>
<a href="https://github.com/Labelbox/labelbox-python/blob/master/examples/prediction_upload/text_predictions.ipynb" target="_blank"><img
src="https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white" alt="GitHub"></a>
</td>

# Text Prediction Import
* This notebook walks you through the process of uploading model predictions to a Model Run. This notebook provides an example for each supported prediction type for text assets. 

Supported annotations that can be uploaded through the SDK: 

* Entity
* Classification radio 
* Classification checklist 
* Classification free-form text 

**Not** supported:
* Segmentation mask
* Polygon
* Bounding box 
* Polyline
* Point 


A Model Run is a container for the predictions, annotations and metrics of a specific experiment in your ML model development cycle.



## Setup

In [None]:
!pip install -q "labelbox[data]"

In [None]:
import labelbox as lb
import labelbox.types as lb_types
import uuid

## Replace with your API Key
See the guide on how to [create an API key](https://docs.labelbox.com/docs/create-an-api-key).

In [None]:
# Paste your Labelbox API key between the quotes before running this cell.
API_KEY = ""
# SDK client reused by the cells below.
client = lb.Client(api_key=API_KEY)

## Supported Predictions

In [None]:
########## Entities ##########

# Python annotation
# The entity span (start/end positions) is built first and then wrapped in an
# ObjectAnnotation carrying the tool name and the prediction confidence.
named_entity = lb_types.TextEntity(start=10, end=20)
entities_prediction = lb_types.ObjectAnnotation(
    name="named_entity",
    value=named_entity,
    confidence=0.5,
)


# NDJSON form of the same entity prediction
entities_prediction_ndjson = {
    "name": "named_entity",
    "confidence": 0.5,
    "location": {"start": 10, "end": 20},
}

In [None]:
########## Classification - Radio (single choice) ##########

# Python annotation
# The confidence score is set on the selected answer.
radio_prediction = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,
        )
    ),
)

# NDJSON form of the same radio prediction
radio_prediction_ndjson = {
    "name": "radio_question",
    "answer": {
        "name": "first_radio_answer",
        "confidence": 0.5,
    },
}

In [None]:
########## Classification - Radio and Checklist (with subclassifications) ##########

# Python annotation: a radio answer that itself carries a nested radio question
nested_radio_prediction = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            confidence=0.5,  # Confidence scores should be added to the answer
            classifications=[
                lb_types.ClassificationAnnotation(
                    name="sub_radio_question",
                    value=lb_types.Radio(
                        answer=lb_types.ClassificationAnswer(
                            name="first_sub_radio_answer",
                            confidence=0.5,
                        )
                    ),
                )
            ],
        )
    ),
)

# NDJSON form of the nested radio prediction
nested_radio_prediction_ndjson = {
    "name": "nested_radio_question",
    "answer": {
        "name": "first_radio_answer",
        "confidence": 0.5,  # Confidence scores should be added to the answer
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {
                    "name": "first_sub_radio_answer",
                    "confidence": 0.5,
                },
            }
        ],
    },
}

# Python annotation: a checklist answer that itself carries a nested checklist question
nested_checklist_prediction = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                confidence=0.5,  # Confidence scores should be added to the answer
                classifications=[
                    lb_types.ClassificationAnnotation(
                        name="sub_checklist_question",
                        value=lb_types.Checklist(
                            answer=[
                                lb_types.ClassificationAnswer(
                                    name="first_sub_checklist_answer",
                                    confidence=0.5,
                                )
                            ]
                        ),
                    )
                ],
            )
        ]
    ),
)

# NDJSON form of the nested checklist prediction
# NOTE(review): the sub-checklist "answer" is a single object here, whereas the
# Python annotation for the same prediction wraps its answer in a list —
# confirm which form the importer expects.
nested_checklist_prediction_ndjson = {
    "name": "nested_checklist_question",
    "answer": [
        {
            "name": "first_checklist_answer",
            "confidence": 0.5,  # Confidence scores should be added to the answer
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": {
                        "name": "first_sub_checklist_answer",
                        "confidence": 0.5,
                    },
                }
            ],
        }
    ],
}

In [None]:
########## Checklist ##########

# Python annotation: three selected answers, each with its own confidence
checklist_prediction = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(name="first_checklist_answer", confidence=0.5),
            lb_types.ClassificationAnswer(name="second_checklist_answer", confidence=0.5),
            lb_types.ClassificationAnswer(name="third_checklist_answer", confidence=0.5),
        ]
    ),
)


# NDJSON
# Lists the same three answers as the Python annotation `checklist_prediction`
# so that both payload formats describe an identical prediction.
checklist_prediction_ndjson = {
    "name": "checklist_question",
    "answer": [
        {"name": "first_checklist_answer", "confidence": 0.5},
        {"name": "second_checklist_answer", "confidence": 0.5},
        {"name": "third_checklist_answer", "confidence": 0.5},
    ],
}

In [None]:
########## Classification Free-Form text ##########
# Confidence scores are not supported in the free text classifications

# Python annotation
text_prediction = lb_types.ClassificationAnnotation(
    name="free_text",
    value=lb_types.Text(answer="sample text"),
)

# NDJSON form of the free-text prediction (no confidence field)
text_prediction_ndjson = {
    "name": "free_text",
    "answer": "sample text",
}

## Step 1: Import data rows into Catalog

In [None]:
# Create a dataset and add one sample text file to it as a data row.
# The global key is reused by later cells to reference this data row.
global_key = "lorem-ipsum.txt"

# Data row payload (a text asset, despite the variable name).
test_img_url = {
    "row_data": "https://storage.googleapis.com/labelbox-sample-datasets/nlp/lorem-ipsum.txt",
    "global_key": global_key,
}

dataset = client.create_dataset(
    name="text prediction demo dataset",
    # Removing this argument will default to the organization's default iam integration
    iam_integration=None,
)

# Data row creation runs as a task; wait for it before reading its outcome.
task = dataset.create_data_rows([test_img_url])
task.wait_till_done()
print("Errors:", task.errors)
print("Failed data rows:", task.failed_data_rows)

## Step 2: Create/select an Ontology for your model predictions
Your project should have the correct ontology setup with all the tools and classifications supported for your annotations, and the tool names and classification instructions should match the name/instructions fields in your annotations to ensure the correct feature schemas are matched.


In [None]:
## Set up the ontology. Every tool / classification name and option value
## below matches the names used by the predictions defined earlier.

ontology_builder = lb.OntologyBuilder(
    classifications=[  # List of Classification objects
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="radio_question",
            options=[lb.Option(value="first_radio_answer")],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            name="nested_radio_question",
            options=[
                lb.Option(
                    value="first_radio_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.RADIO,
                            name="sub_radio_question",
                            options=[lb.Option(value="first_sub_radio_answer")],
                        ),
                    ],
                ),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="checklist_question",
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer"),
                lb.Option(value="third_checklist_answer"),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.TEXT,
            name="free_text",
        ),
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            name="nested_checklist_question",
            options=[
                lb.Option(
                    value="first_checklist_answer",
                    options=[
                        lb.Classification(
                            class_type=lb.Classification.Type.CHECKLIST,
                            name="sub_checklist_question",
                            options=[lb.Option(value="first_sub_checklist_answer")],
                        ),
                    ],
                ),
            ],
        ),
    ],
    tools=[  # List of Tool objects
        lb.Tool(tool=lb.Tool.Type.NER, name="named_entity"),
    ],
)

# Register the ontology for text media.
ontology = client.create_ontology(
    "Ontology Text Predictions",
    ontology_builder.asdict(),
    media_type=lb.MediaType.Text,
)


## Step 3: Create a Model and Model Run

In [None]:
# Create a Model tied to the ontology; a random UUID is appended to the name.
model = client.create_model(
    name=f"text_model_run_{uuid.uuid4()}",
    ontology_id=ontology.uid,
)

# Create a Model Run on that Model
model_run = model.create_model_run("iteration 1")

## Step 4: Send data rows to the Model Run

In [None]:
# Send the data row, referenced by its global key, to the Model Run.
model_run.upsert_data_rows(
    global_keys=[global_key],
)

## Step 5. Create the predictions payload

Create the prediction payload using the snippets of code in the **Supported Predictions** section.

Labelbox supports two formats for the annotations payload: NDJSON and Python Annotation types. Both are described below to compose your annotations into Labels attached to the data rows.

The resulting NDJSON payload should have exactly the same content as the Python annotation payload for annotations that are supported by both formats.

In [None]:
# Bundle all Python-annotation predictions into a single Label that targets
# the data row by its global key.
label_predictions = [
    lb_types.Label(
        data=lb_types.TextData(global_key=global_key),
        annotations=[
            entities_prediction,
            nested_radio_prediction,
            radio_prediction,
            checklist_prediction,
            nested_checklist_prediction,
            text_prediction,
        ],
    )
]

If using NDJSON: 

In [None]:
# Tag each NDJSON prediction with the data row it belongs to and collect them.
# Note: the payload dicts are updated in place (a "dataRow" key is added).
ndjson_payloads = [
    entities_prediction_ndjson,
    radio_prediction_ndjson,
    checklist_prediction_ndjson,
    text_prediction_ndjson,
    nested_radio_prediction_ndjson,
    nested_checklist_prediction_ndjson,
]

label_ndjson_predictions = []
for annot in ndjson_payloads:
    annot["dataRow"] = {"globalKey": global_key}
    label_ndjson_predictions.append(annot)

## Step 6. Upload the predictions payload to the Model Run 

In [None]:
# Upload the NDJSON prediction payload to the Model Run
upload_job_prediction = model_run.add_predictions(
    name="prediction_upload_job" + str(uuid.uuid4()),
    predictions=label_ndjson_predictions,
)

# Block until the import job has finished, as is done for the label import in
# step 7.5, so the report below describes the completed job.
upload_job_prediction.wait_until_done()

# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_prediction.errors)
print("Status of uploads: ", upload_job_prediction.statuses)

## Step 7: Send annotations to the Model Run 
To send annotations to a Model Run, we must first import them into a project, create a label payload and then send them to the Model Run.

##### 7.1. Create a Labelbox project

In [None]:
# Create a Labelbox project for text data, then attach the ontology to it
project = client.create_project(
    name="Text Prediction Import Demo",
    auto_audit_percentage=1,
    auto_audit_number_of_labels=1,
    media_type=lb.MediaType.Text,
)
project.setup_editor(ontology)

##### 7.2. Create a batch to send to the project 

In [None]:
# Queue the data row in the project by sending it as a batch.
project.create_batch(
    "batch_text_prediction_demo",  # Each batch in a project must have a unique name
    global_keys=[global_key],  # Paginated collection of data row objects, list of data row ids or global keys
    priority=5,  # priority between 1 (highest) and 5 (lowest)
)

##### 7.3 Create the annotations payload

In [None]:
# Ground-truth counterparts of the predictions: same names and values, but
# without confidence scores.

# Entity
named_entity = lb_types.TextEntity(start=10, end=20)
entities_annotation = lb_types.ObjectAnnotation(
    name="named_entity",
    value=named_entity,
)

# Radio
radio_annotation = lb_types.ClassificationAnnotation(
    name="radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="first_radio_answer")
    ),
)

# Radio answer carrying a nested radio question
nested_radio_annotation = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            classifications=[
                lb_types.ClassificationAnnotation(
                    name="sub_radio_question",
                    value=lb_types.Radio(
                        answer=lb_types.ClassificationAnswer(
                            name="first_sub_radio_answer"
                        )
                    ),
                )
            ],
        )
    ),
)

# Checklist answer carrying a nested checklist question
nested_checklist_annotation = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                classifications=[
                    lb_types.ClassificationAnnotation(
                        name="sub_checklist_question",
                        value=lb_types.Checklist(
                            answer=[
                                lb_types.ClassificationAnswer(
                                    name="first_sub_checklist_answer"
                                )
                            ]
                        ),
                    )
                ],
            )
        ]
    ),
)

# Checklist with three selected answers
checklist_annotation = lb_types.ClassificationAnnotation(
    name="checklist_question",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(name="first_checklist_answer"),
            lb_types.ClassificationAnswer(name="second_checklist_answer"),
            lb_types.ClassificationAnswer(name="third_checklist_answer"),
        ]
    ),
)

# Free-form text
text_annotation = lb_types.ClassificationAnnotation(
    name="free_text",
    value=lb_types.Text(answer="sample text"),
)

##### 7.4. Create the label object

In [None]:
# Create a Label object by identifying the applicable data row in Labelbox
# (via its global key) and providing the list of ground-truth annotations.
label = [
    lb_types.Label(
        data=lb_types.TextData(global_key=global_key),
        annotations=[
            entities_annotation,
            nested_radio_annotation,
            radio_annotation,
            checklist_annotation,
            nested_checklist_annotation,
            text_annotation,
        ],
    )
]

##### 7.5. Upload annotations to the project using Label Import

In [None]:
# Import the ground-truth labels into the project; a random UUID is appended
# to the job name.
upload_job_annotation = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=f"text_label_import_job{uuid.uuid4()}",
    labels=label,
)

# Wait for the import job to finish before reading its results.
upload_job_annotation.wait_until_done()

# Errors will appear for annotation uploads that failed.
print("Errors:", upload_job_annotation.errors)
print("Status of uploads: ", upload_job_annotation.statuses)

##### 7.6 Send the annotations to the Model Run

In [None]:
# Send the project's labels to the Model Run, selecting them by project id
model_run.upsert_labels(
    project_id=project.uid,
)

## Optional deletions for cleanup 


In [None]:
# project.delete()
# dataset.delete()