<td>   <a target="_blank" href="https://labelbox.com" ><img src="https://labelbox.com/blog/content/images/2021/02/logo-v4.svg" width=256/></a></td>


<td>
<a href="https://colab.research.google.com/github/Labelbox/labelbox-python/blob/develop/examples/annotation_import/offline_multimodal_chat_evaluation.ipynb" target="_blank"><img
src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
</td>

<td>
<a href="https://github.com/Labelbox/labelbox-python/tree/develop/examples/annotation_import/offline_multimodal_chat_evaluation.ipynb" target="_blank"><img
src="https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white" alt="GitHub"></a>
</td>

# Offline multimodal chat evaluation annotation import

This notebook provides examples of each annotation type supported by the offline multimodal chat evaluation project and walks through the complete process of importing annotations as prelabels (model-assisted labeling) or ground truth.

# Set up

In [None]:
%pip install -q "labelbox[data]"

In [None]:
import labelbox as lb
import labelbox.types as lb_types
import uuid

# Replace with your API key

Replace the value of `API_KEY` with a valid [API key](https://docs.labelbox.com/reference/create-api-key) to connect to the Labelbox client.

In [None]:
# Paste your Labelbox API key between the quotes. If it is left empty, `None`
# is passed instead so the SDK falls back to the LABELBOX_API_KEY environment
# variable rather than sending an empty key.
API_KEY = ""
client = lb.Client(api_key=API_KEY or None)

In [None]:
from labelbox.types import (
    Label,
    MessageEvaluationTaskAnnotation,
    MessageInfo,
    MessageMultiSelectionTask,
    MessageRankingTask,
    MessageSingleSelectionTask,
    OrderedMessageInfo,
)

# Supported annotations for multimodal chat evaluation

### Tool: Message ranking

In [None]:
# Message ranking tool: two messages under one parent message, each with the
# position (`order`) it holds in the ranking.
message_ranking_annotation = lb_types.MessageEvaluationTaskAnnotation(
    name="Message ranking",
    value=lb_types.MessageRankingTask(
        parent_message_id="clxfznjb800073b6v43ppx9ca",
        ranked_messages=[
            lb_types.OrderedMessageInfo(
                message_id="clxfzocbm00083b6v8vczsept",
                model_config_name="GPT 4 with temperature 0.7",
                order=1,
            ),
            lb_types.OrderedMessageInfo(
                message_id="clxfzocbm00093b6vx4ndisub",
                model_config_name="GPT 5",
                order=2,
            ),
        ],
    ),
)

# NDJSON-style payload for the message ranking tool.
# "name" must be the ontology tool's name ("Message ranking", as created in
# Step 2); the previous value "model output multi ranking" matched no feature.
message_ranking_annotation_ndjson = {
    "name": "Message ranking",
    "messageEvaluationTask": {
        "format": "message-ranking",
        "data": {
            "parentMessageId": "clxfzhair00023b6vb607bqo6",
            "rankedMessages": [
                {
                    "messageId": "clxfzi3r400063b6vuaeajylo",
                    "modelConfigName": "GPT 4 with temperature 0.7",
                    "order": 2,
                },
                {
                    "messageId": "clxfzi3r400053b6vm5udpdgo",
                    "modelConfigName": "GPT 5",
                    "order": 1,
                },
            ],
        },
    },
}

### Tool: Single message selection

In [None]:
# Single message selection tool: picks one message under a parent message.
single_message_selection_annotation = lb_types.MessageEvaluationTaskAnnotation(
    name="Single message selection",
    value=lb_types.MessageSingleSelectionTask(
        parent_message_id="clxfzhair00023b6vb607bqo6",
        message_id="clxfzi3r400053b6vm5udpdgo",
        model_config_name="GPT 4 with temperature 0.7",
    ),
)
# NDJSON-style payload for the single message selection tool.
single_message_selection_annotation_ndjson = {
    "name": "Single message selection",
    "messageEvaluationTask": {
        "format": "message-single-selection",
        "data": dict(
            messageId="clxfzi3r400053b6vm5udpdgo",
            parentMessageId="clxfzhair00023b6vb607bqo6",
            modelConfigName="GPT 4 with temperature 0.7",
        ),
    },
}

### Tool: Multiple message selection

In [None]:
# Multiple message selection tool: selects several messages under one parent message.
multiple_message_selection_annotation = lb_types.MessageEvaluationTaskAnnotation(
    name="Multi message selection",
    value=lb_types.MessageMultiSelectionTask(
        parent_message_id="clxfzhair00023b6vb607bqo6",
        selected_messages=[
            lb_types.MessageInfo(
                message_id="clxfzi3r400063b6vuaeajylo",
                model_config_name="GPT 4 with temperature 0.7",
            ),
            lb_types.MessageInfo(
                message_id="clxfzi3r400053b6vm5udpdgo",
                model_config_name="GPT 5",
            ),
        ],
    ),
)
# NDJSON-style payload for the multiple message selection tool.
multiple_message_selection_annotation_ndjson = {
    "name": "Multi message selection",
    "messageEvaluationTask": {
        "format": "message-multi-selection",
        "data": {
            "parentMessageId": "clxfzhair00023b6vb607bqo6",
            "selectedMessages": [
                {"messageId": "clxfzi3r400063b6vuaeajylo",
                 "modelConfigName": "GPT 4 with temperature 0.7"},
                {"messageId": "clxfzi3r400053b6vm5udpdgo",
                 "modelConfigName": "GPT 5"},
            ],
        },
    },
}

### Classification: Radio (single-choice)

In [None]:
# Radio classification: exactly one answer option is chosen.
radio_annotation = lb_types.ClassificationAnnotation(
    name="Choose the best response",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="Response B"),
    ),
)

# NDJSON-style payload for the radio classification.
radio_annotation_ndjson = {
    "name": "Choose the best response",
    "answer": {"name": "Response B"},
}

### Classification: Free-form text

In [None]:
# Free-form text classification: the answer is a plain string.
text_annotation = lb_types.ClassificationAnnotation(
    name="Provide a reason for your choice",
    value=lb_types.Text(
        answer="the answer to the text questions right here",
    ),
)

# NDJSON-style payload for the free-form text classification.
text_annotation_ndjson = dict(
    name="Provide a reason for your choice",
    answer="This is the more concise answer",
)

### Classification: Checklist (multi-choice)

In [None]:
# Checklist classification: several answers can be selected at once.
checklist_annotation = lb_types.ClassificationAnnotation(
    name="checklist_convo",  # must match your ontology feature's name
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(name="first_checklist_answer"),
            lb_types.ClassificationAnswer(name="second_checklist_answer"),
        ],
    ),
    # Ties the annotation to one specific message
    message_id="clxfznjb800073b6v43ppx9ca",
)

# NDJSON-style payload for the checklist classification, tied to one message.
checklist_annotation_ndjson = {
    "name": "checklist_convo",
    "answers": [
        {"name": "first_checklist_answer"},
        {"name": "second_checklist_answer"},
    ],
    "messageId": "clxfznjb800073b6v43ppx9ca",
}

## Step 1: Import data rows into Catalog

In [None]:
# Sample conversation asset and the global key used to reference it later
mmc_asset = "https://storage.googleapis.com/labelbox-datasets/conversational_model_evaluation_sample/offline-model-chat-evaluation.json"
global_key = "offline-multimodal_chat_evaluation"

# Payload for the single data row to upload
convo_data = dict(row_data=mmc_asset, global_key=global_key)

# Create a dataset, then upload the data row into it
dataset = client.create_dataset(name="offline-multimodal_chat_evaluation_demo")
task = dataset.create_data_rows([convo_data])
task.wait_till_done()  # block until the upload task has finished

print("Errors:", task.errors)
print("Failed data rows:", task.failed_data_rows)

## Step 2: Create/select an Ontology

In [None]:
# The feature names below match the `name` fields of the Python annotations
# defined earlier in this notebook.
ontology_builder = lb.OntologyBuilder(
    tools=[
        lb.Tool(tool=lb.Tool.Type.MESSAGE_SINGLE_SELECTION,
                name="Single message selection"),
        lb.Tool(tool=lb.Tool.Type.MESSAGE_MULTI_SELECTION,
                name="Multi message selection"),
        lb.Tool(tool=lb.Tool.Type.MESSAGE_RANKING, name="Message ranking"),
    ],
    classifications=[
        lb.Classification(
            class_type=lb.Classification.Type.RADIO,
            scope=lb.Classification.Scope.GLOBAL,
            name="Choose the best response",
            options=[
                lb.Option(value="Response A"),
                lb.Option(value="Response B"),
                lb.Option(value="Tie"),
            ],
        ),
        lb.Classification(
            class_type=lb.Classification.Type.TEXT,
            name="Provide a reason for your choice",
        ),
        lb.Classification(
            class_type=lb.Classification.Type.CHECKLIST,
            scope=lb.Classification.Scope.INDEX,
            name="checklist_convo",
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer"),
            ],
        ),
    ],
)

# Register the ontology with Labelbox
ontology = client.create_ontology(
    "MMC ontology",
    ontology_builder.asdict(),
    media_type=lb.MediaType.Conversational,
    ontology_kind=lb.OntologyKind.ModelEvaluation,
)

## Step 3: Create a labeling project

In [None]:
# Create the offline model evaluation project
project = client.create_offline_model_evaluation_project(
    name="Offline MMC Import Demo",
    description="<project_description>",  # optional
    media_type=lb.MediaType.Conversational,
)

# Attach the ontology created in the previous step to the project
project.connect_ontology(ontology)

## Step 4: Send a batch of data rows to the project

In [None]:
# Send the uploaded data row to the project as a batch
batch = project.create_batch(
    "first-batch-convo-demo",  # batch names must be unique within a project
    global_keys=[global_key],  # global keys of the data rows to include
    priority=5,  # 1 (highest) to 5 (lowest)
)

print("Batch: ", batch)

## Step 5: Create the annotations payload

Python annotation

In [None]:
# One Label per data row, bundling every Python annotation defined above
label = [
    lb_types.Label(
        data={"global_key": global_key},
        annotations=[
            message_ranking_annotation,
            single_message_selection_annotation,
            multiple_message_selection_annotation,
            text_annotation,
            checklist_annotation,
            radio_annotation,
        ],
    )
]

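NDJSON annotation (to import this payload instead of the Python one, pass `label_ndjson` in place of `label` to the import calls in Step 6)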

In [None]:
# Attach the target data row to each NDJSON annotation (the dicts are updated
# in place) and collect them into a single payload list.
label_ndjson = []
for ndjson_annotation in (
        message_ranking_annotation_ndjson,
        single_message_selection_annotation_ndjson,
        multiple_message_selection_annotation_ndjson,
        text_annotation_ndjson,
        checklist_annotation_ndjson,
        radio_annotation_ndjson,
):
    ndjson_annotation["dataRow"] = {"globalKey": global_key}
    label_ndjson.append(ndjson_annotation)

## Step 6: Import annotations to a project as pre-labels or ground truth labels

### Import as prelabels (model assisted labeling)

In [None]:
# Upload the payload as prelabels; the uuid suffix gives each run a distinct job name
upload_job = lb.MALPredictionImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=f"mal_job-{uuid.uuid4()}",
    predictions=label,
)

# Block until the import finishes, then report the outcome
upload_job.wait_until_done()
print("Errors:", upload_job.errors)
print("Status of uploads: ", upload_job.statuses)

### Import as ground truth labels

In [None]:
# Upload the payload as ground truth labels; the uuid suffix gives each run a distinct job name
upload_job = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=f"label_import_job{uuid.uuid4()}",
    labels=label,
)

# Block until the import finishes, then report the outcome
upload_job.wait_until_done()
print("Errors:", upload_job.errors)
print("Status of uploads: ", upload_job.statuses)