<td>
   <a target="_blank" href="https://labelbox.com" ><img src="https://labelbox.com/blog/content/images/2021/02/logo-v4.svg" width=256/></a>
</td>


<td>
<a href="https://colab.research.google.com/github/Labelbox/labelbox-python/blob/develop/examples/annotation_import/conversational_LLM.ipynb" target="_blank"><img
src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
</td>

<td>
<a href="https://github.com/Labelbox/labelbox-python/tree/develop/examples/annotation_import/conversational_LLM.ipynb" target="_blank"><img
src="https://img.shields.io/badge/GitHub-100000?logo=github&logoColor=white" alt="GitHub"></a>
</td>

# LLM pairwise comparison with Conversational text using MAL and Ground truth
This demo showcases how to upload conversational row data that contains model outputs for pairwise comparison analysis.


In [None]:
%pip install -q "labelbox[data]"

# Set up

In [None]:
import labelbox as lb
import labelbox.types as lb_types
import uuid

# Replace with your API key

In [None]:
# Paste your Labelbox API key between the quotes, or leave it empty.
# An empty value is passed to the client as None, in which case the SDK looks
# the key up in the LABELBOX_API_KEY environment variable instead, so the
# secret does not have to be saved inside the notebook.
API_KEY = ""
client = lb.Client(api_key=API_KEY or None)

# Supported annotations for conversational text

### Entity 

In [None]:
# Entity ("ner") annotation attached to message "message-1", spanning the
# location start=0 .. end=8.
ner_annotation = lb_types.ObjectAnnotation(
    value=lb_types.ConversationEntity(
        message_id="message-1",
        start=0,
        end=8,
    ),
    name="ner",
)

# NDJSON form of the "ner" entity: same location and message id.
ner_annotation_ndjson = {
    "name": "ner",
    "location": {"start": 0, "end": 8},
    "messageId": "message-1",
}

### Classification: Radio (single-choice)

In [None]:
# Single-choice answer to "Choose the best response": picks "Response B".
radio_annotation = lb_types.ClassificationAnnotation(
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(name="Response B"),
    ),
    name="Choose the best response",
)

# NDJSON form of the radio answer.
radio_annotation_ndjson = {
    "name": "Choose the best response",
    "answer": {"name": "Response B"},
}

### Classification: Free-form text

In [None]:
# Free-form text answer to "Provide a reason for your choice".
# The answer string is kept identical to the one in text_annotation_ndjson so
# that the Python-object and NDJSON examples describe the same annotation.
text_annotation = lb_types.ClassificationAnnotation(
    name="Provide a reason for your choice",
    value=lb_types.Text(answer="This is the more concise answer"),
)

# NDJSON form of the free-text answer.
text_annotation_ndjson = dict([
    ("name", "Provide a reason for your choice"),
    ("answer", "This is the more concise answer"),
])

### Classification: Checklist (multi-choice)

In [None]:
# Multi-choice answer scoped to a single message ("message-1").
checklist_annotation = lb_types.ClassificationAnnotation(
    # The name must match your ontology feature's name.
    name="checklist_convo",
    message_id="message-1",  # makes this a message-specific annotation
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(name="first_checklist_answer"),
            lb_types.ClassificationAnswer(name="second_checklist_answer"),
        ],
    ),
)

# NDJSON form of the message-specific checklist answer.
checklist_annotation_ndjson = {
    "name": "checklist_convo",
    "answers": [
        {"name": "first_checklist_answer"},
        {"name": "second_checklist_answer"},
    ],
    "messageId": "message-1",
}

### Classification: Nested radio and checklist

In [None]:
# Message-based nested checklist: the top-level answer carries a
# sub-checklist classification with one selected option.
nested_checklist_annotation = lb_types.ClassificationAnnotation(
    name="nested_checklist_question",
    message_id="message-1",
    value=lb_types.Checklist(
        answer=[
            lb_types.ClassificationAnswer(
                name="first_checklist_answer",
                classifications=[
                    lb_types.ClassificationAnnotation(
                        name="sub_checklist_question",
                        value=lb_types.Checklist(
                            answer=[
                                lb_types.ClassificationAnswer(
                                    name="first_sub_checklist_answer",
                                ),
                            ],
                        ),
                    ),
                ],
            ),
        ],
    ),
)
# Message-based nested checklist, NDJSON form.
nested_checklist_annotation_ndjson = {
    "name": "nested_checklist_question",
    "messageId": "message-1",
    "answer": [
        {
            "name": "first_checklist_answer",
            "classifications": [
                {
                    "name": "sub_checklist_question",
                    "answer": {"name": "first_sub_checklist_answer"},
                },
            ],
        },
    ],
}
# Global nested radio: the selected answer carries a sub-radio classification.
# No message_id is given for this annotation.
nested_radio_annotation = lb_types.ClassificationAnnotation(
    name="nested_radio_question",
    value=lb_types.Radio(
        answer=lb_types.ClassificationAnswer(
            name="first_radio_answer",
            classifications=[
                lb_types.ClassificationAnnotation(
                    name="sub_radio_question",
                    value=lb_types.Radio(
                        answer=lb_types.ClassificationAnswer(
                            name="first_sub_radio_answer",
                        ),
                    ),
                ),
            ],
        ),
    ),
)
# Global nested radio, NDJSON form.
nested_radio_annotation_ndjson = {
    "name": "nested_radio_question",
    "answer": {
        "name": "first_radio_answer",
        "classifications": [
            {
                "name": "sub_radio_question",
                "answer": {"name": "first_sub_radio_answer"},
            },
        ],
    },
}

## Step 1: Import data rows with "modelOutputs" into Catalog

In addition to your message-based data, you will need to add a list of model outputs to your JSON file:

```
"modelOutputs" : [
  {
      "title": "Name of the response option",
      "content": "Content of the response",
      "modelConfigName": "Name of model configuration"
  }
]
```


#### Example of row_data with model outputs

In [None]:
# Example conversational row_data document that includes "modelOutputs".
# It is shown for illustration: the upload cell in this notebook points
# row_data at a hosted JSON file by URL rather than reading this variable.
#
# The literal is a raw string so that the "\n" sequences inside the JSON text
# stay as JSON escapes. In a regular string Python would turn them into real
# newline characters inside a JSON string value, which json.loads rejects.
pairwise_shopping_2 = r"""
 {
  "type": "application/vnd.labelbox.conversational",
  "version": 1,
  "messages": [
    {
      "messageId": "message-0",
      "timestampUsec": 1530718491,
      "content": "Hi! How can I help?",
      "user": {
        "userId": "Bot 002",
        "name": "Bot"
      },
      "align": "left",
      "canLabel": false
    },
    {
      "messageId": "message-1",
      "timestampUsec": 1530718503,
      "content": "I just bought a vacuum cleaner from you guys like a week ago and it's already broken!!",
      "user": {
        "userId": "User 00686",
        "name": "User"
      },
      "align": "right",
      "canLabel": true
    }

  ],
  "modelOutputs": [
    {
      "title": "Response A",
      "content": "I'm really sorry to hear that your vacuum cleaner is not working as expected. We certainly understand how frustrating this can be, especially with a recent purchase. I assure you, we're here to help!\n\n To proceed with resolving this issue, could you please provide some details about the problem you're experiencing with the vacuum cleaner? Additionally, if you could share your order number or receipt details, that would be very helpful. We will definitely look into the available options for you, which may include a replacement or a refund, depending on the specific circumstances and our return policy.\n\n Your satisfaction is our priority, and we'll do our best to resolve this matter promptly and to your satisfaction.",
      "modelConfigName": "GPT-3.5 with temperature 0"
    },
    {
      "title": "Response B",
      "content": "I'm sorry about the issue with your vacuum cleaner. Please send us the order number or receipt details so we can quickly assist you with a replacement. Your satisfaction is important to us!",
      "modelConfigName": "Fine Tuned GPT-3.5 with demo data"
    }
  ]
}
"""

In [None]:
# A random UUID suffix gives every run a different global key.
global_key = f"pairwise_shooping_asset{uuid.uuid4()}"

# Data row spec: hosted conversational JSON plus the global key from above.
convo_data = {
    "row_data": (
        "https://storage.googleapis.com/labelbox-datasets/"
        "conversational-sample-data/pairwise_shopping_2.json"
    ),
    "global_key": global_key,
}

# Create the dataset, then upload the single data row and wait for the task.
dataset = client.create_dataset(name="pairwise_annotation_demo")
task = dataset.create_data_rows([convo_data])
task.wait_till_done()

# Report the outcome of the upload task.
print("Errors:", task.errors)
print("Failed data rows:", task.failed_data_rows)

## Step 2: Create/select an Ontology

In [None]:
# Ontology with one NER tool plus the radio, free-text, checklist and nested
# classifications. The feature names here are the ones the annotation
# payloads refer to through their `name` fields.
ontology_builder = lb.OntologyBuilder(
    tools=[lb.Tool(name="ner", tool=lb.Tool.Type.NER)],
    classifications=[
        # Global single-choice question comparing the model responses.
        lb.Classification(
            name="Choose the best response",
            class_type=lb.Classification.Type.RADIO,
            scope=lb.Classification.Scope.GLOBAL,
            options=[
                lb.Option(value="Response A"),
                lb.Option(value="Response B"),
                lb.Option(value="Tie"),
            ],
        ),
        # Free-text justification (no explicit scope given).
        lb.Classification(
            name="Provide a reason for your choice",
            class_type=lb.Classification.Type.TEXT,
        ),
        # Checklist with INDEX scope.
        lb.Classification(
            name="checklist_convo",
            class_type=lb.Classification.Type.CHECKLIST,
            scope=lb.Classification.Scope.INDEX,
            options=[
                lb.Option(value="first_checklist_answer"),
                lb.Option(value="second_checklist_answer"),
            ],
        ),
        # INDEX-scoped checklist whose option nests a sub-checklist.
        lb.Classification(
            name="nested_checklist_question",
            class_type=lb.Classification.Type.CHECKLIST,
            scope=lb.Classification.Scope.INDEX,
            options=[
                lb.Option(
                    value="first_checklist_answer",
                    options=[
                        lb.Classification(
                            name="sub_checklist_question",
                            class_type=lb.Classification.Type.CHECKLIST,
                            options=[
                                lb.Option(value="first_sub_checklist_answer"),
                            ],
                        ),
                    ],
                ),
            ],
        ),
        # Global radio whose option nests a sub-radio.
        lb.Classification(
            name="nested_radio_question",
            class_type=lb.Classification.Type.RADIO,
            scope=lb.Classification.Scope.GLOBAL,
            options=[
                lb.Option(
                    value="first_radio_answer",
                    options=[
                        lb.Classification(
                            name="sub_radio_question",
                            class_type=lb.Classification.Type.RADIO,
                            options=[
                                lb.Option(value="first_sub_radio_answer"),
                            ],
                        ),
                    ],
                ),
            ],
        ),
    ],
)

# Register the ontology for conversational media.
ontology = client.create_ontology(
    "Pairwise comparison ontology",
    ontology_builder.asdict(),
    media_type=lb.MediaType.Conversational,
)

## Step 3: Create a labeling project

In [None]:
# New Labelbox project for conversational media.
project = client.create_project(
    media_type=lb.MediaType.Conversational,
    name="Conversational Text Annotation Import Demo (Pairwise comparison)",
)

# Connect the ontology (and editor) created earlier to this project.
project.setup_editor(ontology)

## Step 4: Send a batch of data rows to the project

In [None]:
# Send the uploaded data row to the project as a batch.
# - The batch name must be unique within the project.
# - Data rows can be given as a paginated collection of data row objects,
#   a list of data row ids, or (as here) global keys.
# - Priority ranges from 1 (highest) to 5 (lowest).
batch = project.create_batch(
    "first-batch-convo-demo",
    global_keys=[global_key],
    priority=5,
)

print("Batch: ", batch)

## Step 5: Create the annotations payload

Python annotation

In [None]:
# One Label for the uploaded data row (looked up by global key), bundling
# every Python-object annotation defined earlier in the notebook.
label = [
    lb_types.Label(
        data={"global_key": global_key},
        annotations=[
            ner_annotation,
            text_annotation,
            checklist_annotation,
            radio_annotation,
            nested_radio_annotation,
            nested_checklist_annotation,
        ],
    ),
]

NDJSON annotation

In [None]:
# Build the NDJSON payload: one entry per annotation, each tagged with the
# data row it belongs to via its global key.
# Every entry is a new dict, so the annotation dicts defined in earlier cells
# are left unmodified and this cell can be re-run safely, for example after a
# new global_key has been generated.
label_ndjson = [
    {**annotation_ndjson, "dataRow": {"globalKey": global_key}}
    for annotation_ndjson in (
        ner_annotation_ndjson,
        text_annotation_ndjson,
        checklist_annotation_ndjson,
        radio_annotation_ndjson,
        nested_checklist_annotation_ndjson,
        nested_radio_annotation_ndjson,
    )
]

## Step 6: Upload annotations to a project as pre-labels or complete labels 

### Model Assisted Labeling (MAL)

In [None]:
# Upload the labels as MAL pre-labels; the job name gets a random UUID suffix.
upload_job = lb.MALPredictionImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=f"mal_job-{uuid.uuid4()}",
    predictions=label,
)

# Wait for the import job, then show its errors and per-annotation statuses.
upload_job.wait_until_done()
print("Errors:", upload_job.errors)
print("Status of uploads: ", upload_job.statuses)

### Label Import

In [None]:
# Upload the same labels through LabelImport; the job name gets a random
# UUID suffix.
upload_job = lb.LabelImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name=f"label_import_job{uuid.uuid4()}",
    labels=label,
)

# Wait for the import job, then show its errors and per-annotation statuses.
upload_job.wait_until_done()
print("Errors:", upload_job.errors)
print("Status of uploads: ", upload_job.statuses)