# TODO:
- ~~enforce output format for gemini~~
- llama, gpt, claude
- add evaluation if there is a golden set for individual model + compare result with other LLMs
- aggregation strategy
- add support for multi-class classification / NER
- add images
- add result/eval visualization


nice things to do:
- add tqdm to asyncio calls
- proper logging

In [1]:
from utils import Annotate, Evaluate
from datasets import load_dataset

seed = 42  # fixed seed so the shuffled dev sample is the same on every run

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Prompt template for the annotation calls; despite the name, the prompts built
# from it are passed to both the "gemini" and the "claude" model in this notebook.
# Filled with str.format; placeholders: {description}, {datapoint}, {labels}.
# Typos in the instruction text ("familirize", "dadatapoint") were corrected so
# the model receives clean instructions; layout and placeholders are unchanged.
gemini_prompt_template = """
<data_description>
{description}
</data_description>
-----------

<context>
{datapoint}
</context>
------------

<labels>
{labels}
</labels>
------------

INSTRUCTION:
- familiarize yourself with the data using data_description
- read the context carefully. this is the data point you need to label.
- take your time and label the datapoint with the most appropriate option using the provided labels.
- return the result as a single label from the <labels>. Don't provide explanations
"""

In [3]:
# Training split of the Yelp polarity dataset:
# https://huggingface.co/datasets/yelp_polarity
dataset = load_dataset("yelp_polarity", split="train")

# Shuffle with the fixed seed and keep the first 1000 rows as a small
# sample for dev purposes.
dataset_sample = (
    dataset
    .shuffle(seed=seed)
    .select(range(1000))
)

# User-provided data description; it is substituted into the prompt template.
DESCRIPTION = """
This is a dataset for binary sentiment classification.
It contains highly polar yelp reviews.
Negative polarity is class 0, and positive class 1.
"""

# Labels offered to the model in the prompt.
LABEL_SET = [0, 1]

In [4]:
# Build one filled-in prompt per review text, for the first five sampled rows only.
prompt = [
    gemini_prompt_template.format(
        description=DESCRIPTION,
        datapoint=text,
        labels=LABEL_SET,
    )
    for text in dataset_sample["text"][:5]
]
len(prompt)

5

In [5]:
# Helper objects imported from the local `utils` module.
# `ann` is used below for the LLM labelling calls (`ann.TextClassification`);
# `eval` is used to score predictions against ground truth (`eval.TextClfWithGT`).
# NOTE(review): `eval` shadows Python's builtin `eval` for the rest of the notebook;
# consider renaming it (e.g. `evaluator`) together with every cell that uses it.
ann = Annotate()
eval = Evaluate()

In [6]:
# gemini: pass the prepared prompts to Annotate.TextClassification with model="gemini".
# Top-level `await` only works inside the notebook (IPython autoawait), not in a plain script.
# NOTE(review): the list shown under this cell is presumably printed by the method itself,
# since an assignment produces no cell output — confirm in utils.
result = await ann.TextClassification(prompt, model="gemini")

[1, 0, 0, 1, 1]


In [8]:
# claude haiku: same prompts, sent with model="claude".
# NOTE: this rebinds `result`, so the Gemini predictions from the earlier cell are no
# longer available; the evaluation cell scores whatever `result` was assigned last.
result = await ann.TextClassification(prompt, model="claude")

[1, 0, 0, 1, 1]


In [9]:
# Score the current `result` against the gold labels of the same five sampled
# rows that the prompts were built from.
eval.TextClfWithGT(y_true=dataset_sample["label"][:5], y_pred=result)

{'accuracy': 1.0,
 'f1_weighted': 1.0,
 'confusion_matrix': array([[2, 0],
        [0, 3]])}

## anthropic

In [None]:
from anthropic import AnthropicVertex


In [None]:
# Draft configuration for Claude on Vertex AI; see
# https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude
claude_config = {
    "project_config": {
        "qpm": 60,  # presumably the queries-per-minute limit — confirm against the page above
        "project": "cloud-llm-preview1",
        "location": "us-central1",
    },
    "generation_config": "",  # left empty for now
}


In [None]:
LOCATION = "us-central1"  # or "europe-west4"

# Direct call through the Anthropic Vertex client, bypassing the Annotate helper.
client = AnthropicVertex(region=LOCATION, project_id="cloud-llm-preview1")

# Send only the first prepared prompt as a single user message.
message = client.messages.create(
    model="claude-3-haiku@20240307",
    max_tokens=1024,
    messages=[{"role": "user", "content": prompt[0]}],
)

# Show just the text of the first content block of the reply.
print(message.content[0].text)

In [None]:
# Print the whole response object serialized as JSON with 2-space indentation,
# rather than only the text of the first content block.
print(message.model_dump_json(indent=2))