# Sabiá-3 Evaluation



Hardware - CPU

# Installations



In [None]:
! pip install maritalk

Collecting maritalk
  Downloading maritalk-0.2.6-py3-none-any.whl.metadata (11 kB)
Collecting httpx (from maritalk)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting tiktoken<0.8,>=0.7 (from maritalk)
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting httpcore==1.* (from httpx->maritalk)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx->maritalk)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading maritalk-0.2.6-py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m46.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m46.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.0-py3-none-any.wh

# 00 - Google Drive Mount



In [None]:
# Mount Google Drive under ./gdrive. force_remount=True is passed so that an
# already-mounted drive is presumably remounted (confirm against the
# google.colab documentation).
from google.colab import drive
drive.mount('./gdrive', force_remount=True)

Mounted at ./gdrive


# 01 - Imports



In [None]:
import os
import glob
import re
import numpy as np
import pandas as pd
import json
import requests
import maritalk

from google.colab import userdata
from pathlib import Path
from time import sleep

# 02 - Constants



In [None]:
# Identifier of the model under evaluation; it is also part of the result file names.
MODEL_NAME = "sabia-3"


# Root folder of the preprocessed datasets, read from Colab userdata.
PREPROCESSED_DATA_ROOT_PATH = userdata.get('IA_DATA_PREPROCESSED')


def _load_split(split_tag):
    """
    Load every '<name><split_tag>.csv' file found one folder below the data root.

    Args:
        split_tag (str): File-name suffix identifying the split, e.g. "_test".

    Returns:
        dict: Maps the dataset name (file stem up to split_tag) to a dict holding
              the loaded DataFrame under "df" and the file's folder under "path".
    """
    loaded = {}
    for csv_file in glob.glob(f'{PREPROCESSED_DATA_ROOT_PATH}/*/*{split_tag}.csv'):
        csv_path = Path(csv_file)
        dataset_name = csv_path.stem.split(split_tag)[0]
        loaded[dataset_name] = {"df": pd.read_csv(csv_file), "path": csv_path.parent}
    return loaded


# Test and demonstration splits, keyed by dataset name.
TEST_DATASETS = _load_split("_test")
DEMO_DATASETS = _load_split("_demo")

# Names of the datasets that already have a stored "_result_v2" file for this model.
RESULT_DATASETS = [
    Path(result_file).stem.split(f"_{MODEL_NAME}_result_v2")[0]
    for result_file in glob.glob(f'{PREPROCESSED_DATA_ROOT_PATH}/*/*_{MODEL_NAME}_result_v2.csv')
]


# Access tokens read from Colab userdata.
HF_AUTH = userdata.get('HF_TOKEN')
MARITACA_AUTH = userdata.get('MARITACA_TOKEN')


# Prompt preamble (in Portuguese) sent to the model: it asks for binary
# sentiment classification of Brazilian-Portuguese text (1 = positive,
# -1 = negative) with a JSON answer following the embedded schema, and ends
# by announcing the examples that are appended to it before use.
# NOTE(review): the first 'description' value in the schema has no opening
# quote before "Objeto"; left untouched because the text is sent verbatim
# to the model — confirm whether this is intended.
BASE_INSTRUCTION = """Você deverá realizar a tarefa de Classificação de Sentimento Binária em relação a polaridade de textos escritos no idioma português brasileiro considerando dois possíveis rótulos de saída: 1 para o sentimentos positivos ou -1 para negativos. A saída produzida deverá ser em formato JSON, seguindo o esquema definido entre os marcadores ```.```
{'type': 'object','description': Objeto de saída fornecido pelo classificador após a classificação de sentimento do texto de entrada.', 'properties': {'polaridade': {'type': 'integer','description': 'Polaridade em relação ao sentimento expressado no texto de entrada. Pode assumir 2 valores: [-1, 1]','enum': [-1,1]}},
  'required': ['polaridade']}```Considere os seguintes exemplos para realizar a predição:"""

# 03 - Functions


In [None]:
def generate_sorted_examples(dataframe:pd.DataFrame)->str:
    """
    Build the few-shot example section of the prompt from a demonstration DataFrame.

    Each of the first three rows is paired with the row three positions later
    (row 0 with row 3, row 1 with row 4, row 2 with row 5), so the emitted
    examples alternate between the first and the second group of three rows.
    Rows are addressed by position, so the result does not depend on the
    DataFrame's index labels.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing the examples.
                                  It should have columns 'text' and 'label'
                                  and at least six rows; an empty DataFrame
                                  yields an empty string.

    Returns:
        str: A string containing formatted examples of input text and their corresponding polarities.

    Raises:
        IndexError: If the DataFrame has between one and five rows, because
                    the partner row of a pair does not exist.
    """
    texts = dataframe['text']
    labels = dataframe['label']
    pair_offset = 3

    parts = []
    for first in range(min(pair_offset, len(dataframe))):
        # .iloc selects by position; plain [] would look up index *labels*
        # and fail (or pick other rows) when the index is not 0..n-1.
        for position in (first, first + pair_offset):
            parts.append(
                "\n"
                "Exemplo:\n"
                f"'entrada': '{texts.iloc[position]}'\n"
                "'saida':{'polaridade': " + f"{labels.iloc[position]}" + "}"
            )
    return ''.join(parts)


def generate_classification_text(text:str)->str:
    """
    Build the prompt fragment that asks for the classification of one text.

    Args:
        text (str): The input text to be classified.

    Returns:
        str: A newline followed by the classification header, the quoted
             input text and an open 'saida': slot, all on a single line.
    """
    segments = [
        "\n",
        "Classificação de Sentimento:",
        "'entrada': '{}'".format(text),
        "'saida':",
    ]
    return "".join(segments)

# 04 - Execution

In [None]:
# Evaluate every test dataset that does not have a stored result file yet.
# Results are written with the same "_result_v2" suffix that RESULT_DATASETS
# is built from; the previous "_result.csv" name was never matched by that
# glob, so finished datasets were evaluated again on every re-run.
result_suffix = f"_{MODEL_NAME}_result_v2"

for key, dataset in TEST_DATASETS.items():
    if key in RESULT_DATASETS:
        continue

    print(f'Starting to evaluate dataset: {key}')

    # The system message carries the base instruction plus the few-shot
    # examples built from this dataset's demonstration split.
    examples = generate_sorted_examples(DEMO_DATASETS[key]['df'])
    system_message = {'role': 'system',
                      'content': BASE_INSTRUCTION + examples}
    model = maritalk.MariTalk(key=MARITACA_AUTH,
                              model=MODEL_NAME)

    results_list = []
    for index, item in enumerate(dataset['df']['text']):
        print(index)
        user_message = {'role': 'user',
                        'content': generate_classification_text(item)}

        # Each request contains only the system message and the current text,
        # so earlier texts never leak into later requests.
        response = model.generate([system_message, user_message],
                                  temperature=0,
                                  max_tokens=20,
                                  do_sample=False)
        sleep(5)  # pause between requests (presumably to respect the API rate limit)
        results_list.append(response['answer'])

    # Store the raw model answers next to the inputs and persist them in the
    # dataset's own folder.
    dataset['df']['predictions'] = results_list
    dataset['df'].to_csv(dataset['path'] / f'{key}{result_suffix}.csv', index=False)

    # Refresh the list of finished datasets from disk.
    RESULT_DATASETS = [
        Path(result_file).stem.split(result_suffix)[0]
        for result_file in glob.glob(f'{PREPROCESSED_DATA_ROOT_PATH}/*/*{result_suffix}.csv')
    ]

    print(f'The evaluation of dataset:{key} has ended.')