In [80]:
from transformers import (
    AutoModelForSequenceClassification, 
    AutoTokenizer,
    TextClassificationPipeline,
)
import torch

In [81]:
THRESHOLD = 0.5

In [91]:
MODEL_PATH_NO_NEUTRAL = "CarusoVitor/multiemotion-portuguese"
MODEL_PATH_WITH_NEUTRAL = "CarusoVitor/multiemotion-portuguese-base-emotions"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [92]:
labels_neutral = ['Anger', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise', 'Neutral']
labels = ['Anger', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise']

In [93]:
configs = [
    (MODEL_PATH_NO_NEUTRAL, labels_neutral),
    (MODEL_PATH_WITH_NEUTRAL, labels)
]

In [94]:
pipes = {}
for p, l in configs:
    id2label = {idx:label for idx, label in enumerate(l)}
    label2id = {label:idx for idx, label in enumerate(l)}

    model = AutoModelForSequenceClassification.from_pretrained( 
    p,
    problem_type="multi_label_classification",
    num_labels=len(l),
    id2label=id2label,
    label2id=label2id
).to(device)
    tokenizer = AutoTokenizer.from_pretrained(p)
    pipes[p] = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True, function_to_apply="sigmoid")

Device set to use cpu
Device set to use cpu


In [86]:
sentence = "td vez que experimento, vejo o quão feio meu corpo está... af"

In [101]:
import pandas as pd

dev = pd.read_csv("public_data/dev/track_a/ptbr_a.csv")

In [107]:
for sentence in dev.text:
    no_neutral = pipes["CarusoVitor/multiemotion-portuguese-base-emotions"]
    with_neutral = pipes["CarusoVitor/multiemotion-portuguese"]

    scores = no_neutral(sentence)[0]
    predicted = [item["label"] for item in scores if item["score"] > THRESHOLD]

    scores_neutral = with_neutral(sentence)[0]
    predicted_neutral = [item["label"] for item in scores_neutral if item["score"] > THRESHOLD]

    results = {
        "CarusoVitor/multiemotion-portuguese": predicted,
        "CarusoVitor/multiemotion-portuguese-base-emotions": predicted_neutral
    }

    if predicted != predicted_neutral and len(predicted) > 0:
        for path, predicted in results.items():
            print("-"*100)
            print(path)
            print(f"frase: {sentence}")
            print(f"emoções: {predicted}")

----------------------------------------------------------------------------------------------------
CarusoVitor/multiemotion-portuguese
frase: Gol cagado de bate rebate, numa falta mequetrefe. Alguém amaldiçoou nosso segundo tempo...
emoções: ['Anger', 'Sadness']
----------------------------------------------------------------------------------------------------
CarusoVitor/multiemotion-portuguese-base-emotions
frase: Gol cagado de bate rebate, numa falta mequetrefe. Alguém amaldiçoou nosso segundo tempo...
emoções: ['Anger']
----------------------------------------------------------------------------------------------------
CarusoVitor/multiemotion-portuguese
frase: tá muito linda pqp
emoções: ['Joy', 'Surprise']
----------------------------------------------------------------------------------------------------
CarusoVitor/multiemotion-portuguese-base-emotions
frase: tá muito linda pqp
emoções: ['Joy']
---------------------------------------------------------------------------------