## Teste Modelo tabularisai/multilingual-sentiment-analysis

### Modelo 1

In [36]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

model_name = "tabularisai/multilingual-sentiment-analysis"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

def predict_sentiment(texts):
    inputs = tokenizer(texts, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    sentiment_map = {0: "Very Negative", 1: "Negative", 2: "Neutral", 3: "Positive", 4: "Very Positive"}
    return [sentiment_map[p] for p in torch.argmax(probabilities, dim=-1).tolist()]

texts = [
    "Café cresce em 2025 e lidera criação de bovinos nacionalmente."
]

for text, sentiment in zip(texts, predict_sentiment(texts)):
    print(f"Text: {text}\nSentiment: {sentiment}\n")


Text: Café cresce em 2025 e lidera criação de bovinos nacionalmente.
Sentiment: Neutral



### Modelo 2

In [None]:
#LIMPA CACHE

limpacache = False

import shutil
from transformers.utils import default_cache_path
if limpacache:
   cache_dir = default_cache_path
   shutil.rmtree(cache_dir)


In [None]:
import torch
from transformers import (
    MBart50TokenizerFast,
    MBartForConditionalGeneration,
    AutoTokenizer,
    AutoModelForSequenceClassification
)

In [None]:
#Tradutor
TRANSLATE_MODEL = "Narrativa/mbart-large-50-finetuned-opus-en-pt-translation"

tokenizer_tr = MBart50TokenizerFast.from_pretrained(TRANSLATE_MODEL)
model_tr = MBartForConditionalGeneration.from_pretrained(TRANSLATE_MODEL)

# CONFIGURAÇÃO CRÍTICA DE IDIOMA
tokenizer_tr.src_lang = "pt_XX"
tokenizer_tr.tgt_lang = "en_XX"

model_tr.eval()


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:  55%|#####4    | 1.34G/2.44G [00:00<?, ?B/s]

MBartForConditionalGeneration(
  (model): MBartModel(
    (shared): MBartScaledWordEmbedding(250054, 1024, padding_idx=1)
    (encoder): MBartEncoder(
      (embed_tokens): MBartScaledWordEmbedding(250054, 1024, padding_idx=1)
      (embed_positions): MBartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x MBartEncoderLayer(
          (self_attn): MBartAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True

In [50]:
#Analise de noticia
MODEL_NAME = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

tokenizer_as = AutoTokenizer.from_pretrained(MODEL_NAME)
model_as = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

model_as.eval()  # modo inferência

tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/933 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/328M [00:00<?, ?B/s]

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-5): 6 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
           

In [43]:
def classificar_sentimento(textos_en):
    inputs = tokenizer_as(
        textos_en,
        return_tensors="pt",
        padding=True,
        truncation=True
    )

    with torch.no_grad():
        outputs = model_as(**inputs)

    probs = torch.softmax(outputs.logits, dim=-1)

    resultados = []
    for linha in probs:
        r = {}
        for i, p in enumerate(linha):
            label = model_as.config.id2label[i]
            r[label] = round(p.item() * 100, 2)
        resultados.append(r)

    return resultados


In [46]:
frases_pt = [
    "A produção de milho bateu recorde histórico.",
    "Chuvas excessivas atrasaram o plantio da safra."
]

frases_en = traduzir_pt_en(frases_pt)
print(frases_en)
# resultados = classificar_sentimento(frases_en)

# for pt, en, r in zip(frases_pt, frases_en, resultados):
#     print("\nPT:", pt)
#     print("EN:", en)
#     print("Sentimento:", r)


['A produção de produção de milho bateu bateu bateu recorde histórico histórico histórico.', 'Chuvas excessivas excessivas excessivas atrasaram o plantio da safra.']
