# Classificação de Discurso de Ódio 
-------------------------------------
## Abordagem: tf-idf com Regressão Logística

-------------------------------------

## Importações e Definições

In [1]:
import pandas as pd
import numpy as np
from datasets import load_dataset
import torch

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset

from utils import preprocess_text, format_lime_output, print_multilabel_metrics

[nltk_data] Downloading package stopwords to /home/penido/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
target = ['aggressive', 'hate', 'ageism', 'aporophobia', 'body_shame', 'capacitism', 'lgbtphobia', 'political', 'racism', 'religious_intolerance', 'misogyny', 'xenophobia', 'other']
features = 'text'

In [3]:
# Configurações para o pré-processamento
config = {
    "lowercase": True,
    "remove_accents": True,
    "remove_punctuation": True,
    "remove_numbers": True,
    "remove_urls": True,
    "remove_mentions_hashtags": True,
    "expand_abbreviations": True,
    "expand_contractions": False,
    "normalize_laughter": True,
    "remove_emojis": True,
    "remove_stopwords": True,
    "lemmatize": True,
    "stemming": False,
    "pos_filter": False,
    "min_token_length": 2,
    "negation_scope": False,
    "replace_swears": False,
    "split_hashtags": False,
    "merge_mwes" : True,
    "replace_named_entities" : False
}

--------------------------------------------
## Prepara o Conjunto de Dados

In [4]:
# 1. Carrega o dataset TuPyE multilabel
df = load_dataset("Silly-Machine/TuPyE-Dataset", name="multilabel")

train_df = df['train'].to_pandas()
test_df = df['test'].to_pandas()

X_train_raw = train_df[features]
y_train = train_df[target].values

X_test_raw = test_df[features]
y_test = test_df[target].values

### Aplica Pré-Processamento

In [5]:
X_train = X_train_raw.apply(lambda x: preprocess_text(x, config))
X_test = X_test_raw.apply(lambda x: preprocess_text(x, config))

--------------------------------------------
## Treinamento do Modelo

In [6]:
# 2. Tokenizador BERT em português
model_name = "neuralmind/bert-base-portuguese-cased"
tokenizer = BertTokenizer.from_pretrained(model_name)

In [7]:
# 3. Crie seu dataset personalizado
class MultilabelDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.encodings = tokenizer(texts, truncation=True, padding=True, max_length=max_length)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

    def __len__(self):
        return len(self.labels)

In [8]:
train_dataset = MultilabelDataset(X_train.tolist(), y_train, tokenizer)
test_dataset = MultilabelDataset(X_test.tolist(), y_test, tokenizer)

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(target), problem_type="multi_label_classification")
model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(29794, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [10]:
training_args = TrainingArguments(
    output_dir='./results',       # Ainda obrigatório, mas o Trainer não vai salvar nada
    per_device_train_batch_size=10,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_dir='./logs',
    logging_steps=50,
    save_strategy="no",           # <-- desativa o salvamento de modelos/checkpoints
)

In [11]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

In [13]:
trainer.train()

Step,Training Loss
50,0.2865
100,0.1687
150,0.142
200,0.1465
250,0.143
300,0.1476
350,0.1461
400,0.1369
450,0.1379
500,0.1362


TrainOutput(global_step=10482, training_loss=0.12831080153206337, metrics={'train_runtime': 6286.3345, 'train_samples_per_second': 16.671, 'train_steps_per_second': 1.667, 'total_flos': 6894322053410304.0, 'train_loss': 0.12831080153206337, 'epoch': 3.0})

-----------------------------------
## Avaliação no conjunto de Teste

In [14]:
def predict(text):
    model.eval()  # Garante que o modelo está em modo de avaliação
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Tokenização e envio dos inputs para o mesmo device do modelo
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.sigmoid(outputs.logits).cpu().numpy()[0]  # move o resultado de volta para CPU para conversão

    return dict(zip(target, probs))

In [15]:
# Exemplo:
print(predict("Esse cara é um idiota inútil"))

{'aggressive': 0.73042077, 'hate': 0.41679862, 'ageism': 0.0023744137, 'aporophobia': 0.004656531, 'body_shame': 0.00804762, 'capacitism': 0.007631217, 'lgbtphobia': 0.020090152, 'political': 0.1166642, 'racism': 0.015726589, 'religious_intolerance': 0.007384418, 'misogyny': 0.09751119, 'xenophobia': 0.013518502, 'other': 0.3657186}


In [16]:
# Exemplo:
print(predict("bom dia"))

{'aggressive': 0.070849404, 'hate': 0.027626446, 'ageism': 0.0025582353, 'aporophobia': 0.0024627233, 'body_shame': 0.005122473, 'capacitism': 0.0033473265, 'lgbtphobia': 0.008616269, 'political': 0.00947306, 'racism': 0.008391222, 'religious_intolerance': 0.0036443607, 'misogyny': 0.013015613, 'xenophobia': 0.013147087, 'other': 0.03351948}


In [17]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(29794, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [18]:
from torch.utils.data import DataLoader

test_loader = DataLoader(test_dataset, batch_size=16)

all_probs = []
all_labels = []

In [19]:
from tqdm import tqdm

# 2. Inferência por batches no test set
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Predicting"):
        inputs = {k: v.to(device) for k, v in batch.items() if k != "labels"}
        labels = batch["labels"].cpu().numpy()
        outputs = model(**inputs)
        probs = torch.sigmoid(outputs.logits).cpu().numpy()

        all_probs.append(probs)
        all_labels.append(labels)

# 3. Concatenar tudo
all_probs = np.concatenate(all_probs, axis=0)
all_labels = np.concatenate(all_labels, axis=0)

Predicting: 100%|██████████| 546/546 [03:05<00:00,  2.95it/s]


In [20]:
preds = (all_probs >= 0.5).astype(int)

In [21]:
preds[:, :2].shape

(8734, 2)

In [22]:
preds.shape

(8734, 13)

In [23]:
print_multilabel_metrics(all_labels, preds)


📊 Avaliação Multilabel
✔️ F1 Score (Micro):     0.4079
✔️ F1 Score (Macro):     0.1034
✔️ F1 Score (Weighted):  0.3545
⚠️ Hamming Loss:         0.0398
✅ Subset Accuracy:      0.6901


In [None]:
# Considerando só agressive e hate
print_multilabel_metrics(all_labels[:, :2], preds[:, :2])


📊 Avaliação Multilabel
✔️ F1 Score (Micro):     0.6385
✔️ F1 Score (Macro):     0.6150
✔️ F1 Score (Weighted):  0.6366
⚠️ Hamming Loss:         0.1254
✅ Subset Accuracy:      0.8082


In [41]:
from lime.lime_text import LimeTextExplainer

def explicar_comentario_bert(texto, tokenizer, model, target_labels, threshold=0.5, num_features=10):
    def predict_prob(texts):
        model.eval()
        model.to('cpu')
        inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        inputs = {k: v.to('cpu') for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
            probs = torch.sigmoid(outputs.logits).detach().cpu().numpy()
        return probs

    # Preparação
    class_names = target_labels
    explainer = LimeTextExplainer(class_names=class_names)
    pred = predict_prob([texto])[0]
    indices_ativas = [i for i, score in enumerate(pred) if score >= threshold]

    if not indices_ativas:
        print("\n⚪ O texto não é discurso de ódio ou agressivo.")
        return

    explanation = explainer.explain_instance(
        texto,
        predict_prob,
        num_features=num_features,
        labels=indices_ativas
    )

    print("📝 Texto:", texto)
    print("\n🧠 Explicação discursiva:")

    palavras_agressivas = set()
    palavras_odio = set()
    explicacoes = []

    for idx in indices_ativas:
        label = class_names[idx]
        top_palavras = explanation.as_list(label=idx)[:2]
        palavras = [palavra for palavra, peso in top_palavras]

        # Frase principal por classe
        frase = f"O texto é considerado '{label}' pelas palavras: {', '.join(palavras)}"
        explicacoes.append(frase)

        # Coleta para agrupamento posterior
        if label.lower() in ["agressivo", "agressividade", "emoji_agressivo"]:
            palavras_agressivas.update(palavras)
        else:
            palavras_odio.update(palavras)

    # Mostrar explicações por classe
    for frase in explicacoes:
        print("•", frase)

    # Mostrar explicação consolidada
    if palavras_agressivas:
        print(f"\n🔥 Considerado agressivo por causa das palavras: {', '.join(palavras_agressivas)}")
    if palavras_odio:
        print(f"☠️ Considerado discurso de ódio por causa das palavras: {', '.join(palavras_odio)}")

    # Explicação padrão por classe (LIME visual)
    print("\n--- 🔍 Explicações por classe ---")
    for idx in indices_ativas:
        print(f"\n➡️ Classe '{class_names[idx]}' (score = {pred[idx]:.2f}):")
        for word, weight in explanation.as_list(label=idx):
            print(f"   - {word}: {weight:.3f}")

In [48]:
texto = "viadinho de merda"
explicar_comentario_bert(texto, tokenizer, model, target)

📝 Texto: viadinho de merda

🧠 Explicação discursiva:
• O texto é considerado 'aggressive' pelas palavras: viadinho, merda
• O texto é considerado 'hate' pelas palavras: viadinho, merda
• O texto é considerado 'misogyny' pelas palavras: viadinho, merda
☠️ Considerado discurso de ódio por causa das palavras: viadinho, merda

--- 🔍 Explicações por classe ---

➡️ Classe 'aggressive' (score = 0.73):
   - viadinho: 0.350
   - merda: 0.143
   - de: 0.059

➡️ Classe 'hate' (score = 0.70):
   - viadinho: 0.576
   - merda: 0.049
   - de: 0.019

➡️ Classe 'misogyny' (score = 0.53):
   - viadinho: 0.502
   - merda: 0.006
   - de: 0.002


In [None]:
target

['aggressive',
 'hate',
 'ageism',
 'aporophobia',
 'body_shame',
 'capacitism',
 'lgbtphobia',
 'political',
 'racism',
 'religious_intolerance',
 'misogyny',
 'xenophobia',
 'other']

In [26]:
from tqdm import tqdm

def extrair_top_palavras_lime(texto, explainer, predict_proba, top_k=3, score_min=0.1):
    explicacao = explainer.explain_instance(texto, predict_proba, num_features=20)
    
    # Pega os pares (palavra, score), filtra por score mínimo, e ordena
    palavras_filtradas = [
        (palavra, score)
        for palavra, score in explicacao.as_list()
        if abs(score) >= score_min
    ]
    
    # Ordena por importância (absoluta) e pega as top-k palavras
    palavras_topk = sorted(palavras_filtradas, key=lambda x: abs(x[1]), reverse=True)[:top_k]
    
    # Só retorna a palavra (sem o score)
    return [palavra for palavra, _ in palavras_topk]


In [37]:
def predict_prob(texts):
        model.eval()
        model.to('cpu')
        inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
        inputs = {k: v.to('cpu') for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
            probs = torch.sigmoid(outputs.logits).detach().cpu().numpy()
        return probs

In [29]:
from lime.lime_text import LimeTextExplainer

explainer = LimeTextExplainer(class_names=target)

In [38]:
explicacoes = X_test[:1].apply(
    lambda x: extrair_top_palavras_lime(x, explainer, predict_prob)
)

In [39]:
explicacoes

0    []
Name: text, dtype: object

In [36]:
torch.cuda.empty_cache() 