In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, precision_recall_curve, average_precision_score, confusion_matrix, ConfusionMatrixDisplay
from transformers import AutoTokenizer, AutoModel
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class LogisticHateBERT:
    """Lasso (L1-penalized) logistic regression on HateBERT [CLS] embeddings.

    The HateBERT encoder is used as a frozen feature extractor; only the
    logistic-regression head is fitted.
    """

    def __init__(self):
        """
        Initialize classifier with HateBERT and L1-penalized logistic regression.
        """
        self.tokenizer = AutoTokenizer.from_pretrained("GroNLP/hateBERT")
        self.model_encoder = AutoModel.from_pretrained("GroNLP/hateBERT")
        # The liblinear solver cannot fit an unpenalized model (penalty=None
        # raises ValueError at fit time); "l1" is the Lasso penalty this class
        # is documented to use and is supported by liblinear.
        self.model = LogisticRegression(penalty="l1", solver="liblinear", max_iter=1000)

    def embed(self, texts: pd.Series) -> np.ndarray:
        """
        Embed text using HateBERT [CLS] token embeddings.

        Args:
            texts (pd.Series): Input phrases.

        Returns:
            np.ndarray: Sentence embeddings, one row per input text. For empty
            input the result has shape (0, hidden_size).
        """
        self.model_encoder.eval()
        embeddings = []

        # Inference only: no gradients are needed for feature extraction.
        with torch.no_grad():
            for text in texts:
                encoded = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
                output = self.model_encoder(**encoded)
                # Position 0 of the sequence axis is the [CLS] token.
                cls_embedding = output.last_hidden_state[:, 0, :]
                embeddings.append(cls_embedding.reshape(-1).cpu().numpy())

        if not embeddings:
            # np.vstack([]) raises ValueError; return a well-shaped empty matrix instead.
            return np.empty((0, self.model_encoder.config.hidden_size), dtype=np.float32)

        return np.vstack(embeddings)

    def train(self, X: pd.Series, y: pd.Series) -> None:
        """
        Fit the logistic-regression head on embeddings of the training texts.

        Args:
            X (pd.Series): Training phrases.
            y (pd.Series): Training labels, aligned with X.
        """
        X_embed = self.embed(X)
        self.model.fit(X_embed, y)

    def predict_proba(self, X: pd.Series) -> pd.Series:
        """
        Predict the probability of the second class (column 1) for each text.

        Args:
            X (pd.Series): Input phrases.

        Returns:
            pd.Series: Probabilities indexed like X.
        """
        if len(X) == 0:
            # Nothing to score; skip the encoder and the classifier entirely.
            return pd.Series([], index=X.index, dtype=float)

        X_embed = self.embed(X)
        proba = self.model.predict_proba(X_embed)[:, 1]
        return pd.Series(proba, index=X.index)

    def evaluate(self, X_test: pd.Series, y_test: pd.Series, threshold: float = 0.25) -> None:
        """
        Print a classification report for predictions thresholded on probability.

        Args:
            X_test (pd.Series): Test phrases.
            y_test (pd.Series): True labels, aligned with X_test.
            threshold (float): Probability at or above which a text is assigned
                label 1. Defaults to 0.25.
        """
        proba = self.predict_proba(X_test)
        y_pred = (proba >= threshold).astype(int)
        print(classification_report(y_test, y_pred))

In [3]:
# Load the training and test splits.
train_df = pd.read_csv("../data/train_data.csv")
test_df = pd.read_csv("../data/test_data_clean.csv")

# Training features / labels.
X_train, y_train = train_df["text"], train_df["label"]

# Build the classifier and fit it on the training texts.
model = LogisticHateBERT()
model.train(X_train, y_train)

# The test split uses different column names; labels are cast to integers.
X_test, y_test = test_df["comment"], test_df["isHate"].astype(int)

# Print the classification report for the held-out data.
model.evaluate(X_test, y_test)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


KeyboardInterrupt: 

In [None]:
# Score the test set once; later cells reuse `test_probs`.
test_probs = model.predict_proba(X_test)

# ROC points and the area under the curve.
fpr, tpr, _ = roc_curve(y_test, test_probs)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, label=f"ROC curve (AUC = {roc_auc:.2f})")
# Diagonal reference line.
ax.plot([0, 1], [0, 1], linestyle="--", color="gray")
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve - Test Data")
ax.legend(loc="lower right")
ax.grid(True)
plt.show()

In [None]:
# Precision-recall points and average precision on the test scores.
precision, recall, _ = precision_recall_curve(y_test, test_probs)
avg_precision = average_precision_score(y_test, test_probs)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(recall, precision, label=f"Avg Precision = {avg_precision:.2f}")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Precision-Recall Curve - Test Data")
ax.legend(loc="upper right")
ax.grid(True)
plt.show()

In [None]:
# Hard labels from the probabilities, using the 0.25 cut-off.
y_test_pred = (test_probs >= 0.25).astype(int)

cm = confusion_matrix(y_test, y_test_pred)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Not Hate", "Hate"],
)

# Draw the matrix, then style the axes the display created.
disp.plot(cmap="Blues")
disp.ax_.set_title("Confusion Matrix - Test Data")
disp.ax_.grid(False)
plt.show()