<a href="https://colab.research.google.com/github/PedroMarim/toxic-comment-bert/blob/main/notebooks/baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
!git clone https://github.com/PedroMarim/toxic-comment-bert.git


Cloning into 'toxic-comment-bert'...
remote: Enumerating objects: 7, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remote: Total 7 (delta 0), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (7/7), 21.84 KiB | 4.37 MiB/s, done.


In [5]:
# Go to /content and list it to confirm the clone is there, then switch into the
# repository so the relative paths used by later cells (e.g. data/raw) resolve inside it.
%cd /content
!ls  # you should see 'toxic-comment-bert' here
%cd /content/toxic-comment-bert


/content
sample_data  toxic-comment-bert
/content/toxic-comment-bert


In [6]:
import os

# Create the data folders relative to the current working directory.
# exist_ok=True turns a re-run of this cell into a no-op instead of an error.
for data_dir in ("data/raw", "data/processed"):
    os.makedirs(data_dir, exist_ok=True)


## **Baselines**

In [11]:
import pandas as pd
import numpy as np
import os
import joblib

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import roc_auc_score


In [13]:
# Names of the toxicity label columns; the cells below use them as prediction targets.
label_cols = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]

# Read the training CSV from the raw-data folder, then report its dimensions and header
# as a quick sanity check that the expected file was loaded.
df = pd.read_csv("data/raw/train.csv")
print("Shape:", df.shape)
print("Columns:", list(df.columns))

Shape: (159571, 8)
Columns: ['id', 'comment_text', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']


In [14]:
# Helper column used only for stratification: 1 when a row has at least one label set
# (label sum greater than zero), 0 otherwise.
df["any_toxic"] = df[label_cols].sum(axis=1).gt(0).astype(int)

# One seed shared by both splits so the partition is reproducible.
SEED = 42

# First split: hold out 20% of the rows, stratified on the any_toxic flag.
train_df, temp_df = train_test_split(
    df, test_size=0.2, random_state=SEED, stratify=df["any_toxic"]
)

# Second split: halve the held-out rows into validation and test, again stratified.
val_df, test_df = train_test_split(
    temp_df, test_size=0.5, random_state=SEED, stratify=temp_df["any_toxic"]
)

for split_name, split_df in (("Train", train_df), ("Val", val_df), ("Test", test_df)):
    print(f"{split_name} size:", len(split_df))

Train size: 127656
Val size: 15957
Test size: 15958


In [15]:
# Raw comment strings per split; missing comments become empty strings so the
# vectorizer always receives text.
train_texts, val_texts, test_texts = (
    split["comment_text"].fillna("") for split in (train_df, val_df, test_df)
)

# Word uni- and bi-gram TF-IDF, dropping terms seen in fewer than 5 training comments
# and capping the vocabulary at 50,000 features.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), min_df=5, max_features=50000)

# The vocabulary and IDF weights are learned from the training split only; the
# validation and test splits are merely transformed with the fitted vectorizer.
X_train = vectorizer.fit_transform(train_texts)
X_val, X_test = (vectorizer.transform(texts) for texts in (val_texts, test_texts))

print("TF-IDF shapes:", X_train.shape, X_val.shape, X_test.shape)

# Target matrices: one row per comment, one column per entry of label_cols.
y_train, y_val, y_test = (
    split[label_cols].values for split in (train_df, val_df, test_df)
)

TF-IDF shapes: (127656, 50000) (15957, 50000) (15958, 50000)


In [16]:
# Base estimator: a plain logistic regression. max_iter is raised above scikit-learn's
# default of 100, presumably to give the solver room to converge on the 50,000-feature
# TF-IDF matrix.
logreg = LogisticRegression(max_iter=1000)

# One-vs-rest fits an independent binary classifier for each column of y_train.
# n_jobs belongs on this wrapper: the per-label fits are what can run in parallel,
# whereas n_jobs on LogisticRegression itself does nothing for a binary problem.
# The fitted coefficients are the same either way; only wall-clock time changes.
clf = OneVsRestClassifier(logreg, n_jobs=-1)
clf.fit(X_train, y_train)

In [17]:
# Predicted probabilities for the held-out splits, one column per entry of label_cols.
y_val_proba, y_test_proba = (clf.predict_proba(X) for X in (X_val, X_test))

# Headline metric: ROC-AUC averaged over the labels with equal weight per label.
val_auc_macro = roc_auc_score(y_val, y_val_proba, average="macro")
test_auc_macro = roc_auc_score(y_test, y_test_proba, average="macro")

print(f"\nMacro ROC-AUC (val):  {val_auc_macro:.4f}")
print(f"Macro ROC-AUC (test): {test_auc_macro:.4f}")

# Break the validation score down by label; transposing lets us walk the label
# columns of the truth and score matrices in step with their names.
print("\nPer-label AUC (val):")
for label, truth, scores in zip(label_cols, y_val.T, y_val_proba.T):
    auc = roc_auc_score(truth, scores)
    print(f"  {label:13s}: {auc:.4f}")


Macro ROC-AUC (val):  0.9757
Macro ROC-AUC (test): 0.9747

Per-label AUC (val):
  toxic        : 0.9698
  severe_toxic : 0.9819
  obscene      : 0.9807
  threat       : 0.9807
  insult       : 0.9750
  identity_hate: 0.9663


In [18]:
from sklearn.metrics import classification_report

# Probability cut-off for turning scores into hard 0/1 predictions; the same value
# is applied to every label.
THRESHOLD = 0.5
y_val_pred = (y_val_proba >= THRESHOLD).astype(int)

# Per-label precision / recall / F1 on the validation split. zero_division=0 reports 0
# (instead of warning) for any metric whose denominator is zero.
val_report = classification_report(
    y_val,
    y_val_pred,
    target_names=label_cols,
    zero_division=0,
)

print("Classification report (val):")
print(val_report)


Classification report (val):
               precision    recall  f1-score   support

        toxic       0.93      0.57      0.71      1505
 severe_toxic       0.56      0.24      0.34       138
      obscene       0.92      0.60      0.73       823
       threat       0.56      0.10      0.17        49
       insult       0.83      0.52      0.64       772
identity_hate       0.79      0.23      0.36       141

    micro avg       0.89      0.53      0.67      3428
    macro avg       0.76      0.38      0.49      3428
 weighted avg       0.88      0.53      0.66      3428
  samples avg       0.05      0.04      0.05      3428

