In [None]:
# %% [markdown]
# # 🧪 Toxic Comment Detection API (MVP)
# - Dataset: Jigsaw Toxic Comment Classification Challenge
# - Model: bert-base-uncased
# - Task: Binary classification (toxic = 1, not_toxic = 0)
# - Target: F1-score ≥ 0.88 (88%) on the held-out test set

# %%
# Install packages
!pip install -q kagglehub transformers datasets scikit-learn torch

# %%
# Imports
import os
import re
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    get_scheduler
)
from datasets import Dataset as HFDataset
import kagglehub

In [None]:
# Report the PyTorch build and whether a CUDA device is visible to it.
print("PyTorch version:", torch.__version__)
cuda_ready = torch.cuda.is_available()
print("CUDA available:", cuda_ready)
# Only query the device name when CUDA is actually usable.
gpu_name = torch.cuda.get_device_name(0) if cuda_ready else "N/A"
print("GPU name:", gpu_name)

PyTorch version: 2.8.0+cu126
CUDA available: True
GPU name: Tesla T4


In [None]:
# Set device
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

# %%
# Download dataset
# kagglehub returns the local directory holding the dataset files; the
# training CSV is then read from inside that directory.
print("Downloading dataset...")
path = kagglehub.dataset_download(
    "julian3833/jigsaw-toxic-comment-classification-challenge"
)
train_path = os.path.join(path, "train.csv")
df = pd.read_csv(train_path)
print(f"Dataset loaded. Shape: {df.shape}")

# %%
# Preprocessing function
# Patterns are compiled once at definition time instead of on every call.
# `http\S+` already covers https URLs, so no separate alternative is needed.
_URL_PATTERN = re.compile(r'http\S+|www\S+')
# Non-greedy tag match; `.` does not cross newlines, so tags spanning lines are kept.
_HTML_TAG_PATTERN = re.compile(r'<.*?>')
# Named HTML entities such as `&amp;` (matched after lower-casing).
_HTML_ENTITY_PATTERN = re.compile(r'&[a-z]+;')


def clean_text(text):
    """Normalise a raw comment before tokenization.

    The text is lower-cased, URLs (``http...`` / ``www...``), single-line HTML
    tags and named HTML entities are removed, and surrounding whitespace is
    stripped. Whitespace inside the text is left as-is.

    Args:
        text: The raw comment. ``None`` and float NaN (pandas' missing-value
            marker) are treated as an empty comment; any other non-string
            value is converted with ``str()``.

    Returns:
        The cleaned string (possibly empty).
    """
    # Missing values would otherwise crash on `.lower()`.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    text = str(text).lower()
    text = _URL_PATTERN.sub('', text)
    text = _HTML_TAG_PATTERN.sub('', text)
    text = _HTML_ENTITY_PATTERN.sub('', text)
    return text.strip()

# Create binary label
# A comment is labelled 1 when at least one of the six per-category flags
# sums to a positive value, otherwise 0.
toxic_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
flag_totals = df[toxic_columns].sum(axis=1)
df['label'] = flag_totals.gt(0).astype(int)

# Keep only the text and the derived label, then normalise the text.
df = df.loc[:, ['comment_text', 'label']].copy()
df['comment_text'] = df['comment_text'].apply(clean_text)

print(f"After preprocessing: {df.shape}")
label_counts = df['label'].value_counts()
print(f"Label distribution:\n{label_counts}")

Using device: cuda
Downloading dataset...
Downloading from https://www.kaggle.com/api/v1/datasets/download/julian3833/jigsaw-toxic-comment-classification-challenge?dataset_version_number=1...


100%|██████████| 53.4M/53.4M [00:03<00:00, 14.3MB/s]

Extracting files...





Dataset loaded. Shape: (159571, 8)
After preprocessing: (159571, 2)
Label distribution:
label
0    143346
1     16225
Name: count, dtype: int64


In [None]:
# Split: 80% train, 10% val, 10% test
# Step 1: hold out a label-stratified 20% of the data.
train_df, temp_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df['label'],
    random_state=42,
)
# Step 2: halve the hold-out (again stratified) into validation and test.
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    stratify=temp_df['label'],
    random_state=42,
)

split_summary = f"Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}"
print(split_summary)

Train: 127656, Val: 15957, Test: 15958


In [None]:
# Hugging Face checkpoint name; used to load the tokenizer here and the
# sequence-classification model later in the notebook.
MODEL_NAME = "bert-base-uncased"
# Fixed token length that the tokenizer calls in this notebook truncate/pad to.
MAX_LENGTH = 256
# Number of examples per batch for the train/val/test DataLoaders.
BATCH_SIZE = 16

# Tokenizer matching MODEL_NAME (fetched from the Hub on first use).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    """Tokenize a batch of comments into fixed-length model inputs.

    Args:
        examples: A batch mapping that contains the raw texts under the
            ``"comment_text"`` key.

    Returns:
        The tokenizer output for the batch, truncated and padded to
        ``MAX_LENGTH`` tokens.
    """
    # ⚠️ Do NOT use return_tensors="pt" here
    # (presumably because the datasets are switched to torch format
    # afterwards via set_format — confirm before changing).
    encoding_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": MAX_LENGTH,
    }
    comments = examples["comment_text"]
    return tokenizer(comments, **encoding_options)

# Convert to Hugging Face Datasets
train_hf, val_hf, test_hf = (
    HFDataset.from_pandas(frame) for frame in (train_df, val_df, test_df)
)


def _encode_split(split):
    """Tokenize one split and drop its raw text column."""
    return split.map(tokenize_function, batched=True, remove_columns=["comment_text"])


# Tokenize and remove raw text
train_enc = _encode_split(train_hf)
val_enc = _encode_split(val_hf)
test_enc = _encode_split(test_hf)

# Set PyTorch format
# Only these three columns are exposed as tensors when the datasets are indexed.
for encoded_split in (train_enc, val_enc, test_enc):
    encoded_split.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# DataLoaders
# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(train_enc, batch_size=BATCH_SIZE, shuffle=True)
val_loader, test_loader = (
    DataLoader(encoded_split, batch_size=BATCH_SIZE)
    for encoded_split in (val_enc, test_enc)
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Map:   0%|          | 0/127656 [00:00<?, ? examples/s]

Map:   0%|          | 0/15957 [00:00<?, ? examples/s]

Map:   0%|          | 0/15958 [00:00<?, ? examples/s]

In [None]:
# Reproducibility
# Seed torch's global RNG (CPU and CUDA) before the model is built so the
# newly initialised classifier head and any later torch-driven shuffling
# start from the same state on every run. Some GPU kernels may still be
# non-deterministic, so results can differ slightly between runs.
SEED = 42
torch.manual_seed(SEED)

# Model
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=1  # Single logit per example; the sigmoid is applied by the loss / at inference
)
model.to(device)

# Training config
EPOCHS = 3
LR = 2e-5

optimizer = AdamW(model.parameters(), lr=LR)
# One scheduler step is taken per optimizer step, so the schedule spans
# every batch of every epoch.
num_training_steps = EPOCHS * len(train_loader)
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)

# Works on raw logits (applies the sigmoid internally); targets must be
# floats with the same shape as the logits.
loss_fn = torch.nn.BCEWithLogitsLoss()

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
from tqdm.auto import tqdm
import time

# Fine-tune the model for EPOCHS passes over train_loader, reporting a running
# mean loss on the progress bar and a per-epoch summary.
for epoch in range(EPOCHS):
    # Enter training mode at the start of every epoch so dropout is active
    # even if an evaluation call switched the model to eval mode in between.
    model.train()
    print(f"\n🚀 Epoch {epoch + 1}/{EPOCHS}")
    total_loss = 0.0
    progress_bar = tqdm(train_loader, desc="Training", leave=True)

    epoch_start_time = time.time()
    # Count steps explicitly: tqdm updates its `.n` attribute lazily (only
    # when the bar is redrawn), so it is not a reliable batch counter for
    # computing the running average.
    for step, batch in enumerate(progress_bar, start=1):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # BCEWithLogitsLoss needs float targets shaped like the logits,
        # hence the cast and the added trailing dimension.
        labels = batch['label'].float().to(device).unsqueeze(1)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        loss = loss_fn(logits, labels)

        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        total_loss += loss.item()
        avg_batch_loss = total_loss / step

        # Update progress bar with loss
        progress_bar.set_postfix({'loss': f'{avg_batch_loss:.4f}'})

    epoch_time = time.time() - epoch_start_time
    avg_loss = total_loss / len(train_loader)
    print(f"✅ Epoch {epoch + 1} finished | Avg Loss: {avg_loss:.4f} | Time: {epoch_time:.1f}s")


🚀 Epoch 1/3


Training:   0%|          | 0/7979 [00:00<?, ?it/s]

In [None]:
def evaluate_model(data_loader, model, device, threshold=0.5):
    """Compute binary classification metrics for ``model`` over ``data_loader``.

    The model is switched to eval mode and run without gradient tracking.
    Each logit is passed through a sigmoid and compared with ``threshold``
    to obtain a 0/1 prediction.

    Args:
        data_loader: Iterable of batches providing ``'input_ids'``,
            ``'attention_mask'`` and ``'label'`` tensors.
        model: Model whose output exposes ``.logits``; assumed to emit one
            logit per example (shape ``(batch, 1)``) — confirm if the head
            is changed.
        device: Device the input tensors are moved to before the forward pass.
        threshold: Probability above which an example is predicted positive.
            Defaults to 0.5.

    Returns:
        dict with ``"accuracy"``, ``"precision"``, ``"recall"`` and ``"f1"``.
    """
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            # Drop only the trailing logit dimension. A bare squeeze() would
            # also remove the batch dimension when the last batch holds a
            # single example, producing a 0-d tensor that cannot be iterated.
            probs = torch.sigmoid(outputs.logits).squeeze(-1)
            preds = (probs > threshold).int()

            all_preds.extend(preds.cpu().tolist())
            # Labels are only needed on the host, so they are not moved to `device`.
            all_labels.extend(batch['label'].reshape(-1).tolist())

    return {
        "accuracy": accuracy_score(all_labels, all_preds),
        "precision": precision_score(all_labels, all_preds),
        "recall": recall_score(all_labels, all_preds),
        "f1": f1_score(all_labels, all_preds)
    }

# Evaluate on test set
test_metrics = evaluate_model(test_loader, model, device)

# Print the four metrics between two 50-character rules; titles are padded
# so the colons line up.
divider = "=" * 50
print("\n" + divider)
print("TEST SET METRICS")
print(divider)
metric_rows = (
    ("Accuracy ", "accuracy"),
    ("Precision", "precision"),
    ("Recall   ", "recall"),
    ("F1-score ", "f1"),
)
for title, metric_key in metric_rows:
    print(f"{title}: {test_metrics[metric_key]:.4f}")
print(divider)

In [None]:
# Save model and tokenizer
# Both are written to the same directory: model first, then tokenizer.
MODEL_SAVE_PATH = "toxic_model"
for artifact in (model, tokenizer):
    artifact.save_pretrained(MODEL_SAVE_PATH)
print(f"✅ Model and tokenizer saved to '{MODEL_SAVE_PATH}'")

# %%
# Inference function
def predict(text: str):
    """Classify a single comment as toxic (1) or not toxic (0).

    The text is cleaned with ``clean_text``, tokenized to ``MAX_LENGTH``
    tokens, and scored by the model in eval mode without gradient tracking.

    Args:
        text: The raw comment to score.

    Returns:
        dict with ``"probability"`` (sigmoid of the model's logit) and
        ``"label"`` (1 when the probability exceeds 0.5, otherwise 0).
    """
    encoded = tokenizer(
        clean_text(text),
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=MAX_LENGTH,
    )
    encoded = encoded.to(device)

    model.eval()
    with torch.no_grad():
        logits = model(**encoded).logits
        # .item() requires exactly one element: one input, one logit.
        prob = torch.sigmoid(logits).item()

    label = int(prob > 0.5)
    return {"probability": prob, "label": label}

In [None]:
# Test examples
print("🧪 Example Predictions:")
examples = [
    "You are a stupid idiot!",
    "I love this movie!",
    "Go kill yourself, you worthless piece of trash.",
    "Thanks for your help!",
]

# Label 1 maps to "toxic"; anything else is reported as "not_toxic".
label_names = {1: "toxic"}
for ex in examples:
    res = predict(ex)
    label_str = label_names.get(res['label'], "not_toxic")
    # Text is cut to 50 characters and left-aligned in a 50-character column.
    print(f"Text: {ex[:50]:<50} | Label: {label_str} (prob: {res['probability']:.4f})")

# Final F1 check
# Compare the test-set F1 against the 0.88 target and report the outcome
# between two 50-character rules.
banner = "=" * 50
print("\n" + banner)
target_missed = not (test_metrics['f1'] >= 0.88)
if target_missed:
    print(f"⚠️  WARNING: F1-score ({test_metrics['f1']:.2%}) < 88%")
else:
    print("✅ SUCCESS: F1-score ≥ 88% achieved!")
print(banner)