In [None]:
# Install & import
!pip install -qU transformers datasets evaluate

import inspect

import evaluate
import numpy as np
import pandas as pd
import torch
from datasets import load_dataset, Dataset
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset as TorchDataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)

In [None]:
# Load & clean data

# Adjust path if loading from local CSV.
# The python engine with on_bad_lines='skip' drops malformed rows instead of
# aborting the whole load.
train_df = pd.read_csv('train.csv', engine='python', on_bad_lines='skip')

# Normalise the comment text. The order of the steps matters:
#   1. fill missing values -- otherwise the `.str` accessors propagate NaN and a
#      float NaN ends up in the list of texts handed to the tokenizer
#   2. lowercase
#   3. drop HTML-like tags, then URLs (before punctuation is stripped, so the
#      patterns can still match)
#   4. replace everything that is not a-z or whitespace with a space
#   5. collapse whitespace runs and trim the ends
train_df['comment_text'] = (
    train_df['comment_text']
        .fillna("")
        .astype(str)
        .str.lower()
        .str.replace(r"<.*?>", " ", regex=True)
        .str.replace(r"https?://\S+", " ", regex=True)
        .str.replace(r"[^a-z\s]", " ", regex=True)
        .str.replace(r"\s+", " ", regex=True)
        .str.strip()
)

# Target columns; their order defines the order of the model's outputs.
label_cols = ['toxic','severe_toxic','obscene','threat','insult','identity_hate']

In [None]:
# Optional: uncomment to work on a 30% subsample for quicker experiments.
# train_df = train_df.sample(frac=0.3, random_state=42)

# Features are the cleaned comment strings; targets are the label columns as a 2-D array.
X = train_df['comment_text'].tolist()
y = train_df[label_cols].to_numpy()

# Hold out 20% of the rows for validation (fixed seed for a repeatable split).
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Prepare tokenizer & model

checkpoint = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Sequence-classification model with one output per label, configured for the
# multi-label problem type.
model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    problem_type='multi_label_classification',
    num_labels=len(label_cols),
)

# Put the model on the GPU when one is available, otherwise keep it on the CPU.
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# create Datasets for Trainer

class ToxicTorchDataset(TorchDataset):
    """Map-style dataset pairing tokenized comments with multi-label targets.

    All texts are tokenized once in ``__init__`` and stored *unpadded*. Padding
    is left to the batch collator, so each batch is padded only to its own
    longest sequence instead of every example being padded to the longest
    sequence in the whole corpus. Items therefore have variable length and must
    be batched with a padding collator rather than plain stacking.

    Args:
        texts: List of comment strings.
        labels: Per-text label vectors, indexable by position (``labels[i]``)
            and of the same length as ``texts``.
        tokenizer: Callable invoked as ``tokenizer(texts, truncation=True,
            max_length=...)`` that returns a mapping of field name to a
            per-text list of token-level values.
        max_length: Optional truncation cap forwarded to the tokenizer;
            ``None`` leaves the choice to the tokenizer.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length.
    """

    def __init__(self, texts, labels, tokenizer, max_length=None):
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"(got {len(texts)} and {len(labels)})"
            )
        # No padding and no return_tensors here: keep ragged Python lists and
        # let the collator pad per batch.
        self.enc = tokenizer(texts, truncation=True, max_length=max_length)
        self.labels = labels

    def __len__(self):
        """Return the number of examples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return one example as a dict of tensors, including float ``labels``."""
        item = {k: torch.tensor(v[idx]) for k, v in self.enc.items()}
        # Labels are cast to float here (the model is configured for multi-label classification).
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return item

# Wrap each split (training first, then validation) in a ToxicTorchDataset
# that shares the same tokenizer.
train_dataset, val_dataset = (
    ToxicTorchDataset(split_texts, split_labels, tokenizer)
    for split_texts, split_labels in ((X_train, y_train), (X_val, y_val))
)

In [None]:
# Define metrics and data collator

# Load the 'f1' metric from the `evaluate` library once at module level so that
# compute_metrics can reuse the same object on every evaluation pass.
f1_metric = evaluate.load('f1')

def compute_metrics(eval_pred):
    """Compute per-label ROC-AUC plus macro and micro F1 for multi-label output.

    Args:
        eval_pred: Pair ``(logits, labels)``. Both are indexed as 2-D arrays
            with one column per entry of ``label_cols``.

    Returns:
        dict with:
          * ``roc_auc_<label>`` for every label (``nan`` when the references
            for that label contain a single class, where ROC-AUC is undefined),
          * ``f1_macro``: unweighted mean of the per-label F1 scores at a 0.5
            probability threshold,
          * ``f1_micro``: F1 over all (example, label) decisions pooled
            together at the same threshold.
    """
    logits, labels = eval_pred
    probs = torch.sigmoid(torch.tensor(logits)).numpy()
    # The dataset emits float label tensors; metrics want integer class ids.
    labels = np.asarray(labels).astype(int)
    # Hard decisions at threshold 0.5.
    preds = (probs >= 0.5).astype(int)

    result = {}
    per_label_f1 = []
    for i, lbl in enumerate(label_cols):
        refs_i = labels[:, i]
        if np.unique(refs_i).size < 2:
            # roc_auc_score raises when only one class is present; report nan
            # instead of aborting the whole evaluation.
            result[f'roc_auc_{lbl}'] = float('nan')
        else:
            result[f'roc_auc_{lbl}'] = roc_auc_score(refs_i, probs[:, i])
        per_label_f1.append(
            f1_metric.compute(predictions=preds[:, i], references=refs_i)['f1']
        )

    # Macro: every label counts equally, however rare it is.
    result['f1_macro'] = float(np.mean(per_label_f1))
    # Micro: pool all label decisions (this is what flattening computes).
    pooled = f1_metric.compute(predictions=preds.flatten(), references=labels.flatten())
    result['f1_micro'] = pooled['f1']
    return result

# Batch collator built from the tokenizer; it is handed to the Trainer as
# `data_collator`. Per the transformers documentation it pads the examples of
# each batch to a common length.
collator = DataCollatorWithPadding(tokenizer)

In [None]:
# TrainingArguments & Trainer

# Report which device is available for training: CUDA if present, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Training on {device}")

# Training configuration, grouped by concern.
training_args = TrainingArguments(
    # Where checkpoints are written.
    output_dir='./sbert_finetuned',
    # Optimisation settings.
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Evaluate and checkpoint on the same per-epoch schedule.
    eval_strategy='epoch',
    save_strategy='epoch',
    # After training, reload the checkpoint that scored best on this metric.
    load_best_model_at_end=True,
    metric_for_best_model='roc_auc_toxic',
)

# Newer transformers releases accept the tokenizer through `processing_class`
# and deprecate the older `tokenizer` keyword. Since the notebook installs the
# latest release, pick whichever keyword the installed Trainer actually exposes
# so the cell works on both sides of that change.
_tokenizer_kwarg = (
    'processing_class'
    if 'processing_class' in inspect.signature(Trainer.__init__).parameters
    else 'tokenizer'
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=collator,
    compute_metrics=compute_metrics,
    **{_tokenizer_kwarg: tokenizer},
)

In [None]:
# Fine-tune: run the training loop configured on `trainer`
# (model, datasets, arguments and metrics were all set when it was built).

trainer.train()

In [None]:
# Evaluate on the validation set.
# evaluate() is called without arguments, so it relies on the eval_dataset the
# Trainer was constructed with (val_dataset). The returned metrics are printed as-is.

val_results = trainer.evaluate()
print(val_results)

In [None]:
from sklearn.metrics import classification_report
import torch
import numpy as np

# Run inference on the validation split and keep the raw logits.
pred_out = trainer.predict(val_dataset)
logits = pred_out.predictions

# Logits -> independent per-label probabilities -> hard 0/1 decisions at 0.5.
probs = torch.sigmoid(torch.tensor(logits)).numpy()
y_pred = (probs >= 0.5).astype(int)

# Ground truth for the same split that was predicted above.
y_true = y_val  # or y_test

# Per-label precision / recall / F1; zero_division=0 reports 0 (instead of
# warning) for labels that have no predicted or no true positives.
print(classification_report(
    y_true,
    y_pred,
    target_names=label_cols,
    zero_division=0
))