# Transformer Models for Toxic Comment Classification

This notebook implements and evaluates various transformer architectures (BERT, RoBERTa, DistilBERT) for the toxic comment classification task.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
from sklearn.metrics import classification_report, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from torch.optim import AdamW  
import torch.nn.functional as F

# Transformers libraries
from transformers import (
    AutoTokenizer, 
    AutoModel,
    AutoModelForSequenceClassification, 
    get_linear_schedule_with_warmup,
    BertTokenizer,
    BertForSequenceClassification,
    RobertaTokenizer,
    RobertaForSequenceClassification,
    DistilBertTokenizer,
    DistilBertForSequenceClassification
)

# Reproducibility: seed every random number generator the notebook relies on
import random

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

## 1. Load and Prepare Data

In [None]:
# Read the preprocessed training comments produced by the preprocessing step
train_csv_path = '../Dataset/train_preprocessed.csv'
train_data = pd.read_csv(train_csv_path)

# Quick sanity check: dimensions first, then a preview of the first rows
print(f"Training data shape: {train_data.shape}")
train_data.head()

Training data shape: (159571, 12)


Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate,processed_text,original_length,processed_length,length_reduction
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0,explanation edits made my username hardcore me...,264,202,23.484848
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0,daww ! he match background colour im seemingly...,112,86,23.214286
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0,"hey man , im really not trying edit war . guy ...",233,165,29.184549
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0,i cant make real suggestion improvement i wond...,622,406,34.726688
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0,"you , sir , my hero . chance you remember page...",67,54,19.402985


In [None]:
# Features are the preprocessed text; targets are the six binary toxicity labels.
# Missing text becomes an empty string and missing labels become 0.
label_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
X = train_data['processed_text'].fillna("")
y = train_data[label_columns].fillna(0)

# First carve off 10% as the test set, stratified on the 'toxic' label ...
X_temp, X_test, y_temp, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
    stratify=y['toxic'],
)

# ... then take 11% of the remainder (about 10% of the full data) for validation
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.11,
    random_state=42,
    stratify=y_temp['toxic'],
)

for split_name, split_features in (("Training", X_train), ("Validation", X_val), ("Test", X_test)):
    print(f"{split_name} set size: {split_features.shape[0]}")

Training set size: 127815
Validation set size: 15798
Test set size: 15958


In [None]:
# Fine-tuning transformers is slow, so train on a random subset while developing.
# The min() calls cap the sample at the split size: raise SAMPLE_SIZE above
# len(X_train) to use the full training and validation sets.
SAMPLE_SIZE = 10000

# Training subset (positions drawn without replacement from the global NumPy RNG)
n_train_sample = min(SAMPLE_SIZE, len(X_train))
train_indices = np.random.choice(len(X_train), n_train_sample, replace=False)
X_train_sample, y_train_sample = X_train.iloc[train_indices], y_train.iloc[train_indices]

# Validation subset: one fifth of the training sample size
n_val_sample = min(SAMPLE_SIZE // 5, len(X_val))
val_indices = np.random.choice(len(X_val), n_val_sample, replace=False)
X_val_sample, y_val_sample = X_val.iloc[val_indices], y_val.iloc[val_indices]

print(f"Sampled train size: {len(X_train_sample)}")
print(f"Sampled validation size: {len(X_val_sample)}")

Sampled train size: 10000
Sampled validation size: 2000


## 2. Create Dataset for Transformer Models

In [None]:
class ToxicCommentDatasetTransformer(Dataset):
    """Map-style dataset that tokenizes one comment per item.

    Args:
        texts: positional-indexable (``.iloc``) collection of comment strings.
        labels: positional-indexable (``.iloc``) table whose rows hold the
            label values for each comment.
        tokenizer: callable invoked as
            ``tokenizer(text, max_length=..., padding='max_length',
            truncation=True, return_tensors='pt')`` that returns a mapping of
            tensors with a leading batch dimension.
        max_length: length every sequence is padded or truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenizer outputs for item ``idx`` plus a float ``labels`` tensor."""
        comment = self.texts.iloc[idx]
        target = torch.FloatTensor(self.labels.iloc[idx].values)

        tokenized = self.tokenizer(
            comment,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )

        # The tokenizer returns tensors with a leading batch axis of size 1; drop
        # it so the DataLoader can stack items into a batch itself.
        sample = {}
        for field, tensor in tokenized.items():
            sample[field] = tensor.squeeze(0)

        sample['labels'] = target
        return sample

## 3. Build Transformer Models

In [None]:
class BertForMultiLabelClassification(nn.Module):
    """Pre-trained transformer encoder with a linear multi-label head.

    Despite the name, the encoder is loaded with ``AutoModel`` and can be any
    checkpoint whose output exposes ``last_hidden_state`` (the notebook uses
    BERT, RoBERTa and DistilBERT).

    Args:
        num_labels: number of independent binary labels to predict.
        model_name: checkpoint name passed to ``AutoModel.from_pretrained``.
        dropout_rate: dropout probability applied before the classifier.
    """

    def __init__(self, num_labels, model_name="bert-base-uncased", dropout_rate=0.1):
        super().__init__()
        self.num_labels = num_labels

        # Attribute name is kept as ``bert`` so saved state_dict keys stay valid.
        self.bert = AutoModel.from_pretrained(model_name)

        # Classification head
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, token_type_ids=None, labels=None):
        """Return per-label probabilities with shape ``(batch, num_labels)``.

        Args:
            input_ids: token ids forwarded to the encoder.
            attention_mask: attention mask forwarded to the encoder.
            token_type_ids: optional segment ids. Forwarded only when provided,
                because some encoders (e.g. DistilBERT) do not accept this
                argument and would raise ``TypeError`` even for ``None``.
            labels: accepted for interface compatibility; not used here (the
                loss is computed by the caller).
        """
        encoder_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
        if token_type_ids is not None:
            encoder_inputs["token_type_ids"] = token_type_ids
        outputs = self.bert(**encoder_inputs)

        # Use the representation of the first token ([CLS] / <s>) as the sentence vector
        pooled_output = outputs.last_hidden_state[:, 0]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        # Sigmoid (not softmax): each label is an independent binary decision
        return torch.sigmoid(logits)

## 4. Training and Evaluation Functions

In [None]:
# Focal loss with a separate focusing parameter (gamma) per class
class AdaptiveFocalLoss(nn.Module):
    """Multi-label focal loss with per-class gamma and optional per-class weights.

    For every element the loss is ``alpha_c * (1 - p_t) ** gamma_c * BCE``,
    where ``p_t = exp(-BCE)`` is the probability assigned to the true outcome.
    The result is averaged over all elements.

    Args:
        alpha: optional per-class weights (tensor or sequence, one per class).
            Stored as a buffer so ``.to(device)`` moves it with the module.
        class_gammas: optional mapping ``class name -> gamma``. Classes that
            are missing (or all classes when this is ``None``) use gamma 2.0.
        class_names: optional ordered class names matching the columns of the
            inputs; defaults to the six toxicity labels.
    """

    DEFAULT_GAMMA = 2.0
    DEFAULT_CLASS_NAMES = ('toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate')

    def __init__(self, alpha=None, class_gammas=None, class_names=None):
        super().__init__()
        self.class_names = tuple(class_names) if class_names is not None else self.DEFAULT_CLASS_NAMES
        # None used to crash in forward() on ``.get``; treat it as "no overrides".
        self.class_gammas = dict(class_gammas) if class_gammas is not None else {}
        self.epsilon = 1e-6

        gamma_values = [float(self.class_gammas.get(name, self.DEFAULT_GAMMA)) for name in self.class_names]
        self.register_buffer("gamma", torch.tensor(gamma_values, dtype=torch.float32))

        if alpha is not None:
            alpha = torch.as_tensor(alpha, dtype=torch.float32)
        self.register_buffer("alpha", alpha)

    def forward(self, inputs, targets):
        """Compute the mean focal loss.

        Args:
            inputs: predicted probabilities (already passed through a sigmoid),
                with one column per class in ``class_names`` order.
            targets: float tensor of 0/1 labels with the same shape as ``inputs``.

        Raises:
            ValueError: if the last dimension of ``inputs`` does not match the
                number of configured classes.
        """
        if inputs.shape[-1] != self.gamma.numel():
            raise ValueError(
                f"Expected {self.gamma.numel()} classes in the last dimension, got {inputs.shape[-1]}"
            )

        # Keep probabilities away from exactly 0 and 1
        probs = torch.clamp(inputs, self.epsilon, 1 - self.epsilon)
        bce_loss = F.binary_cross_entropy(probs, targets, reduction='none')

        # exp(-BCE) recovers the probability the model gave to the true outcome
        pt = torch.exp(-bce_loss)

        # Broadcasting applies each class's gamma to its own column. The .to() calls
        # are no-ops when the module already lives on the inputs' device.
        gamma = self.gamma.to(device=bce_loss.device, dtype=bce_loss.dtype)
        focal_weight = (1 - pt) ** gamma

        if self.alpha is not None:
            focal_weight = focal_weight * self.alpha.to(device=bce_loss.device, dtype=bce_loss.dtype)

        return (focal_weight * bce_loss).mean()

In [None]:
# Positive rate of every label in the training split
label_names = list(y_train.columns)
class_dist = np.array([y_train[name].mean() for name in label_names])
print(f"Class distribution: {class_dist}")

# Inverse-frequency weights. The 0.01 offset avoids division by zero and damps
# the weight of extremely rare labels; the result is rescaled to sum to the
# number of classes.
inverse_frequency = 1 / (class_dist + 0.01)
class_weights = inverse_frequency / inverse_frequency.sum() * len(inverse_frequency)

# Manual boosts for the rarest labels, keyed by name rather than column position
rare_class_boost = {'threat': 30, 'identity_hate': 8, 'severe_toxic': 8}
for rare_label, factor in rare_class_boost.items():
    class_weights[label_names.index(rare_label)] *= factor

print(f"Class weights: {class_weights}")

# Focusing parameter per label for the focal loss
class_gammas = dict(
    toxic=1.0,
    severe_toxic=1.5,
    obscene=1.0,
    threat=1.5,
    insult=1.0,
    identity_hate=2.0,
)

# Loss used for training
alpha_weights = torch.FloatTensor(class_weights)
criterion = AdaptiveFocalLoss(alpha=alpha_weights, class_gammas=class_gammas).to(device)


Class distribution: [0.09584947 0.00987365 0.05295936 0.0029261  0.04935258 0.00860619]
Class weights: [ 0.25350136 10.80143425  0.42619535 62.27626853  0.45209466 11.53722987]


In [None]:
def train_transformer_epoch(model, data_loader, optimizer, scheduler, device, loss_fn=None):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: module called with ``input_ids``, ``attention_mask`` and, when the
            batch contains them, ``token_type_ids``; must return probabilities.
        data_loader: iterable of dict batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.
        optimizer: optimizer stepped once per batch.
        scheduler: learning-rate scheduler stepped once per batch.
        device: device the batch tensors are moved to.
        loss_fn: callable ``loss_fn(outputs, labels)``. Defaults to the
            module-level ``criterion`` so existing calls keep working.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    if loss_fn is None:
        # Backward-compatible fallback to the notebook-level loss object
        loss_fn = criterion

    model.train()
    epoch_loss = 0

    progress_bar = tqdm(data_loader, desc="Training")

    for batch in progress_bar:
        optimizer.zero_grad()

        # Move the batch to the target device
        model_inputs = {
            'input_ids': batch['input_ids'].to(device),
            'attention_mask': batch['attention_mask'].to(device),
        }
        # Only some tokenizers emit token_type_ids; forward them when present
        if 'token_type_ids' in batch:
            model_inputs['token_type_ids'] = batch['token_type_ids'].to(device)
        labels = batch['labels'].to(device)

        # Forward pass and loss
        outputs = model(**model_inputs)
        loss = loss_fn(outputs, labels)

        # Backward pass
        loss.backward()

        # Gradient clipping to prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        # Optimizer and scheduler steps
        optimizer.step()
        scheduler.step()

        batch_loss = loss.item()
        epoch_loss += batch_loss
        progress_bar.set_postfix({"loss": batch_loss})

    return epoch_loss / len(data_loader)

In [None]:
def evaluate_transformer(model, data_loader, device, threshold=0.5):
    """Evaluate a multi-label model and collect metrics and predictions.

    Args:
        model: module returning probabilities for each label.
        data_loader: iterable of dict batches with ``input_ids``,
            ``attention_mask``, ``labels`` and optionally ``token_type_ids``.
        device: device the batch tensors are moved to.
        threshold: probability above which a label is predicted positive.

    Returns:
        dict with keys:
            ``accuracy``: element-wise accuracy over all (sample, label) pairs;
            ``macro_f1`` / ``micro_f1``: F1 averaged over labels;
            ``class_f1``: F1 per label name;
            ``loss``: binary cross-entropy averaged over all samples;
            ``predictions`` / ``true_labels``: stacked arrays, one row per sample.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    label_names = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    model.eval()
    all_predictions = []
    all_labels = []
    loss_fn = nn.BCELoss()  # built once instead of once per batch
    weighted_loss_sum = 0.0
    n_samples = 0

    with torch.no_grad():
        for batch in tqdm(data_loader, desc="Evaluating"):
            # Move the batch to the target device
            model_inputs = {
                'input_ids': batch['input_ids'].to(device),
                'attention_mask': batch['attention_mask'].to(device),
            }
            if 'token_type_ids' in batch:
                model_inputs['token_type_ids'] = batch['token_type_ids'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(**model_inputs)

            # Weight each batch mean by its size so a short final batch does not
            # count as much as a full one in the average.
            batch_size = labels.size(0)
            weighted_loss_sum += loss_fn(outputs, labels).item() * batch_size
            n_samples += batch_size

            # Convert probabilities to binary predictions
            all_predictions.append((outputs > threshold).float().cpu().numpy())
            all_labels.append(labels.cpu().numpy())

    if not all_predictions:
        raise ValueError("data_loader yielded no batches to evaluate")

    # Concatenate all batches
    all_predictions = np.vstack(all_predictions)
    all_labels = np.vstack(all_labels)

    # zero_division=0 yields the same 0.0 score sklearn would report for a label
    # with no positive predictions or targets, without emitting a warning each time.
    accuracy = accuracy_score(all_labels.flatten(), all_predictions.flatten())
    macro_f1 = f1_score(all_labels, all_predictions, average='macro', zero_division=0)
    micro_f1 = f1_score(all_labels, all_predictions, average='micro', zero_division=0)

    # Per-class F1
    class_f1 = {
        name: f1_score(all_labels[:, i], all_predictions[:, i], zero_division=0)
        for i, name in enumerate(label_names)
    }

    avg_loss = weighted_loss_sum / n_samples

    return {
        'accuracy': accuracy,
        'macro_f1': macro_f1,
        'micro_f1': micro_f1,
        'class_f1': class_f1,
        'loss': avg_loss,
        'predictions': all_predictions,
        'true_labels': all_labels
    }

In [None]:
def train_transformer_model(model, train_loader, val_loader, optimizer, scheduler, device, 
                           num_epochs=3, patience=2, model_save_path='../models/best_transformer.pt'):
    """Train with early stopping and checkpoint the best model by validation loss.

    Args:
        model: model to train in place.
        train_loader: loader passed to ``train_transformer_epoch``.
        val_loader: loader passed to ``evaluate_transformer``.
        optimizer: optimizer passed through to the training epoch.
        scheduler: scheduler passed through to the training epoch.
        device: device passed through to training and evaluation.
        num_epochs: maximum number of epochs.
        patience: number of consecutive epochs without validation-loss
            improvement after which training stops.
        model_save_path: file the best ``state_dict`` is written to.

    Returns:
        ``(train_losses, val_losses)``: one entry per completed epoch.

    Note:
        The model is left with the weights of the last epoch run; load
        ``model_save_path`` to get the best checkpoint.
    """
    # torch.save does not create missing directories, so make sure the target exists
    save_dir = os.path.dirname(model_save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    best_val_loss = float('inf')
    train_losses = []
    val_losses = []
    counter = 0  # Epochs since the last improvement (early stopping)

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")

        # Train
        start_time = time.time()
        train_loss = train_transformer_epoch(model, train_loader, optimizer, scheduler, device)
        train_losses.append(train_loss)

        # Validate
        val_results = evaluate_transformer(model, val_loader, device)
        val_loss = val_results['loss']
        val_losses.append(val_loss)

        # Epoch summary (the elapsed time covers training and validation)
        time_elapsed = time.time() - start_time
        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | "
              f"Val F1 Macro: {val_results['macro_f1']:.4f} | "
              f"Time: {time_elapsed//60:.0f}m {time_elapsed%60:.0f}s")

        # Class-wise F1 scores
        print("Class F1 scores:")
        for cls, f1 in val_results['class_f1'].items():
            print(f"  {cls}: {f1:.4f}")

        # Save the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), model_save_path)
            print(f"Saved best model with val_loss: {val_loss:.4f}")
            counter = 0  # Reset early stopping counter
        else:
            counter += 1
            print(f"EarlyStopping counter: {counter} out of {patience}")
            if counter >= patience:
                print("Early stopping triggered")
                break

    return train_losses, val_losses

## 5. BERT Implementation

In [None]:
# BERT tokenizer and classifier (moved to the selected device)
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertForMultiLabelClassification(num_labels=6, model_name='bert-base-uncased')
bert_model.to(device)

# Tokenized views of the sampled train / validation splits
train_dataset = ToxicCommentDatasetTransformer(
    X_train_sample, y_train_sample, bert_tokenizer
)
val_dataset = ToxicCommentDatasetTransformer(
    X_val_sample, y_val_sample, bert_tokenizer
)

# Small batches: transformer fine-tuning is memory hungry
BATCH_SIZE = 16

# Shuffle only the training data; validation order stays fixed
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

In [None]:
# AdamW optimizer for BERT fine-tuning
optimizer = AdamW(bert_model.parameters(), lr=2e-5)

# The scheduler is stepped once per batch, so its horizon is
# (batches per epoch) x (3 epochs); the first 10% of steps are warmup.
total_steps = 3 * len(train_loader)
warmup_steps = int(0.1 * total_steps)

scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps
)

In [None]:
# Fine-tune BERT; the best checkpoint (lowest validation loss) is written to disk
os.makedirs('../models', exist_ok=True)
bert_train_losses, bert_val_losses = train_transformer_model(
    model=bert_model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    scheduler=scheduler,
    device=device,
    num_epochs=3,
    patience=2,
    model_save_path='../models/bert_toxic_classifier.pt',
)

Epoch 1/3


Training:   0%|          | 0/625 [00:00<?, ?it/s]

NameError: name 'F' is not defined

## 6. RoBERTa Implementation

In [None]:
# RoBERTa tokenizer and classifier (same head, different encoder checkpoint)
roberta_tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
roberta_model = BertForMultiLabelClassification(num_labels=6, model_name='roberta-base')
roberta_model.to(device)

# Tokenized views of the sampled train / validation splits
train_dataset_roberta = ToxicCommentDatasetTransformer(
    X_train_sample, y_train_sample, roberta_tokenizer
)
val_dataset_roberta = ToxicCommentDatasetTransformer(
    X_val_sample, y_val_sample, roberta_tokenizer
)

# Shuffle only the training data; validation order stays fixed
train_loader_roberta = DataLoader(train_dataset_roberta, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader_roberta = DataLoader(val_dataset_roberta, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

In [None]:
# Setup optimizer and scheduler for RoBERTa
optimizer_roberta = AdamW(roberta_model.parameters(), lr=2e-5)

# Derive the schedule from RoBERTa's own loader instead of reusing the values
# computed in the BERT cell, so this cell does not depend on that cell's state.
roberta_total_steps = len(train_loader_roberta) * 3  # 3 epochs; keep in sync with num_epochs in the training call
roberta_warmup_steps = int(roberta_total_steps * 0.1)  # 10% of total steps for warmup

scheduler_roberta = get_linear_schedule_with_warmup(
    optimizer_roberta,
    num_warmup_steps=roberta_warmup_steps,
    num_training_steps=roberta_total_steps
)

In [None]:
# Fine-tune RoBERTa; the best checkpoint (lowest validation loss) is written to disk
roberta_train_losses, roberta_val_losses = train_transformer_model(
    model=roberta_model,
    train_loader=train_loader_roberta,
    val_loader=val_loader_roberta,
    optimizer=optimizer_roberta,
    scheduler=scheduler_roberta,
    device=device,
    num_epochs=3,
    patience=2,
    model_save_path='../models/roberta_toxic_classifier.pt',
)

## 7. DistilBERT Implementation (Faster Training)

In [None]:
# DistilBERT tokenizer and classifier (smaller encoder, same head)
distilbert_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
distilbert_model = BertForMultiLabelClassification(num_labels=6, model_name='distilbert-base-uncased')
distilbert_model.to(device)

# Tokenized views of the sampled train / validation splits
train_dataset_distilbert = ToxicCommentDatasetTransformer(
    X_train_sample, y_train_sample, distilbert_tokenizer
)
val_dataset_distilbert = ToxicCommentDatasetTransformer(
    X_val_sample, y_val_sample, distilbert_tokenizer
)

# The lighter model leaves room for batches twice as large as BERT's
train_loader_distilbert = DataLoader(
    train_dataset_distilbert, batch_size=BATCH_SIZE * 2, shuffle=True, num_workers=2
)
val_loader_distilbert = DataLoader(
    val_dataset_distilbert, batch_size=BATCH_SIZE * 2, shuffle=False, num_workers=2
)

In [None]:
# Setup optimizer and scheduler for DistilBERT
optimizer_distilbert = AdamW(distilbert_model.parameters(), lr=3e-5)  # Slightly higher learning rate

# DistilBERT uses a different batch size and epoch count than BERT, so its
# schedule must be derived from its own loader. Reusing BERT's step counts would
# leave the warmup too long and the learning rate far from zero at the end.
distilbert_total_steps = len(train_loader_distilbert) * 4  # 4 epochs; keep in sync with num_epochs in the training call
distilbert_warmup_steps = int(distilbert_total_steps * 0.1)  # 10% of total steps for warmup

scheduler_distilbert = get_linear_schedule_with_warmup(
    optimizer_distilbert,
    num_warmup_steps=distilbert_warmup_steps,
    num_training_steps=distilbert_total_steps
)

In [None]:
# Fine-tune DistilBERT; it is cheaper per epoch, so it gets one extra epoch
distilbert_train_losses, distilbert_val_losses = train_transformer_model(
    model=distilbert_model,
    train_loader=train_loader_distilbert,
    val_loader=val_loader_distilbert,
    optimizer=optimizer_distilbert,
    scheduler=scheduler_distilbert,
    device=device,
    num_epochs=4,
    patience=2,
    model_save_path='../models/distilbert_toxic_classifier.pt',
)

## 8. Evaluate on Test Set

In [None]:
# Each model needs the test split encoded with its own tokenizer
test_dataset_bert = ToxicCommentDatasetTransformer(X_test, y_test, bert_tokenizer)
test_dataset_roberta = ToxicCommentDatasetTransformer(X_test, y_test, roberta_tokenizer)
test_dataset_distilbert = ToxicCommentDatasetTransformer(X_test, y_test, distilbert_tokenizer)

# Evaluation loaders keep the original order; DistilBERT again uses double-size batches
test_loader_bert = DataLoader(
    test_dataset_bert, batch_size=BATCH_SIZE, shuffle=False, num_workers=2
)
test_loader_roberta = DataLoader(
    test_dataset_roberta, batch_size=BATCH_SIZE, shuffle=False, num_workers=2
)
test_loader_distilbert = DataLoader(
    test_dataset_distilbert, batch_size=BATCH_SIZE*2, shuffle=False, num_workers=2
)

In [None]:
# Restore the best checkpoint of every model before scoring on the test split
best_checkpoints = (
    (bert_model, '../models/bert_toxic_classifier.pt'),
    (roberta_model, '../models/roberta_toxic_classifier.pt'),
    (distilbert_model, '../models/distilbert_toxic_classifier.pt'),
)
for trained_model, checkpoint_path in best_checkpoints:
    trained_model.load_state_dict(torch.load(checkpoint_path))

# Score each model on the test loader built with its own tokenizer
print("Evaluating BERT model on test set...")
bert_test_results = evaluate_transformer(bert_model, test_loader_bert, device)

print("\nEvaluating RoBERTa model on test set...")
roberta_test_results = evaluate_transformer(roberta_model, test_loader_roberta, device)

print("\nEvaluating DistilBERT model on test set...")
distilbert_test_results = evaluate_transformer(distilbert_model, test_loader_distilbert, device)

## 9. Compare Model Performance

In [None]:
# Collect the headline metrics and per-class F1 of every model into one table
test_results_by_model = {
    'BERT': bert_test_results,
    'RoBERTa': roberta_test_results,
    'DistilBERT': distilbert_test_results,
}

model_results = {
    model_label: {
        'accuracy': test_results['accuracy'],
        'macro_f1': test_results['macro_f1'],
        'micro_f1': test_results['micro_f1'],
        **{f'f1_{k}': v for k, v in test_results['class_f1'].items()},
    }
    for model_label, test_results in test_results_by_model.items()
}

# One row per model, best macro F1 first
results_df = pd.DataFrame(model_results).T
results_df = results_df.sort_values('macro_f1', ascending=False)

# Display results
print("Model Performance Comparison:")
display(results_df)

# Save results. The output directory is not created anywhere else in the
# notebook, so create it here instead of failing on a fresh checkout.
os.makedirs('../results', exist_ok=True)
results_df.to_csv('../results/transformer_model_results.csv')

In [None]:
# Grouped bar chart: per-category F1 for each model
fig, ax = plt.subplots(figsize=(14, 8))

categories = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
x = np.arange(len(categories))
width = 0.25

# Per-category F1 scores in plotting order
bert_f1 = [bert_test_results['class_f1'][cat] for cat in categories]
roberta_f1 = [roberta_test_results['class_f1'][cat] for cat in categories]
distilbert_f1 = [distilbert_test_results['class_f1'][cat] for cat in categories]

# One bar per model, placed side by side around each category tick
ax.bar(x - width, bert_f1, width, label='BERT')
ax.bar(x, roberta_f1, width, label='RoBERTa')
ax.bar(x + width, distilbert_f1, width, label='DistilBERT')

ax.set_xlabel('Toxicity Category')
ax.set_ylabel('F1 Score')
ax.set_title('F1 Scores by Toxicity Category and Model')
ax.set_xticks(x)
ax.set_xticklabels(categories, rotation=45)
ax.legend()
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()

# Save the figure; create the output directory first so savefig cannot fail on it
os.makedirs('../results', exist_ok=True)
fig.savefig('../results/transformer_f1_comparison.png')
plt.show()

## 10. Model Prediction Pipeline

In [None]:
class TransformerToxicCommentPredictor:
    """Inference wrapper around a fine-tuned multi-label toxicity classifier.

    Args:
        model_path: path of the saved ``state_dict`` to load.
        model_type: ``'bert'``, ``'roberta'`` or ``'distilbert'`` (case-insensitive).
        device: device the model and inputs are placed on.

    Raises:
        ValueError: if ``model_type`` is not one of the supported names.
    """

    def __init__(self, model_path, model_type='bert', device='cpu'):
        self.device = torch.device(device)
        self.model_type = model_type.lower()

        # Reject unknown architectures before touching any tokenizer or weights
        if self.model_type not in ('bert', 'roberta', 'distilbert'):
            raise ValueError(f"Unsupported model type: {model_type}. Choose 'bert', 'roberta', or 'distilbert'.")

        # Tokenizer class and pre-trained checkpoint for each supported architecture
        tokenizer_cls, checkpoint = {
            'bert': (BertTokenizer, 'bert-base-uncased'),
            'roberta': (RobertaTokenizer, 'roberta-base'),
            'distilbert': (DistilBertTokenizer, 'distilbert-base-uncased'),
        }[self.model_type]
        self.tokenizer = tokenizer_cls.from_pretrained(checkpoint)
        self.model = BertForMultiLabelClassification(num_labels=6, model_name=checkpoint)

        # Restore the fine-tuned weights and switch to inference mode
        state_dict = torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        self.model.eval()

        # Output order of the classifier head
        self.label_names = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    def predict(self, text, threshold=0.5):
        """Score ``text`` and return probabilities and thresholded decisions.

        Returns:
            dict with ``probabilities`` (label -> float), ``predictions``
            (label -> 0/1, positive when the probability exceeds ``threshold``)
            and ``is_toxic`` (True when any label is predicted positive).
        """
        encoded = self.tokenizer(
            text,
            max_length=128,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )
        encoded = {name: tensor.to(self.device) for name, tensor in encoded.items()}

        model_kwargs = {
            'input_ids': encoded['input_ids'],
            'attention_mask': encoded['attention_mask'],
        }
        # Only the BERT path forwards segment ids; RoBERTa and DistilBERT do not use them
        if self.model_type == 'bert':
            model_kwargs['token_type_ids'] = encoded['token_type_ids']

        with torch.no_grad():
            outputs = self.model(**model_kwargs)

        # The model already returns probabilities; drop the batch axis and threshold
        probs = outputs.squeeze().cpu().numpy()
        binary_preds = (probs > threshold).astype(int)

        probabilities = {}
        predictions = {}
        for label, prob, pred in zip(self.label_names, probs, binary_preds):
            probabilities[label] = float(prob)
            predictions[label] = int(pred)

        return {
            'probabilities': probabilities,
            'predictions': predictions,
            'is_toxic': bool(binary_preds.any()),
        }

In [None]:
# One predictor per fine-tuned checkpoint, keyed by display name
predictor_specs = (
    ('BERT', 'bert', '../models/bert_toxic_classifier.pt'),
    ('RoBERTa', 'roberta', '../models/roberta_toxic_classifier.pt'),
    ('DistilBERT', 'distilbert', '../models/distilbert_toxic_classifier.pt'),
)
predictors = {
    display_name: TransformerToxicCommentPredictor(checkpoint_path, model_type=architecture, device=device)
    for display_name, architecture, checkpoint_path in predictor_specs
}

In [None]:
# Smoke-test every predictor on a few hand-written comments
example_texts = [
    "This is a positive comment. I really appreciate your help.",
    "You are an idiot and should not be allowed to post here.",
    "This is neutral content that has no emotional charge.",
    "You are a fucking disgrace."
]

for model_name in predictors:
    predictor = predictors[model_name]
    print(f"\n===== {model_name} Predictions =====\n")

    for text in example_texts:
        result = predictor.predict(text)
        print(f"Input: {text}")
        print(f"Is toxic: {result['is_toxic']}")
        print("Toxicity probabilities:")
        # One indented line per label, four decimals
        for label in result['probabilities']:
            prob = result['probabilities'][label]
            print(f"  {label}: {prob:.4f}")
        print()

## 11. Compare with RNN Models

In [None]:
# Load RNN results if available
try:
    # NOTE(review): unpickling can execute arbitrary code; only load result files
    # produced by this project.
    rnn_results_df = pd.read_pickle("../results/all_model_results.pkl")
    print("Loaded RNN model results")

    # Keep the headline metrics plus every per-class F1 column under its full
    # name (e.g. 'f1_severe_toxic') so the columns line up with the transformer
    # table. Splitting on '_' would truncate multi-word labels to 'f1_severe'.
    summary_cols = ['model_name', 'accuracy', 'macro_f1', 'micro_f1']
    class_f1_cols = [col for col in rnn_results_df.columns if col.startswith('f1_')]
    rnn_df = rnn_results_df[summary_cols + class_f1_cols].reset_index(drop=True)
    rnn_df.insert(1, 'model_type', 'RNN')

    # Build the transformer rows on a copy: mutating results_df in place would make
    # this cell fail when it is re-run (reset_index cannot insert 'model_name' twice).
    transformer_df = results_df.rename_axis('model_name').reset_index()
    transformer_df['model_type'] = 'Transformer'

    # Combine RNN and transformer results, best macro F1 first
    all_models_df = pd.concat([rnn_df, transformer_df], ignore_index=True)
    all_models_df = all_models_df.sort_values('macro_f1', ascending=False)

    print("\nCombined Model Performance:")
    display(all_models_df[['model_name', 'model_type', 'accuracy', 'macro_f1', 'micro_f1']])

    # Save combined results
    all_models_df.to_csv("../results/all_models_comparison.csv")

    # Plot comparison
    plt.figure(figsize=(14, 8))
    sns.barplot(x='model_name', y='macro_f1', hue='model_type', data=all_models_df)
    plt.title('Macro F1 Score by Model')
    plt.xlabel('Model')
    plt.ylabel('Macro F1 Score')
    plt.xticks(rotation=45, ha='right')
    plt.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.savefig('../results/all_models_macro_f1.png')
    plt.show()

except FileNotFoundError:
    print("RNN model results not found. Skipping comparison.")

## 12. Conclusion

In this notebook, we explored transformer-based models for toxic comment classification:

1. We implemented and evaluated three transformer architectures:
   - BERT
   - RoBERTa
   - DistilBERT

2. Each model was trained on a multi-label classification task to detect six types of toxicity.

3. We compared the performance of these models with each other and with RNN models from previous experiments.

4. We created a prediction pipeline for making inferences on new text.

Transformer models are generally expected to outperform RNN-based approaches because they capture contextual information better and are pre-trained on large corpora. RoBERTa often achieves the best performance, though DistilBERT offers a good trade-off between performance and speed. Treat these statements as expectations and confirm them against the result tables above once every training cell has run to completion.