# GPT Models for Toxic Comment Classification

This notebook demonstrates how to fine-tune and evaluate GPT-based transformer models (e.g., GPT-2) for multi-label toxic comment classification.

In [3]:
# Install required libraries (uncomment if needed)
# !pip install transformers scikit-learn torch pandas matplotlib seaborn

In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer, GPT2Model, GPT2PreTrainedModel, GPT2Config, get_linear_schedule_with_warmup
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import f1_score, accuracy_score
import os
import torch.optim as optim
import random

# Set random seeds and device
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cuda


NameError: name 'model' is not defined

In [None]:
# ...existing code...
from transformers import GPT2Tokenizer, GPT2Model, GPT2PreTrainedModel, GPT2Config, get_linear_schedule_with_warmup
import torch.optim as optim
# ...existing code...

# When initializing the optimizer:
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
# ...existing code...

## 1. Load and Prepare Data

In [None]:
# Load preprocessed data
train_data = pd.read_csv('../Dataset/train_preprocessed.csv')
X = train_data['comment_text'].fillna("")
y = train_data[['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']].fillna(0)

# Split data (simple split for demonstration)
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size=0.5, random_state=42, stratify=y_val)

## 2. Dataset Class for GPT

In [None]:
class ToxicCommentDatasetGPT(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts.reset_index(drop=True)
        self.labels = labels.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_length = max_length
    def __len__(self):
        return len(self.texts)
    def __getitem__(self, idx):
        text = self.texts.iloc[idx]
        labels = torch.FloatTensor(self.labels.iloc[idx].values)
        encoding = self.tokenizer(
            text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        encoding = {k: v.squeeze(0) for k, v in encoding.items()}
        encoding['labels'] = labels
        return encoding

## 3. GPT Model for Multi-Label Classification

In [None]:
class GPT2ForMultiLabelClassification(GPT2PreTrainedModel):
    def __init__(self, config, num_labels=6):
        super().__init__(config)
        self.gpt2 = GPT2Model(config)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(config.hidden_size, num_labels)
        self.init_weights()
    def forward(self, input_ids, attention_mask=None, labels=None):
        outputs = self.gpt2(input_ids=input_ids, attention_mask=attention_mask)
        last_hidden_state = outputs.last_hidden_state
        pooled_output = last_hidden_state[:, 0]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)
        probs = torch.sigmoid(logits)
        return probs

## 4. Training and Evaluation Functions

In [None]:
def train_epoch(model, data_loader, optimizer, scheduler, device, criterion):
    model.train()
    optimizer = optim.AdamW(model.parameters(), lr=2e-5)
    total_loss = 0
    for batch in data_loader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        total_loss += loss.item()
    return total_loss / len(data_loader)

def evaluate(model, data_loader, device, threshold=0.5):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = (outputs > threshold).float().cpu().numpy()
            all_preds.append(preds)
            all_labels.append(labels.cpu().numpy())
    all_preds = np.vstack(all_preds)
    all_labels = np.vstack(all_labels)
    macro_f1 = f1_score(all_labels, all_preds, average='macro')
    micro_f1 = f1_score(all_labels, all_preds, average='micro')
    accuracy = accuracy_score(all_labels.flatten(), all_preds.flatten())
    return {'macro_f1': macro_f1, 'micro_f1': micro_f1, 'accuracy': accuracy}

## 5. Initialize Tokenizer, Model, and DataLoaders

In [None]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token  # GPT2 has no pad token by default
config = GPT2Config.from_pretrained('gpt2')
model = GPT2ForMultiLabelClassification(config, num_labels=6)
model.to(device)

train_dataset = ToxicCommentDatasetGPT(X_train, y_train, tokenizer)
val_dataset = ToxicCommentDatasetGPT(X_val, y_val, tokenizer)
test_dataset = ToxicCommentDatasetGPT(X_test, y_test, tokenizer)

BATCH_SIZE = 8
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

## 6. Training Loop

In [None]:
optimizer = AdamW(model.parameters(), lr=2e-5)
num_epochs = 3
total_steps = len(train_loader) * num_epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=int(0.1*total_steps), num_training_steps=total_steps)
criterion = nn.BCELoss()

best_val_f1 = 0
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    train_loss = train_epoch(model, train_loader, optimizer, scheduler, device, criterion)
    val_metrics = evaluate(model, val_loader, device)
    print(f"Train Loss: {train_loss:.4f} | Val Macro F1: {val_metrics['macro_f1']:.4f} | Val Micro F1: {val_metrics['micro_f1']:.4f}")
    if val_metrics['macro_f1'] > best_val_f1:
        best_val_f1 = val_metrics['macro_f1']
        torch.save(model.state_dict(), '../models/gpt2_toxic_classifier.pt')
        print("Saved best model.")

## 7. Evaluate on Test Set

In [None]:
# Load best model
model.load_state_dict(torch.load('../models/gpt2_toxic_classifier.pt', map_location=device))
model.to(device)
test_metrics = evaluate(model, test_loader, device)
print(f"Test Macro F1: {test_metrics['macro_f1']:.4f} | Test Micro F1: {test_metrics['micro_f1']:.4f} | Test Accuracy: {test_metrics['accuracy']:.4f}")

## 8. Inference Pipeline

In [None]:
def predict_toxicity_gpt(text, model, tokenizer, device, threshold=0.5):
    model.eval()
    inputs = tokenizer(
        text,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        probs = model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask']).squeeze().cpu().numpy()
    preds = (probs > threshold).astype(int)
    labels = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    return dict(zip(labels, preds)), dict(zip(labels, probs))

# Example
example = "You are a disgrace and should be banned."
preds, probs = predict_toxicity_gpt(example, model, tokenizer, device)
print(f"Input: {example}")
print("Predictions:", preds)
print("Probabilities:", probs)