# In [None]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from torch.optim import AdamW
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import time

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

# NOTE(review): both frames are read from the same placeholder path, so the
# "test" rows are the very rows the train/dev split is drawn from. Point these
# at separate files before trusting the reported test metrics.
train_data = pd.read_csv('./data/your_input_file.csv')
test_data = pd.read_csv('./data/your_input_file.csv')

# String label -> integer class id.
label_mapping = dict(safe=0, toxic=1)

# Hold out 10% of the training frame as a dev set; the seed is fixed so the
# split is repeatable.
train_texts, dev_texts, train_labels, dev_labels = train_test_split(
    train_data['input'].tolist(),
    train_data['output'].tolist(),
    random_state=42,
    test_size=0.1,
)

test_texts = test_data['input'].tolist()
test_labels = test_data['output'].tolist()

# Encode every label list; a label missing from `label_mapping` raises KeyError.
train_labels = list(map(label_mapping.__getitem__, train_labels))
dev_labels = list(map(label_mapping.__getitem__, dev_labels))
test_labels = list(map(label_mapping.__getitem__, test_labels))

class SentimentDataset(Dataset):
    """Map-style dataset pairing pre-tokenized texts with their labels.

    All texts are tokenized once at construction time; indexing returns a dict
    with one tensor per tokenizer output field plus a ``labels`` entry.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        """Tokenize ``texts`` and store ``labels`` as a tensor.

        Args:
            texts: Sequence of input strings handed to ``tokenizer`` as one batch.
            labels: Sequence of numeric labels, one per text.
            tokenizer: Callable invoked with ``truncation=True``,
                ``padding="max_length"`` and ``max_length``; its result must
                expose ``.items()`` yielding indexable per-example values.
            max_length: Length passed through to the tokenizer.
        """
        self.encodings = tokenizer(
            texts,
            truncation=True,
            padding="max_length",
            max_length=max_length,
        )
        self.labels = torch.tensor(labels)

    def __len__(self):
        """Return the number of labelled examples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``idx``-th example as a dict of tensors."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = self.labels[idx]
        return sample

from typing import List, Dict

def _eval_strategy_kwarg():
    """Return the ``TrainingArguments`` keyword that sets the evaluation schedule.

    Newer transformers releases spell this argument ``eval_strategy`` while
    older ones only accept ``evaluation_strategy``. Inspecting the constructor
    keeps the script working against whichever version is installed.
    """
    import inspect  # local import: only needed for this version probe

    accepted = inspect.signature(TrainingArguments.__init__).parameters
    return "eval_strategy" if "eval_strategy" in accepted else "evaluation_strategy"


def train_transformers(model_name: str, max_length=128, num_labels=2, epochs=3):
    """Fine-tune ``model_name`` for 1..``epochs`` epochs and report test metrics.

    For every epoch budget ``n`` in ``1..epochs`` a *fresh* copy of the
    pretrained model is fine-tuned for exactly ``n`` epochs and then scored on
    the test set. Starting each run from the pretrained weights with a new
    optimizer is what makes the "Epoch n" label accurate; reusing one model
    across runs would silently accumulate 1 + 2 + ... + n epochs of training.

    The function reads the module-level ``train_texts``/``train_labels``,
    ``dev_texts``/``dev_labels`` and ``test_texts``/``test_labels`` lists and
    prints per-run metrics plus a final summary.

    Args:
        model_name: Hugging Face model id passed to ``from_pretrained``.
        max_length: Token length used when building the datasets.
        num_labels: Number of output classes for the classification head.
        epochs: Largest epoch budget to evaluate.

    Returns:
        A dict with the best run (``best_epoch``, ``best_accuracy``,
        ``best_f1``, ``best_recall``, ``best_precision``) and the per-run
        metric lists (``accuracy``, ``f1``, ``recall``, ``precision``).
    """
    print(f"\n{'='*20} Training {model_name} {'='*20}")
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Some tokenizers (e.g. GPT-2) ship without a pad token; reuse EOS so that
    # fixed-length padding works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Tokenization does not depend on the epoch budget, so do it once.
    train_dataset = SentimentDataset(train_texts, train_labels, tokenizer, max_length)
    dev_dataset = SentimentDataset(dev_texts, dev_labels, tokenizer, max_length)
    test_dataset = SentimentDataset(test_texts, test_labels, tokenizer, max_length)

    # Hub ids may contain '/', which would otherwise create nested directories.
    run_tag = model_name.replace('/', '_')
    eval_kwargs = {_eval_strategy_kwarg(): "epoch"}

    accuracy_results, f1_results, recall_results, precision_results = [], [], [], []
    best_epoch, best_accuracy, best_f1, best_recall, best_precision = 0, 0.0, 0.0, 0.0, 0.0

    for num_epochs in range(1, epochs + 1):
        # Fresh weights and optimizer state per run (see docstring).
        model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        model.config.pad_token_id = tokenizer.pad_token_id
        model.resize_token_embeddings(len(tokenizer))
        optimizer = AdamW(model.parameters(), lr=2e-5)

        training_args = TrainingArguments(
            output_dir=f'./results_{run_tag}_{num_epochs}',
            num_train_epochs=num_epochs,
            per_device_train_batch_size=8,
            per_device_eval_batch_size=8,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir=f'./logs_{run_tag}_{num_epochs}',
            save_strategy="epoch",
            load_best_model_at_end=True,
            save_total_limit=2,
            logging_steps=500,
            report_to="none",
            **eval_kwargs,
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=dev_dataset,
            optimizers=(optimizer, None)
        )

        start_train_time = time.time()
        trainer.train()
        train_time = time.time() - start_train_time

        start_infer_time = time.time()
        predictions = trainer.predict(test_dataset)
        infer_time = (time.time() - start_infer_time) / len(test_dataset)

        y_pred = torch.argmax(torch.tensor(predictions.predictions), axis=1).numpy()

        acc = accuracy_score(test_labels, y_pred)
        f1_weighted = f1_score(test_labels, y_pred, average='weighted')
        recall_weighted = recall_score(test_labels, y_pred, average='weighted')
        precision_weighted = precision_score(test_labels, y_pred, average='weighted')
        cm = confusion_matrix(test_labels, y_pred)
        num_params = sum(p.numel() for p in model.parameters())

        print(f"=== Epoch {num_epochs} ===")
        print(f"Accuracy: {acc:.4f} | F1-weighted: {f1_weighted:.4f} | Recall-weighted: {recall_weighted:.4f} | Precision-weighted: {precision_weighted:.4f}")
        print("Confusion Matrix:\n", cm)
        print(f"Model Params: {num_params}")
        print(f"Training Time: {train_time:.2f}s | Inference Time/sample: {infer_time:.4f}s\n")

        accuracy_results.append(acc)
        f1_results.append(f1_weighted)
        recall_results.append(recall_weighted)
        precision_results.append(precision_weighted)

        # NOTE(review): the "best" run is selected on test accuracy, so the
        # reported best score is not an unbiased estimate.
        if acc > best_accuracy:
            best_epoch = num_epochs
            best_accuracy = acc
            best_f1 = f1_weighted
            best_recall = recall_weighted
            best_precision = precision_weighted

        # Drop this run's model before loading the next one so that several
        # copies do not pile up in (GPU) memory.
        del trainer, model, optimizer
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    print(f"\nBEST Epoch: {best_epoch} | Accuracy: {best_accuracy:.4f} | F1: {best_f1:.4f} | Recall: {best_recall:.4f} | Precision: {best_precision:.4f}")
    print("Accuracy Curve:", accuracy_results)
    print("F1 Curve:", f1_results)
    print("Recall Curve:", recall_results)
    print("Precision Curve:", precision_results)

    return {
        'best_epoch': best_epoch,
        'best_accuracy': best_accuracy,
        'best_f1': best_f1,
        'best_recall': best_recall,
        'best_precision': best_precision,
        'accuracy': accuracy_results,
        'f1': f1_results,
        'recall': recall_results,
        'precision': precision_results,
    }

# Checkpoints to fine-tune, processed in this order.
model_list = [
    'bert-base-uncased',
    'gpt2',
    'answerdotai/modernbert-base',
    'roberta-base',
    'microsoft/deberta-base',
]

for model_name in model_list:
    # Names containing 'roberta' or 'deberta' get 128-token inputs; every
    # other checkpoint is trained with 64.
    if any(marker in model_name for marker in ('roberta', 'deberta')):
        train_transformers(model_name, max_length=128)
    else:
        train_transformers(model_name, max_length=64)
