# In [None]:
#######  PRAJITA #########
### Using RST: RoBERTa ###
##########################

# Import necessary libraries
import pandas as pd
import numpy as np
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
import torch
import matplotlib.pyplot as plt
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from scipy.stats import entropy

# Mount Google Drive so the dataset CSV stored there is reachable from Colab
drive.mount('/content/drive')

# Dataset Path
file_path = "/content/drive/My Drive/Dataset for AI Generated Text Detection/Dataset_for_AI_Generated_Text_Detection.csv"

# Load dataset
data = pd.read_csv(file_path)
print(data.head())  # Verify dataset structure

# Basic preprocessing
# Empty CSV cells are read as NaN. A NaN text breaks `text.lower()` and the
# tokenizer, and a NaN label turns the whole label column into floats, so rows
# missing either field are dropped before the columns are coerced to str / int.
# On a clean dataset this is a no-op and the split below is unchanged.
clean_data = data.dropna(subset=['text', 'label'])
dropped_rows = len(data) - len(clean_data)
if dropped_rows:
    print(f"Dropped {dropped_rows} rows with missing text or label")
texts = clean_data['text'].astype(str).tolist()
labels = clean_data['label'].astype(int).tolist()

# Split dataset: 80% train / 20% test, seeded for reproducibility
train_texts, test_texts, train_labels, test_labels = train_test_split(texts, labels, test_size=0.2, random_state=42)

# Function to plot ROC curve
def plot_roc_curve(y_true, y_prob, model_name):
    """Draw the ROC curve for one model and show it.

    Args:
        y_true: ground-truth binary labels, passed to ``roc_curve``.
        y_prob: scores for the positive class, passed to ``roc_curve``.
        model_name: name used in the legend entry and in the plot title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_prob)
    area_under_curve = auc(false_pos_rate, true_pos_rate)
    curve_label = f'{model_name} (AUC = {area_under_curve:.2f})'

    plt.figure(figsize=(8, 6))
    plt.plot(false_pos_rate, true_pos_rate, lw=2, label=curve_label)
    # Dashed diagonal: the ROC of a classifier that guesses at random.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])  # a little headroom so the curve is not clipped at TPR = 1
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'ROC Curve for {model_name}')
    plt.legend(loc="lower right")
    plt.show()

# RST with H-score (simplified implementation)
def compute_h_score(texts1, texts2):
    """Compute a simplified H-score: the Hellinger distance between word distributions.

    Each corpus is lower-cased, split on whitespace, and turned into relative
    word frequencies. The score is the Hellinger distance between the two
    frequency vectors over the union of both vocabularies.

    Args:
        texts1: iterable of strings forming the first corpus.
        texts2: iterable of strings forming the second corpus.
            Entries that are not ``str`` (for example the float NaN pandas
            produces for empty CSV cells) are skipped instead of raising.

    Returns:
        A float in [0, 1]: 0 when both distributions are identical, 1 when the
        vocabularies are disjoint. If both corpora contain no words the result
        is 0.0; if exactly one of them is empty the result is 1/sqrt(2).
    """
    from collections import Counter  # local import keeps this block self-contained

    def get_word_freq(texts):
        """Return {word: relative frequency} for the whitespace tokens of ``texts``."""
        word_count = Counter()
        for text in texts:
            if isinstance(text, str):
                word_count.update(text.lower().split())
        total = sum(word_count.values())
        # With no words the comprehension is empty, so no division by zero happens.
        return {word: count / total for word, count in word_count.items()}

    freq1 = get_word_freq(texts1)
    freq2 = get_word_freq(texts2)

    # Union of both vocabularies, sorted so the floating-point summation order
    # (and therefore the result) does not depend on set iteration order.
    vocabulary = sorted(set(freq1) | set(freq2))
    p = np.array([freq1.get(word, 0.0) for word in vocabulary])
    q = np.array([freq2.get(word, 0.0) for word in vocabulary])

    # Hellinger distance as H-score
    h_score = np.sqrt(np.sum((np.sqrt(p) - np.sqrt(q)) ** 2)) / np.sqrt(2)
    return h_score

# RST step (simplified): the training texts are partitioned once by label and a
# single H-score is computed between the two groups; nothing is split recursively here.
ai_texts = [text for text, label in zip(train_texts, train_labels) if label == 1]
non_ai_texts = [text for text, label in zip(train_texts, train_labels) if label == 0]
h_score = compute_h_score(ai_texts, non_ai_texts)
print(f"H-score between AI and non-AI texts: {h_score:.4f}")

# Pretrained RoBERTa tokenizer, and the base model with a 2-class classification head
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)

# Tokenize data
def tokenize_data(texts, max_length=128):
    """Encode ``texts`` with the module-level RoBERTa ``tokenizer``.

    Args:
        texts: list of strings, encoded together in a single tokenizer call.
        max_length: maximum number of tokens per text; longer inputs are
            truncated. Defaults to 128, the value this script always used.

    Returns:
        The tokenizer output with PyTorch tensors (``return_tensors='pt'``).
        Because ``padding=True`` is applied to the whole list at once, every
        sequence is padded to the longest one in ``texts``.
    """
    return tokenizer(texts, padding=True, truncation=True, max_length=max_length, return_tensors='pt')


train_encodings = tokenize_data(train_texts)
test_encodings = tokenize_data(test_texts)

# Create PyTorch dataset
class TextDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with their labels.

    ``encodings`` is a mapping whose values are indexable per sample (for
    example the tensors returned by the tokenizer); ``labels`` is an indexable
    sequence with one entry per sample.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The label sequence defines how many samples the dataset exposes.
        return len(self.labels)

    def __getitem__(self, idx):
        # Take row `idx` from every encoding field, then attach the label
        # under the 'labels' key.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = values[idx]
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

train_dataset = TextDataset(train_encodings, train_labels)
test_dataset = TextDataset(test_encodings, test_labels)

# Training arguments
# transformers renamed `evaluation_strategy` to `eval_strategy`, and newer
# releases reject the old keyword with a TypeError. Use whichever name the
# installed version accepts so the script runs on both old and new releases.
import inspect

_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    **{_eval_strategy_key: "epoch"},  # evaluate at the end of every epoch
)

# Trainer
# NOTE(review): the per-epoch evaluation set is the test split itself; there is
# no separate validation split in this script.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

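## wandb API key: [REDACTED] -- never commit credentials. Run `wandb login` or set the WANDB_API_KEY environment variable before training, and revoke the key that was previously stored here. ##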
# Fine-tune the model on the training split
trainer.train()

# Score the test split: argmax over the class scores gives the predicted label,
# and the softmax value in column 1 is used as the positive-class (AI) probability.
predictions = trainer.predict(test_dataset)
logits = predictions.predictions
pred_labels = np.argmax(logits, axis=1)
class_probs = torch.softmax(torch.tensor(logits), dim=1)
pred_probs = class_probs[:, 1].numpy()

# Per-class precision / recall / F1
print("\nClassification Report:")
report = classification_report(test_labels, pred_labels, target_names=['Non-AI', 'AI'])
print(report)

# Counts of true vs. predicted labels
print("\nConfusion Matrix:")
matrix = confusion_matrix(test_labels, pred_labels)
print(matrix)

# ROC curve built from the positive-class probabilities
plot_roc_curve(test_labels, pred_probs, "RoBERTa")