<a href="https://colab.research.google.com/github/SaiffTunio/AI-Generated-Text-Detection/blob/main/saif.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the required libraries
!pip install evaluate
!pip install datasets

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from evaluate import load

# Checkpoint identifier shared by the classifier and its tokenizer
model_name = "roberta-large-openai-detector"  # pre-trained detection model

# Load the sequence classifier and the matching tokenizer from that checkpoint
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tiny illustrative dataset: texts[i] is paired with labels[i]
# Label convention used here: 1 = AI-generated, 0 = human-written
# NOTE(review): confirm that the checkpoint's output index 1 really means
# "AI-generated" -- TODO verify against the model's own label mapping
texts = [
    "This is likely AI-generated.",
    "This sentence was written by a human.",
]
labels = [1, 0]

# Tokenize a batch of examples (produces input_ids and attention_mask)
def preprocess_function(examples):
    """Tokenize the "text" entry of a batch of examples.

    Args:
        examples: Batch mapping supplied by ``Dataset.map(..., batched=True)``;
            it must contain a "text" entry.

    Returns:
        The tokenizer output for the batch.
    """
    # Truncate over-long inputs, but do not pad here: padding="max_length"
    # would inflate every short sentence to the model's maximum length.
    # The Trainer in this cell is created with the tokenizer, so it pads each
    # batch dynamically to its longest sequence at collation time.
    return tokenizer(examples["text"], truncation=True)

# Accuracy metric from the `evaluate` library (used by compute_metrics)
metric = load("accuracy")

# Wrap the raw texts and labels in a Dataset with "text" and "labels" columns
dataset = Dataset.from_dict(
    {
        "text": texts,
        "labels": labels,
    }
)

# Tokenize the whole dataset, handing examples to the function in batches
encoded_dataset = dataset.map(function=preprocess_function, batched=True)

# Metric callback handed to the Trainer
def compute_metrics(eval_pred):
    """Compute accuracy from an evaluation result.

    Args:
        eval_pred: Pair of (logits, labels) for the evaluated examples.

    Returns:
        Whatever ``metric.compute`` returns for the arg-max predictions
        compared against the reference labels.
    """
    raw_scores, gold = eval_pred
    # The predicted class is the index of the largest logit on the last axis
    predicted = raw_scores.argmax(axis=-1)
    scores = metric.compute(predictions=predicted, references=gold)
    return scores

# Define Training Arguments (only evaluation is run) and disable external reporting
training_args = TrainingArguments(
    output_dir="./results",
    per_device_eval_batch_size=16,
    # Without this, logging the evaluation metrics can activate W&B or other
    # installed logger integrations; this cell only needs the local result.
    report_to="none",
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

# Evaluate the model using the encoded dataset
results = trainer.evaluate(eval_dataset=encoded_dataset)

# Print accuracy (the "accuracy" metric is read back under the "eval_accuracy" key)
print("Accuracy:", results["eval_accuracy"])


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from evaluate import load
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
import os

# Switch off Weights & Biases logging, which this notebook does not use
os.environ["WANDB_DISABLED"] = "true"

# Checkpoint identifier shared by the classifier and its tokenizer
model_name = "roberta-large-openai-detector"  # pre-trained detection model

# Load the sequence classifier and the matching tokenizer from that checkpoint
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tiny illustrative dataset: texts[i] is paired with labels[i]
# Label convention used here: 1 = AI-generated, 0 = human-written
# NOTE(review): confirm that the checkpoint's output index 1 really means
# "AI-generated" -- TODO verify against the model's own label mapping
texts = [
    "This is likely AI-generated.",
    "This sentence was written by a human.",
]
labels = [1, 0]

# Tokenize a batch of examples (produces input_ids and attention_mask)
def preprocess_function(examples):
    """Tokenize the "text" entry of a batch of examples.

    Args:
        examples: Batch mapping supplied by ``Dataset.map(..., batched=True)``;
            it must contain a "text" entry.

    Returns:
        The tokenizer output for the batch.
    """
    # Only truncate here. Padding every example with padding="max_length"
    # makes each short sentence as long as the model's maximum input, which
    # wastes compute; the Trainer below receives the tokenizer and therefore
    # pads each batch to its own longest sequence when batches are collated.
    return tokenizer(examples["text"], truncation=True)

# Accuracy metric from the `evaluate` library; loaded for optional use,
# since the metrics function in this cell relies on sklearn instead
metric = load("accuracy")

# Wrap the raw texts and labels in a Dataset with "text" and "labels" columns
dataset = Dataset.from_dict(
    {
        "text": texts,
        "labels": labels,
    }
)

# Tokenize the whole dataset, handing examples to the function in batches
encoded_dataset = dataset.map(function=preprocess_function, batched=True)

# Define a function to evaluate the model
def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1, and print the confusion matrix.

    Args:
        eval_pred: Pair of (logits, labels) for the evaluated examples.
            The logits' last axis is indexed by class.

    Returns:
        Dict with the scalar entries "accuracy" and "f1". The confusion
        matrix is only printed, not returned, so that the returned metrics
        stay scalar for logging.
    """
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)

    # Compute accuracy and F1 score using sklearn
    acc = accuracy_score(labels, predictions)
    f1 = f1_score(labels, predictions, average="weighted")  # Using weighted average for F1

    # Pin the class list to the model's output width. Without it sklearn
    # infers the classes from the values it sees, so a batch in which one
    # class never occurs yields a smaller matrix whose rows and columns no
    # longer line up with the class indices.
    class_ids = list(range(logits.shape[-1]))
    conf_matrix = confusion_matrix(labels, predictions, labels=class_ids)
    print("Confusion Matrix:\n", conf_matrix)

    # Return only scalar metrics for logging
    return {
        "accuracy": acc,
        "f1": f1,
    }

# Evaluation configuration; report_to="none" disables W&B and other external loggers
training_args = TrainingArguments(output_dir="./results", per_device_eval_batch_size=16, report_to="none")

# Bundle the model, arguments, metric callback and tokenizer into a Trainer
trainer = Trainer(model=model, args=training_args, compute_metrics=compute_metrics, tokenizer=tokenizer)

# Score the tokenized examples
results = trainer.evaluate(eval_dataset=encoded_dataset)

# Report accuracy and F1; the values returned by compute_metrics are read
# back from the results under keys carrying an "eval_" prefix
for heading, metric_key in (("Accuracy:", "eval_accuracy"), ("F1 Score:", "eval_f1")):
    print(heading, results[metric_key])


Some weights of the model checkpoint at roberta-large-openai-detector were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Confusion Matrix:
 [[1 0]
 [0 1]]
Accuracy: 1.0
F1 Score: 1.0
