In [32]:
import pandas as pd
import numpy as np
import torch
import logging
from sklearn.model_selection import train_test_split
import json

from datasets import Dataset
from transformers import (
    RobertaTokenizer,
    RobertaForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
)
from sklearn.metrics import accuracy_score

In [14]:
# Load CSV
df = pd.read_csv('Data/processed_ai_vs_human.csv')
df.head()

Unnamed: 0,text,generated
0,Cars Cars around since became famous 1900s Hen...,0
1,Transportation large necessity countries world...,0
2,Americas love affair vehicles seems cooling sa...,0
3,often ride car drive one motor vehicle work st...,0
4,Cars wonderful thing perhaps one worlds greate...,0


In [38]:
filenames = ['Data/arxiv_chatGPT.jsonl', 'Data/arxiv_cohere.jsonl','Data/reddit_chatGPT.jsonl','Data/reddit_cohere.jsonl']
rows = []

for file in filenames: 
     with open(file, "r", encoding="utf-8") as file:
        for line in file:
            entry = json.loads(line)
            if "human_text" in entry and "machine_text" in entry:
                rows.append({"text": entry["human_text"].strip(), "generated": 0})
                rows.append({"text": entry["machine_text"].strip(), "generated": 1})

# Create DataFrame
json_df = pd.DataFrame(rows)

In [50]:
from sklearn.model_selection import train_test_split
from datasets import Dataset

# Toggle to use a small subset for debugging
USE_SMALL_DATASET = True  
USE_GIVEN_DEV_SET = True

# Clean empty or invalid entries
df = df[df['text'].apply(lambda x: isinstance(x, str) and len(x.strip()) > 0)]

# Split into train and test
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Optionally reduce to 1/80th of each set
if USE_SMALL_DATASET:
    train_df = train_df.sample(frac=1/20, random_state=42)
    test_df = test_df.sample(frac=1/20, random_state=42)

if USE_GIVEN_DEV_SET:
    test_df = json_df

# Convert to Hugging Face Datasets
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)


In [51]:
# Load tokenizer and model
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=2)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [52]:
# Tokenize
def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding="max_length", max_length=128)

tokenized_train = train_dataset.map(tokenize_function, batched=True)
tokenized_test = test_dataset.map(tokenize_function, batched=True)

tokenized_train = tokenized_train.rename_column("generated", "labels")
tokenized_test = tokenized_test.rename_column("generated", "labels")\

tokenized_train.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
tokenized_test.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/889 [00:00<?, ? examples/s]

Map:   0%|          | 0/24000 [00:00<?, ? examples/s]

In [53]:
# Training args
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="no",
    save_strategy="no",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
    load_best_model_at_end=False,
    metric_for_best_model="accuracy",
    gradient_accumulation_steps=2,
    greater_is_better=True
)

In [54]:
# Metrics
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=1)
    
    acc = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    
    return {
        "accuracy": acc,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }


In [55]:
# Trainer
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

  trainer = Trainer(


In [56]:
# Train
trainer.train()



Step,Training Loss


TrainOutput(global_step=140, training_loss=0.144110107421875, metrics={'train_runtime': 979.6481, 'train_samples_per_second': 4.537, 'train_steps_per_second': 0.143, 'total_flos': 292382160268800.0, 'train_loss': 0.144110107421875, 'epoch': 5.0})

In [57]:
# Evaluate
trainer.evaluate()

{'eval_loss': 0.3177065849304199,
 'eval_accuracy': 0.8767916666666666,
 'eval_precision': 0.9004516871844832,
 'eval_recall': 0.84725,
 'eval_f1': 0.8730410888325962,
 'eval_runtime': 934.5744,
 'eval_samples_per_second': 25.68,
 'eval_steps_per_second': 1.605,
 'epoch': 5.0}

In [31]:
# Save model
model.save_pretrained("./results/final_model")
tokenizer.save_pretrained("./results/final_model")

('./results/final_model/tokenizer_config.json',
 './results/final_model/special_tokens_map.json',
 './results/final_model/vocab.json',
 './results/final_model/merges.txt',
 './results/final_model/added_tokens.json')

In [47]:
preds = trainer.predict(tokenized_test)
pred_labels = preds.predictions.argmax(axis=1)
print("Predicted label counts:", pd.Series(pred_labels).value_counts())

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Predicted label counts: 0    24000
Name: count, dtype: int64


In [49]:
true_labels = test_df["generated"]
print("True label counts:", true_labels.value_counts())

True label counts: generated
0    12000
1    12000
Name: count, dtype: int64
