In [3]:
from transformers import AutoTokenizer,AutoModelForSequenceClassification, TrainingArguments,Trainer
from datasets import load_dataset,DatasetDict
import torch


# Pick the compute device name: the GPU when torch reports CUDA is available,
# otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# --- Configuration -----------------------------------------------------------
# NOTE(review): this is an absolute, machine-specific path; point it at your
# local copy of the CSV before running on another machine.
data_path = "/Users/kannavsethi/Desktop/nlp-final-project/data/AI_Human.csv"
text_column = "text"
label_column = "generated"
model_name = "distilbert-base-uncased"

# --- Load --------------------------------------------------------------------
# Everything is read into a single split named "full" and divided up below.
raw_data = load_dataset("csv", data_files={"full": data_path})

# --- Normalise column names --------------------------------------------------
# `rename_column` raises ValueError when the target name already exists in the
# dataset, so renaming "text" -> "text" would crash. Only rename when the
# configured source name actually differs from the canonical one.
if text_column != "text":
    raw_data = raw_data.rename_column(text_column, "text")
if label_column != "label":
    raw_data = raw_data.rename_column(label_column, "label")

# `stratify_by_column` is only supported for ClassLabel features, but columns
# loaded from CSV are plain Value features. Encode the label column as a
# ClassLabel (class ids are assigned in sorted order of the distinct values)
# so the stratified splits below work and the labels become integer class ids.
raw_data = raw_data.class_encode_column("label")

num_classes = raw_data["full"].features["label"].num_classes
assert num_classes == 2, f"expected a binary label column, found {num_classes} classes"

# --- Split: 80% train / 10% validation / 10% test ----------------------------
# First hold out 20% of the data, then cut that hold-out in half. Both splits
# are stratified on the label and seeded for reproducibility.
d0 = raw_data["full"].train_test_split(test_size=0.2, seed=42, stratify_by_column="label")

d1 = d0["test"].train_test_split(test_size=0.5, seed=42, stratify_by_column="label")

dataset = DatasetDict({
    "train": d0["train"],       # 80%
    "validation": d1["train"],  # 10%
    "test": d1["test"],         # 10%
})

In [None]:
# Load the tokenizer from the same checkpoint name that the model uses, so the
# two cannot silently drift apart if `model_name` is changed.
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(batch):
    """Tokenize the "text" field of a batch of examples.

    Args:
        batch: a batch from `Dataset.map(..., batched=True)`; `batch["text"]`
            is passed to the tokenizer.

    Returns:
        The tokenizer output for the batch, truncated to the model's maximum
        length and left unpadded.
    """
    # No padding and no `return_tensors` here: padding inside `map` would pad
    # every example to the longest text in the whole map batch and store that
    # padding in the cached dataset. The Trainer is given the tokenizer and
    # pads each training batch dynamically instead.
    return tokenizer(batch["text"], truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [None]:
from evaluation_metrics import compute_metrics_for_trainer

# Sequence-classification model with a two-label head, loaded from the
# checkpoint named by `model_name`.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Training hyperparameters, grouped by concern. Evaluation and checkpointing
# both use the "epoch" strategy, and the best checkpoint is reloaded at the end.
training_config = dict(
    # where artefacts go
    output_dir="./models",
    logging_dir="./logs",
    logging_steps=10,
    # schedule
    num_train_epochs=3,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # optimisation
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
)
training_args = TrainingArguments(**training_config)

# Train on the "train" split and evaluate on the "validation" split, scoring
# with the project's `compute_metrics_for_trainer`.
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics_for_trainer,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
)

trainer.train()

In [None]:
from evaluation_metrics import evaluate_model

# Aggregate metrics on the held-out test split.
test_split = tokenized_datasets["test"]
results = trainer.evaluate(test_split)
print("Test Results:", results)

# Raw model outputs for the same split; the predicted class is the index of
# the largest score along the last axis.
predictions = trainer.predict(test_split)
y_true = predictions.label_ids
y_pred = predictions.predictions.argmax(axis=-1)

# Hand the label arrays to the project's detailed report helper.
print("\nDetailed Test Metrics:")
evaluate_model(y_true, y_pred)