<a href="https://colab.research.google.com/github/Ak4nksha/ai-generated-text-detector/blob/main/notebooks/06_transformer_finetune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Transformer Fine-tuning (Human vs AI)

Goal: Fine-tune a pretrained transformer for binary classification:
**human-written vs LLM-generated text**.

- Uses the fixed `train/val/test` splits created earlier.
- Trains the transformer classifier end-to-end (the pretrained encoder is fine-tuned, not frozen).
- Reports validation and test metrics.


In [None]:
!pip -q install transformers datasets evaluate accelerate scikit-learn pandas numpy tqdm

In [None]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
from dataclasses import dataclass
from typing import Dict, List

from sklearn.metrics import accuracy_score, f1_score, classification_report


In [None]:
from google.colab import drive

# Mount Google Drive: the split CSVs are read from a folder under MyDrive.
drive.mount("/content/drive")

SPLITS_DIR = Path("/content/drive/MyDrive/artifacts/splits_v1")

# Read the three fixed splits.
train_df = pd.read_csv(SPLITS_DIR / "train.csv")
val_df = pd.read_csv(SPLITS_DIR / "val.csv")
test_df = pd.read_csv(SPLITS_DIR / "test.csv")

# Fail fast if a split lacks either column the rest of the notebook reads.
for name, df in (("train", train_df), ("val", val_df), ("test", test_df)):
    if not {"text", "label"}.issubset(df.columns):
        raise ValueError(f"{name}.csv must contain columns: text, label")

# Quick sanity summary: row count per split and label counts in the train split.
print("Loaded splits:", *(len(frame) for frame in (train_df, val_df, test_df)))
train_labels = train_df["label"].astype(int).to_numpy()
print("Train label dist:", np.bincount(train_labels))


In [None]:
from datasets import Dataset

# Wrap each split as a `Dataset`, keeping only the "text" and "label" columns.
train_ds, val_ds, test_ds = (
    Dataset.from_pandas(frame[["text", "label"]])
    for frame in (train_df, val_df, test_df)
)

print(train_ds)


In [None]:
from transformers import AutoTokenizer

# Checkpoint name: used here to load the tokenizer and again later to load the model.
MODEL_NAME = "distilbert-base-uncased"
# Maximum sequence length in tokens; tokenize_batch truncates to this value.
MAX_LEN = 256

# Load the tokenizer matching MODEL_NAME, requesting the fast implementation.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

def tokenize_batch(batch):
    """Tokenize the ``"text"`` entry of a batch with the module-level tokenizer.

    Args:
        batch: Mapping with a ``"text"`` key holding the raw texts (this is
            what ``Dataset.map(..., batched=True)`` passes in).

    Returns:
        The tokenizer's output for those texts, truncated to ``MAX_LEN``
        tokens and left unpadded.
    """
    # Padding is deliberately disabled here: we'll pad dynamically in the collator.
    encode_options = dict(truncation=True, max_length=MAX_LEN, padding=False)
    return tokenizer(batch["text"], **encode_options)

# Tokenize every split the same way; the raw "text" column is dropped
# once it has been encoded.
train_tok, val_tok, test_tok = (
    split.map(tokenize_batch, batched=True, remove_columns=["text"])
    for split in (train_ds, val_ds, test_ds)
)

print(" Tokenized.")


In [None]:
from transformers import DataCollatorWithPadding

# Pads each batch when it is collated; tokenize_batch leaves sequences
# unpadded (padding=False) precisely so that padding can happen here.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)


In [None]:
from transformers import AutoModelForSequenceClassification

# Two classes for the human-vs-AI task.
# NOTE(review): which integer label means "human" is defined by the split CSVs — confirm.
num_labels = 2
# Load the MODEL_NAME checkpoint as a sequence classifier with `num_labels` outputs.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=num_labels)


In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy and F1 from a ``(logits, labels)`` pair.

    Args:
        eval_pred: Two-item sequence ``(logits, labels)``. The predicted class
            for each example is the argmax of ``logits`` over the last axis.

    Returns:
        Dict with keys ``"accuracy"`` and ``"f1"`` (``f1_score`` is called with
        its default settings).
    """
    logits, labels = eval_pred
    predicted = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_score(labels, predicted),
        "f1": f1_score(labels, predicted),
    }


In [None]:
## Training setup

import inspect

import torch
from transformers import TrainingArguments, Trainer

OUTPUT_DIR = "./artifacts/transformer_finetune/distilbert_run_v1"

# Two keyword names differ across transformers releases:
#   TrainingArguments: `evaluation_strategy` was renamed to `eval_strategy`
#   Trainer:           `tokenizer` was superseded by `processing_class`
# Pick whichever name the installed version exposes so this cell runs on both.
_args_params = inspect.signature(TrainingArguments.__init__).parameters
_eval_strategy_key = (
    "eval_strategy" if "eval_strategy" in _args_params else "evaluation_strategy"
)
_trainer_params = inspect.signature(Trainer.__init__).parameters
_tokenizer_key = (
    "processing_class" if "processing_class" in _trainer_params else "tokenizer"
)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    # Evaluate and checkpoint once per epoch so that the checkpoint with the
    # best validation F1 can be restored when training ends. Without these,
    # `metric_for_best_model` / `greater_is_better` below have no effect.
    **{_eval_strategy_key: "epoch"},
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="f1",  # key returned by compute_metrics
    greater_is_better=True,

    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.01,

    # Mixed precision needs a CUDA GPU; fall back to full precision otherwise
    # instead of failing on a CPU-only runtime.
    fp16=torch.cuda.is_available(),
    logging_steps=50,
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tok,
    eval_dataset=val_tok,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # Passed under the version-appropriate keyword chosen above.
    **{_tokenizer_key: tokenizer},
)


In [None]:
# Run fine-tuning with the `trainer` built in the training-setup cell.
trainer.train()

In [None]:
# Score the trained model on the validation split, then on the test split.
# The returned metric dicts are kept in variables and printed for the record.
val_metrics = trainer.evaluate(eval_dataset=val_tok)
print("Val metrics:", val_metrics)

test_metrics = trainer.evaluate(eval_dataset=test_tok)
print("Test metrics:", test_metrics)


<!-- **DistilBERT fine-tuning results (fixed splits):**
- Validation F1 ≈ 0.996
- Test F1 ≈ 0.849

The large gap between validation and test F1 suggests a strong domain shift between the training and test data. -->
