In [None]:
from pathlib import Path

import torch
from datasets import Dataset, DatasetDict, load_dataset
from torch import Tensor, nn
from tqdm import tqdm
from transformers import Trainer, TrainingArguments

from luminar.classifier import LuminarClassifier
from luminar.utils import PaddingDataCollator, get_matched_datasets

HF_TOKEN = (Path.home() / ".hf_token").read_text().strip()

In [None]:
from transformers import EarlyStoppingCallback

### Encoder

In [None]:
# from luminar.encoder import LuminarEncoder


# encoder = LuminarEncoder()
# encoder.device = "cuda:0"

### Classifier

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    scores = torch.tensor(logits).sigmoid().cpu().flatten().numpy()
    labels = np.array(labels).flatten()

    metrics = {}

    metrics["acc"] = float(accuracy_score(labels, scores > 0.5))
    metrics["f1"] = float(f1_score(labels, scores > 0.5))

    threshold = sorted(scores)[len(labels) - sum(labels) - 1]
    metrics["acc_calibrated"] = float(accuracy_score(labels, scores > threshold))
    metrics["f1_calibrated"] = float(f1_score(labels, scores > threshold))
    metrics["threshold"] = threshold

    if sum(labels) == 0 or sum(labels) == len(labels):
        auroc = -1
    else:
        auroc = float(roc_auc_score(labels, scores))
    metrics["auroc"] = auroc

    return metrics

In [40]:
import numpy as np
import evaluate

acc = evaluate.load("accuracy")
f1 = evaluate.load("f1")


def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # convert the logits to their predicted class
    predictions = np.array(logits) > 0.0
    labels = np.array(labels)
    # acc.compute(predictions=predictions, references=labels)
    f1_score = f1.compute(predictions=predictions, references=labels, average="weighted")
    acc_score = acc.compute(predictions=predictions, references=labels)
    return f1_score | acc_score

In [None]:
agent = "gpt_4o_mini"
feature_len = 256
seed = 42

In [None]:
dataset: Dataset = (
    load_dataset(
        "liberi-luminaris/PrismAI-encoded-gpt2",
        "cnn_news-fulltext",
        split=f"human+{agent}",
        token=HF_TOKEN,
    )  # type: ignore
    .map(
        lambda features: {"features": features[:feature_len]},
        input_columns=["features"],
        desc="Trimming Features",
    )
    .rename_column("label", "labels")
)

In [None]:
datasets_matched = get_matched_datasets(dataset, agent)
datasets_matched.set_format("torch", columns=["labels", "features"])
datasets_matched

In [None]:
dataset_train = datasets_matched["train"].train_test_split(
    test_size=1 / 8,
    shuffle=True,
    seed=seed,
).with_format(
    "torch", columns=["labels", "features"]
)

In [None]:
config = {
    # first 256 features & 13 layers for gpt2
    "feature_dim": (feature_len, 13),
    "feature_type": "intermediate_likelihoods",
    "feature_selection": "first",
    # "projection_dim": None,
    "projection_dim": 32,
    "learning_rate": 5e-4,
    "max_epochs": 5,
    "gradient_clip_val": 1.0,
    "train_batch_size": 32,
    "eval_batch_size": 1024,
    "seed": seed,
    "agent": agent,
}

training_args = TrainingArguments(
    output_dir="../logs/hf/",
    per_device_train_batch_size=config["train_batch_size"],
    per_device_eval_batch_size=config["eval_batch_size"],
    learning_rate=config["learning_rate"],
    num_train_epochs=config["max_epochs"],
    logging_steps=100,
    warmup_ratio=1.0,
    metric_for_best_model="loss",
    load_best_model_at_end=True,
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
)

In [41]:
# classifier = LuminarClassifier(**config)
trainer = Trainer(
    model=classifier,
    args=training_args,
    train_dataset=dataset_train["train"],
    eval_dataset=dataset_train["test"],
    data_collator=PaddingDataCollator(config["feature_dim"]),
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(3)],
)
# trainer.train()

In [42]:
trainer.evaluate()

{'eval_loss': 0.2590530812740326,
 'eval_model_preparation_time': 0.0002,
 'eval_f1': 0.890358123870099,
 'eval_accuracy': 0.890393567498942,
 'eval_runtime': 7.8173,
 'eval_samples_per_second': 302.28,
 'eval_steps_per_second': 0.384}

In [43]:
trainer.evaluate(datasets_matched["test"])

{'eval_loss': 0.24068349599838257,
 'eval_model_preparation_time': 0.0002,
 'eval_f1': 0.8961064161686484,
 'eval_accuracy': 0.8961066440964875,
 'eval_runtime': 14.8392,
 'eval_samples_per_second': 318.48,
 'eval_steps_per_second': 0.337}

In [44]:
trainer.evaluate(datasets_matched["test_unmatched"])

{'eval_loss': 0.2373393177986145,
 'eval_model_preparation_time': 0.0002,
 'eval_f1': 0.9464042392127177,
 'eval_accuracy': 0.8982612444316712,
 'eval_runtime': 21.9345,
 'eval_samples_per_second': 317.262,
 'eval_steps_per_second': 0.319}

In [None]:
print(classifier)
print(
    "Parameters:"
    "\n  conv_layers:",
    sum(
        param.numel()
        for param in classifier.conv_layers.parameters()
        if param.requires_grad
    ),
    "\n  projection:",
    sum(
        param.numel()
        for param in classifier.projection.parameters()
        if param.requires_grad
    ),
    "\n  classifier:",
    sum(
        param.numel()
        for param in classifier.classifier.parameters()
        if param.requires_grad
    ),
    "\n  total:",
    sum(param.numel() for param in classifier.parameters() if param.requires_grad),
)

In [None]:
import gc
import json

from tqdm.auto import tqdm

scores = {}
for config_name, dataset in tqdm(datasets_truncated.items(), desc="Training Models"):
    model = LuminarClassifier()
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.00005)

    dataset_train = dataset["train"].with_format("torch", ["features", "label"])

    tq = tqdm(range(3), desc="Training " + config_name, leave=False)
    for i in tq:
        for batch in dataset_train.shuffle(i).batch(batch_size=32):
            optimizer.zero_grad()
            features = batch["features"]
            labels = batch["label"].float().unsqueeze(-1)

            preds = model(features)

            loss = criterion(preds, labels)

            loss.backward()
            optimizer.step()

            tq.set_postfix_str(f"loss: {loss.item()}")

    model.eval()
    scores[config_name] = evaluate(model, {config_name: dataset})[config_name]
    ## OOD Evaluation
    # scores[config_name] = evaluate(model, datasets_truncated)

    print(config_name, json.dumps(scores[config_name], indent=4))

    del model
    gc.collect()
    torch.cuda.empty_cache()


In [None]:
raise RuntimeError()

In [None]:
import json

print(json.dumps(scores, indent=4))
with open("../logs/luminar/gpt2_first_128-3_epochs.json", "w") as f:
    json.dump(scores, f, indent=4)

In [None]:
datasets = {}
for subset in [
    "blog_authorship_corpus",
    "student_essays",
    "cnn_news",
    "euro_court_cases",
    "house_of_commons",
    "arxiv_papers",
    "gutenberg_en",
    "en",
    "bundestag",
    "spiegel_articles",
    "gutenberg_de",
    "de",
]:
    config_name = f"{subset}-fulltext"
    datasets[config_name] = load_dataset(
        "liberi-luminaris/PrismAI-encoded-gpt2",
        config_name,
        token=HF_TOKEN,
        split="human+gpt_4o_mini",
    )

In [None]:
dataset = load_dataset(
    "liberi-luminaris/PrismAI-fulltext", "cnn_news", split="human+gpt_4o_mini"
)
dataset_human = dataset.filter(lambda sample: sample["agent"] == "human")
source_ids = set(
    dataset_human.shuffle(seed=42).take(len(dataset_human) // 10 * 8)["id_source"]
)
dataset_train = dataset.filter(lambda sample: sample["id_source"] in source_ids)

In [None]:
datasets_truncated = {}
for config_name, dataset in datasets.items():
    datasets_truncated[config_name] = dataset.with_format(
        "numpy", columns=["features"], output_all_columns=True
    ).map(
        lambda batch: {"features": batch["features"][:, :256]},
        batched=True,
    )

In [None]:
datasets_considered = {
    key: value
    for key, value in datasets_truncated.items()
    if not key.startswith("de-") and not key.startswith("en-")
}

In [None]:
import pandas as pd

domains = [
    "Web Blogs",
    "Essays",
    "CNN",
    "ECHR",
    "HoC",
    "arXiv",
    "Gutenberg$_{en}$",
    "Bundestag$_{de}$",
    "Spiegel$_{de}$",
    "Gutenberg$_{de}$",
    "All$_{en}$",
    "All$_{de}$",
]
name_map = {
    "blog_authorship_corpus": "Web Blogs",
    "student_essays": "Essays",
    "cnn_news": "CNN",
    "euro_court_cases": "ECHR",
    "house_of_commons": "HoC",
    "arxiv_papers": "arXiv",
    "gutenberg_en": "Gutenberg$_{en}$",
    "bundestag": "Bundestag$_{de}$",
    "spiegel_articles": "Spiegel$_{de}$",
    "gutenberg_de": "Gutenberg$_{de}$",
    "en": "All$_{en}$",
    "de": "All$_{de}$",
}

results = [
    {"domain": name_map[key.split("-", 1)[0]]}
    | {
        "f1": value["f1"],
        "acc": value["accuracy"],
        "auroc": value["auroc"],
    }
    for key, value in scores.items()
]
metric_df = (
    pd.DataFrame(results)
    .set_index("domain")
    .sort_index(key=lambda x: list(map(domains.index, x)))
)
print(metric_df.to_latex(float_format="%.3f", index=True))
metric_df

In [None]:
# def run_detector(
#     detector: DetectorABC, datasets: dict[str, DatasetDict]
# ) -> dict[str, float]:
#     scores = {}
#     for config_name, ds in tqdm(datasets.items(), desc="Predicting on Datasets"):
#         dataset: Dataset = ds["test"].map(
#             detector.tokenize,
#             input_columns=["text"],
#             batched=True,
#             batch_size=1024,
#             desc="Tokenizing",
#         )
#         dataset = dataset.sort("length")
#         dataset = dataset.map(
#             detector.process,
#             batched=True,
#             batch_size=128,
#             desc="Predicting",
#         )

#         dataset_np = dataset.select_columns(["prediction", "label"]).with_format(
#             "numpy"
#         )

#         acc, f1, auroc = get_scores(dataset_np["label"], dataset_np["prediction"])
#         scores[config_name] = {"accuracy": acc, "f1": f1, "auroc": auroc}

#         acc, f1, auroc = get_scores(
#             dataset_np["label"],
#             dataset_np["prediction"],
#             calibrated=True,
#         )
#         scores[config_name] |= {
#             "accuracy_calibrated": acc,
#             "f1_calibrated": f1,
#             "auroc_calibrated": auroc,
#         }
#     return scores


In [None]:
# def evaluate(model: LuminarClassifier, datasets: dict[str, DatasetDict]) -> dict:
#     scores = {}
#     for config_name, dataset in tqdm(datasets.items(), desc="Evaluating", leave=False):
#         ds = (
#             dataset["test"]
#             .with_format("torch", ["features"])
#             .map(model.process, batched=True, batch_size=32, desc="Predicting")
#         )
#         dataset_np = ds.select_columns(["prediction", "label"]).with_format("numpy")

#         acc, f1, auroc = get_scores(dataset_np["label"], dataset_np["prediction"])
#         scores[config_name] = {
#             "accuracy": acc,
#             "f1": f1,
#             "auroc": auroc,
#         }

#         acc, f1, auroc = get_scores(
#             dataset_np["label"],
#             dataset_np["prediction"],
#             calibrated=True,
#         )
#         scores[config_name] |= {
#             "accuracy_calibrated": acc,
#             "f1_calibrated": f1,
#             "auroc_calibrated": auroc,
#         }

#     return scores