In [1]:
from pathlib import Path

import torch
from datasets import Dataset, DatasetDict, load_dataset
from torch import Tensor, nn
from tqdm import tqdm
from transformers import Trainer, TrainingArguments

from luminar.classifier import LuminarCNN
from luminar.utils import PaddingDataCollator, get_matched_datasets

HF_TOKEN = (Path.home() / ".hf_token").read_text().strip()

In [2]:
from transformers import EarlyStoppingCallback

### Encoder

In [3]:
# from luminar.encoder import LuminarEncoder


# encoder = LuminarEncoder()
# encoder.device = "cuda:0"

### Classifier

In [4]:
# import numpy as np
# from sklearn.metrics import accuracy_score, f1_score, roc_auc_score


# def compute_metrics(eval_pred):
#     logits, labels = eval_pred
#     scores = torch.tensor(logits).sigmoid().cpu().flatten().numpy()
#     labels = np.array(labels).flatten()

#     metrics = {}

#     metrics["acc"] = float(accuracy_score(labels, scores > 0.5))
#     metrics["f1"] = float(f1_score(labels, scores > 0.5))

#     threshold = sorted(scores)[len(labels) - sum(labels) - 1]
#     metrics["acc_calibrated"] = float(accuracy_score(labels, scores > threshold))
#     metrics["f1_calibrated"] = float(f1_score(labels, scores > threshold))
#     metrics["threshold"] = threshold

#     if sum(labels) == 0 or sum(labels) == len(labels):
#         auroc = -1
#     else:
#         auroc = float(roc_auc_score(labels, scores))
#     metrics["auroc"] = auroc

#     return metrics

In [5]:
import evaluate
import numpy as np
from numpy.typing import NDArray

acc = evaluate.load("accuracy")
f1 = evaluate.load("f1")
roc_auc = evaluate.load("roc_auc")


def compute_scores(preds: NDArray, labels: NDArray, suffix=""):
    f1_score_each = f1.compute(predictions=preds, references=labels, average=None)
    f1_score_weighted = f1.compute(
        predictions=preds, references=labels, average="weighted"
    )
    acc_score = acc.compute(predictions=preds, references=labels)
    roc_auc_score = roc_auc.compute(prediction_scores=preds, references=labels)

    return {
        f"f1_each_{i}{suffix}": score
        for i, score in enumerate(f1_score_each["f1"])  # type: ignore
    } | {
        f"f1_weighted{suffix}": f1_score_weighted["f1"],  # type: ignore
        f"accuracy{suffix}": acc_score["accuracy"],  # type: ignore
        f"roc_auc{suffix}": roc_auc_score["roc_auc"],  # type: ignore
    }


def compute_metrics(eval_pred):
    logits, labels = eval_pred

    labels = np.array(labels)
    logits = 1 / (1 + np.exp(-np.array(logits)))

    metrics = compute_scores(logits > 0.5, labels)

    threshold = np.median(logits)
    metrics |= compute_scores(logits > threshold, labels, "_median")
    metrics["threshold_median"] = threshold

    metrics["ground_truth_0"] = np.sum(labels == 0)
    metrics["ground_truth_1"] = np.sum(labels == 1)

    return metrics

In [6]:
agent = "gpt_4o_mini"
feature_len = 256
seed = 42

In [7]:
dataset: Dataset = (
    load_dataset(
        "liberi-luminaris/PrismAI-encoded-gpt2",
        "cnn_news-fulltext",
        split=f"human+{agent}+gemma2_9b",
        token=HF_TOKEN,
    )  # type: ignore
    .map(
        lambda features: {"features": features[:feature_len]},
        input_columns=["features"],
        desc="Trimming Features",
    )
    .rename_column("label", "labels")
)

In [8]:
datasets_matched = get_matched_datasets(dataset, agent)
datasets_matched.set_format("torch", columns=["labels", "features"])
# dataset_train = datasets_matched["matched"].train_test_split(
#     test_size=0.2, seed=seed, shuffle=True
# ).with_format("torch", columns=["labels", "features"])
# dataset_test = dataset_train.pop("test")
# dataset_train = dataset_train["train"].train_test_split(1 / 16, seed=seed, shuffle=True)

Filter:   0%|          | 0/44386 [00:00<?, ? examples/s]

Filter:   0%|          | 0/44386 [00:00<?, ? examples/s]

Filter:   0%|          | 0/44386 [00:00<?, ? examples/s]

Filter:   0%|          | 0/44386 [00:00<?, ? examples/s]

In [9]:
config = {
    # first 256 features & 13 layers for gpt2
    "feature_dim": (feature_len, 13),
    "feature_type": "intermediate_likelihoods",
    "feature_selection": "first",
    "projection_dim": 32,
    "learning_rate": 6e-4,
    "max_epochs": 5,
    "gradient_clip_val": 1.0,
    "train_batch_size": 32,
    "eval_batch_size": 1024,
    "seed": seed,
    "agent": agent,
}

training_args = TrainingArguments(
    output_dir="../logs/hf/",
    per_device_train_batch_size=config["train_batch_size"],
    per_device_eval_batch_size=config["eval_batch_size"],
    learning_rate=config["learning_rate"],
    num_train_epochs=config["max_epochs"],
    logging_steps=100,
    warmup_ratio=1.0,
    metric_for_best_model="loss",
    load_best_model_at_end=True,
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
)

In [10]:
classifier = LuminarCNN(**config)

In [11]:
trainer = Trainer(
    model=classifier,
    args=training_args,
    train_dataset=datasets_matched["train"],
    eval_dataset=datasets_matched["eval"],
    data_collator=PaddingDataCollator(config["feature_dim"]),
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(3)],
)

In [12]:
trainer.train()

Step,Training Loss,Validation Loss,F1 Each 0,F1 Each 1,F1 Weighted,Accuracy,Roc Auc,F1 Each 0 Median,F1 Each 1 Median,F1 Weighted Median,Accuracy Median,Roc Auc Median,Threshold Median,Ground Truth 0,Ground Truth 1
100,0.6932,0.692968,0.0,0.666667,0.333333,0.5,0.5,0.671743,0.671743,0.671743,0.671743,0.671743,0.50459,1182,1182
200,0.6878,0.68094,0.691644,0.240293,0.465969,0.561337,0.561337,0.722504,0.722504,0.722504,0.722504,0.722504,0.46939,1182,1182
300,0.6092,0.626644,0.553364,0.743675,0.64852,0.674281,0.674281,0.739425,0.739425,0.739425,0.739425,0.739425,0.73367,1182,1182
400,0.5661,0.518602,0.761457,0.72,0.740729,0.742386,0.742386,0.753807,0.753807,0.753807,0.753807,0.753807,0.415944,1182,1182
500,0.525,0.485968,0.782539,0.756611,0.769575,0.770305,0.770305,0.77242,0.77242,0.77242,0.77242,0.77242,0.43401,1182,1182
600,0.4619,0.46368,0.790905,0.772627,0.781766,0.782149,0.782149,0.787648,0.787648,0.787648,0.787648,0.787648,0.437262,1182,1182
700,0.4634,0.490676,0.799702,0.737665,0.768683,0.772843,0.772843,0.799492,0.799492,0.799492,0.799492,0.799492,0.264896,1182,1182
800,0.4262,0.444031,0.819304,0.764964,0.792134,0.795685,0.795685,0.820643,0.820643,0.820643,0.820643,0.820643,0.290383,1182,1182
900,0.3838,0.377916,0.813898,0.851161,0.83253,0.834602,0.834602,0.865482,0.865482,0.865482,0.865482,0.865482,0.776102,1182,1182
1000,0.3299,0.430345,0.833994,0.763711,0.798852,0.804992,0.804992,0.894247,0.894247,0.894247,0.894247,0.894247,0.110612,1182,1182


TrainOutput(global_step=1800, training_loss=0.4077093675401476, metrics={'train_runtime': 328.7459, 'train_samples_per_second': 251.532, 'train_steps_per_second': 7.863, 'total_flos': 0.0, 'train_loss': 0.4077093675401476, 'epoch': 3.481624758220503})

In [13]:
trainer.evaluate()

{'eval_loss': 0.19212588667869568,
 'eval_f1_each_0': 0.9224137931034483,
 'eval_f1_each_1': 0.925249169435216,
 'eval_f1_weighted': 0.923831481269332,
 'eval_accuracy': 0.9238578680203046,
 'eval_roc_auc': 0.9238578680203046,
 'eval_f1_each_0_median': 0.9230118443316413,
 'eval_f1_each_1_median': 0.9230118443316413,
 'eval_f1_weighted_median': 0.9230118443316413,
 'eval_accuracy_median': 0.9230118443316413,
 'eval_roc_auc_median': 0.9230118443316413,
 'eval_threshold_median': 0.5851811766624451,
 'eval_ground_truth_0': 1182,
 'eval_ground_truth_1': 1182,
 'eval_runtime': 7.82,
 'eval_samples_per_second': 302.3,
 'eval_steps_per_second': 0.384,
 'epoch': 3.481624758220503}

In [14]:
trainer.evaluate(datasets_matched["test"], metric_key_prefix="test")

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


{'test_loss': 0.20134906470775604,
 'test_f1_each_0': 0.9155080213903743,
 'test_f1_each_1': 0.9173121205777685,
 'test_f1_weighted': 0.9164100709840715,
 'test_accuracy': 0.9164198053322048,
 'test_roc_auc': 0.9164198053322048,
 'test_f1_each_0_median': 0.9166314007617435,
 'test_f1_each_1_median': 0.9166314007617435,
 'test_f1_weighted_median': 0.9166314007617435,
 'test_accuracy_median': 0.9166314007617435,
 'test_roc_auc_median': 0.9166314007617435,
 'test_threshold_median': 0.5584132671356201,
 'test_ground_truth_0': 2363,
 'test_ground_truth_1': 2363,
 'test_runtime': 15.073,
 'test_samples_per_second': 313.54,
 'test_steps_per_second': 0.332,
 'epoch': 3.481624758220503}

In [15]:
trainer.evaluate(datasets_matched["unmatched"], metric_key_prefix="unmatched")

early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


{'unmatched_loss': 0.4239790737628937,
 'unmatched_f1_each_0': 0.838589487691284,
 'unmatched_f1_each_1': 0.7747864834756777,
 'unmatched_f1_weighted': 0.8092028160341154,
 'unmatched_accuracy': 0.8119525618169134,
 'unmatched_roc_auc': 0.8039393376283043,
 'unmatched_f1_each_0_median': 0.8381804623415362,
 'unmatched_f1_each_1_median': 0.8248870238863784,
 'unmatched_f1_weighted_median': 0.8320577112911876,
 'unmatched_accuracy_median': 0.8317959848073793,
 'unmatched_roc_auc_median': 0.8338676976544206,
 'unmatched_threshold_median': 0.2178930938243866,
 'unmatched_ground_truth_0': 6959,
 'unmatched_ground_truth_1': 5942,
 'unmatched_runtime': 41.3477,
 'unmatched_samples_per_second': 312.013,
 'unmatched_steps_per_second': 0.314,
 'epoch': 3.481624758220503}

In [None]:
print(classifier)
print(
    "Parameters:\n  conv_layers:",
    sum(
        param.numel()
        for param in classifier.conv_layers.parameters()
        if param.requires_grad
    ),
    "\n  projection:",
    sum(
        param.numel()
        for param in classifier.projection.parameters()
        if param.requires_grad
    ),
    "\n  classifier:",
    sum(
        param.numel()
        for param in classifier.classifier.parameters()
        if param.requires_grad
    ),
    "\n  total:",
    sum(param.numel() for param in classifier.parameters() if param.requires_grad),
)

In [None]:
raise RuntimeError()

In [None]:
import json

print(json.dumps(scores, indent=4))
with open("../logs/luminar/gpt2_first_128-3_epochs.json", "w") as f:
    json.dump(scores, f, indent=4)

In [None]:
datasets = {}
for subset in [
    "blog_authorship_corpus",
    "student_essays",
    "cnn_news",
    "euro_court_cases",
    "house_of_commons",
    "arxiv_papers",
    "gutenberg_en",
    "en",
    "bundestag",
    "spiegel_articles",
    "gutenberg_de",
    "de",
]:
    config_name = f"{subset}-fulltext"
    datasets[config_name] = load_dataset(
        "liberi-luminaris/PrismAI-encoded-gpt2",
        config_name,
        token=HF_TOKEN,
        split="human+gpt_4o_mini",
    )

In [None]:
dataset = load_dataset(
    "liberi-luminaris/PrismAI-fulltext", "cnn_news", split="human+gpt_4o_mini"
)
dataset_human = dataset.filter(lambda sample: sample["agent"] == "human")
source_ids = set(
    dataset_human.shuffle(seed=42).take(len(dataset_human) // 10 * 8)["id_source"]
)
dataset_train = dataset.filter(lambda sample: sample["id_source"] in source_ids)

In [None]:
datasets_truncated = {}
for config_name, dataset in datasets.items():
    datasets_truncated[config_name] = dataset.with_format(
        "numpy", columns=["features"], output_all_columns=True
    ).map(
        lambda batch: {"features": batch["features"][:, :256]},
        batched=True,
    )

In [None]:
datasets_considered = {
    key: value
    for key, value in datasets_truncated.items()
    if not key.startswith("de-") and not key.startswith("en-")
}

In [None]:
import pandas as pd

domains = [
    "Web Blogs",
    "Essays",
    "CNN",
    "ECHR",
    "HoC",
    "arXiv",
    "Gutenberg$_{en}$",
    "Bundestag$_{de}$",
    "Spiegel$_{de}$",
    "Gutenberg$_{de}$",
    "All$_{en}$",
    "All$_{de}$",
]
name_map = {
    "blog_authorship_corpus": "Web Blogs",
    "student_essays": "Essays",
    "cnn_news": "CNN",
    "euro_court_cases": "ECHR",
    "house_of_commons": "HoC",
    "arxiv_papers": "arXiv",
    "gutenberg_en": "Gutenberg$_{en}$",
    "bundestag": "Bundestag$_{de}$",
    "spiegel_articles": "Spiegel$_{de}$",
    "gutenberg_de": "Gutenberg$_{de}$",
    "en": "All$_{en}$",
    "de": "All$_{de}$",
}

results = [
    {"domain": name_map[key.split("-", 1)[0]]}
    | {
        "f1": value["f1"],
        "acc": value["accuracy"],
        "auroc": value["auroc"],
    }
    for key, value in scores.items()
]
metric_df = (
    pd.DataFrame(results)
    .set_index("domain")
    .sort_index(key=lambda x: list(map(domains.index, x)))
)
print(metric_df.to_latex(float_format="%.3f", index=True))
metric_df

In [None]:
# def run_detector(
#     detector: DetectorABC, datasets: dict[str, DatasetDict]
# ) -> dict[str, float]:
#     scores = {}
#     for config_name, ds in tqdm(datasets.items(), desc="Predicting on Datasets"):
#         dataset: Dataset = ds["test"].map(
#             detector.tokenize,
#             input_columns=["text"],
#             batched=True,
#             batch_size=1024,
#             desc="Tokenizing",
#         )
#         dataset = dataset.sort("length")
#         dataset = dataset.map(
#             detector.process,
#             batched=True,
#             batch_size=128,
#             desc="Predicting",
#         )

#         dataset_np = dataset.select_columns(["prediction", "label"]).with_format(
#             "numpy"
#         )

#         acc, f1, auroc = get_scores(dataset_np["label"], dataset_np["prediction"])
#         scores[config_name] = {"accuracy": acc, "f1": f1, "auroc": auroc}

#         acc, f1, auroc = get_scores(
#             dataset_np["label"],
#             dataset_np["prediction"],
#             calibrated=True,
#         )
#         scores[config_name] |= {
#             "accuracy_calibrated": acc,
#             "f1_calibrated": f1,
#             "auroc_calibrated": auroc,
#         }
#     return scores


In [None]:
# def evaluate(model: LuminarClassifier, datasets: dict[str, DatasetDict]) -> dict:
#     scores = {}
#     for config_name, dataset in tqdm(datasets.items(), desc="Evaluating", leave=False):
#         ds = (
#             dataset["test"]
#             .with_format("torch", ["features"])
#             .map(model.process, batched=True, batch_size=32, desc="Predicting")
#         )
#         dataset_np = ds.select_columns(["prediction", "label"]).with_format("numpy")

#         acc, f1, auroc = get_scores(dataset_np["label"], dataset_np["prediction"])
#         scores[config_name] = {
#             "accuracy": acc,
#             "f1": f1,
#             "auroc": auroc,
#         }

#         acc, f1, auroc = get_scores(
#             dataset_np["label"],
#             dataset_np["prediction"],
#             calibrated=True,
#         )
#         scores[config_name] |= {
#             "accuracy_calibrated": acc,
#             "f1_calibrated": f1,
#             "auroc_calibrated": auroc,
#         }

#     return scores