# Luminar

## Baselines: Neural Network Models

In [1]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score


# Source: Ghostbuster, Verma et al. (2024)
def get_scores(labels, probabilities, calibrated=False, precision=6):
    assert len(labels) == len(probabilities)

    if calibrated:
        threshold = sorted(probabilities)[len(labels) - sum(labels) - 1]
    else:
        threshold = 0.5

    acc = round(float(accuracy_score(labels, probabilities > threshold)), precision)
    f1 = round(float(f1_score(labels, probabilities > threshold)), precision)

    if sum(labels) == 0 or sum(labels) == len(labels):
        auroc = -1
    else:
        auroc = round(float(roc_auc_score(labels, probabilities)), precision)

    return acc, f1, auroc

## Data

In [7]:
import gc

from datasets import Dataset, DatasetDict, load_dataset
from tqdm import tqdm

datasets = {}
for config_name in tqdm(
    [
        # "blog_authorship_corpus",
        # "student_essays",
        # "cnn_news",
        # "euro_court_cases",
        # "house_of_commons",
        # "arxiv_papers",
        # "gutenberg_en",
        # "en",
        # "bundestag",
        # "spiegel_articles",
        "gutenberg_de",
        # "de",
    ]
):
    datasets[config_name] = load_dataset(
        "liberi-luminaris/PrismAI",
        f"{config_name}-fulltext-gpt_4o_mini",
    )

  0%|                                                                                                                          | 0/1 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/160 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/40 [00:00<?, ? examples/s]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.10s/it]


### Setup

In [3]:
def run_detector(detector, datasets: dict[str, DatasetDict], batch_size=32):
    scores = {}
    for config_name, dataset in tqdm(datasets.items(), desc="Predicting on Datasets"):
        dataset: Dataset = dataset["test"].map(
            detector.tokenize,
            input_columns=["text"],
            batched=True,
            batch_size=1024,
            desc="Tokenizing",
        )
        dataset = dataset.sort("length")
        dataset = dataset.map(
            detector.process,
            batched=True,
            batch_size=batch_size,
            desc="Predicting",
        )

        dataset_np = dataset.select_columns(["prediction", "label"]).with_format(
            "numpy"
        )

        acc, f1, auroc = get_scores(dataset_np["label"], dataset_np["prediction"])
        scores[config_name] = {
            "accuracy": acc,
            "f1": f1,
            "auroc": auroc,
        }

        acc, f1, auroc = get_scores(
            dataset_np["label"],
            dataset_np["prediction"],
            calibrated=True,
        )
        scores[config_name] |= {
            "accuracy_calibrated": acc,
            "f1_calibrated": f1,
            "auroc_calibrated": auroc,
        }
    return scores


### RoBERTa

In [8]:
# Modified from: RAID, Dugan et al. 2024
# > https://github.com/liamdugan/raid/blob/main/detectors/models/chatgpt_roberta_detector/chatgpt_detector.py

import evaluate
import numpy as np
import torch
from datasets import DatasetDict
from tqdm import tqdm
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)
from transformers.tokenization_utils_base import BatchEncoding

accuracy = evaluate.load("accuracy")


def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)
    return accuracy.compute(predictions=predictions, references=labels)


class ChatGPTDetector:
    def __init__(self, device="cuda" if torch.cuda.is_available() else "cpu"):
        self.device = torch.device(device)
        self.tokenizer = AutoTokenizer.from_pretrained(
            "Hello-SimpleAI/chatgpt-detector-roberta"
        )
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "Hello-SimpleAI/chatgpt-detector-roberta"
        ).to(self.device)

    def reset(self):
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "Hello-SimpleAI/chatgpt-detector-roberta"
        ).to(self.device)

    def tokenize(self, texts: list[str]) -> BatchEncoding:
        return self.tokenizer(
            texts,
            padding=False,
            truncation=True,
            max_length=512,
            return_length=True,
        )

    @torch.inference_mode()
    def predict(self, inputs: dict) -> list[float]:
        encoding = self.tokenizer.pad(inputs, return_tensors="pt").to(self.device)
        outputs = self.model(**encoding)
        probs = outputs.logits.softmax(dim=-1)
        return probs[:, 1].detach().cpu().flatten().tolist()

    def process(self, inputs: dict) -> dict[str, list[float]]:
        return {
            "prediction": self.predict(
                {
                    "input_ids": inputs["input_ids"],
                    "attention_mask": inputs["attention_mask"],
                }
            )
        }

    @torch.inference_mode()
    def inference(self, texts: list) -> list:
        predictions = []
        for text in tqdm(texts):
            inputs = self.tokenizer(text, truncation=True, return_tensors="pt").to(
                self.device
            )
            outputs = self.model(**inputs)
            probs = outputs.logits.softmax(dim=-1)
            real, fake = probs.detach().cpu().flatten().numpy().tolist()
            predictions.append(fake)
        return predictions

    def train(self, dataset: DatasetDict, training_args: TrainingArguments):
        data_collator = DataCollatorWithPadding(tokenizer=self.tokenizer)
        trainer = Trainer(
            self.model,
            training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["test"],
            data_collator=data_collator,
            compute_metrics=compute_metrics,
        )

        trainer.train()
        self.model = trainer.model.to(self.device)

In [None]:
def f():
    results = run_detector(ChatGPTDetector(device="cuda:3"), datasets)
    gc.collect()
    torch.cuda.synchronize()
    torch.cuda.empty_cache()
    return results


# scores_roberta = f()
# scores_roberta

In [9]:
import json
from pathlib import Path

scores_roberta_ft = {}

model = ChatGPTDetector(device="cuda:0")
dataset_items = list(datasets.items())
tq = tqdm(dataset_items, desc="Finetuning")
for config, dataset in tq:
    tq.set_postfix_str(config)
    model.reset()

    dataset = dataset.map(
        model.tokenize,
        input_columns=["text"],
        batched=True,
        batch_size=1024,
        desc="Tokenizing",
    )
    train_ds = dataset["train"].train_test_split(test_size=0.1, seed=42)

    training_args = TrainingArguments(
        output_dir=f"../models/chatgpt-detector-roberta/{config}",
        seed=42,
        num_train_epochs=1,
        per_device_train_batch_size=15,
        per_device_eval_batch_size=30,
        eval_strategy="steps",
        eval_steps=50,
        save_strategy="epoch",
        learning_rate=1e-5,
    )
    model.train(train_ds, training_args)

    scores_roberta_ft[config] = run_detector(model, datasets)

    path = Path("../logs/chatgpt-detector-roberta/")
    path.mkdir(parents=True, exist_ok=True)
    with (path / f"{config}.json").open("w") as fp:
        json.dump(scores_roberta_ft[config], fp, indent=4)


print(json.dumps(scores_roberta_ft, indent=4))

Finetuning:   0%|                                                                                                | 0/1 [00:00<?, ?it/s, gutenberg_de]

Tokenizing:   0%|          | 0/160 [00:00<?, ? examples/s]

Tokenizing:   0%|          | 0/40 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Step,Training Loss,Validation Loss




Tokenizing:   0%|          | 0/40 [00:00<?, ? examples/s]

Predicting:   0%|          | 0/40 [00:00<?, ? examples/s]

You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Predicting on Datasets: 100%|██████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.86s/it]
Finetuning: 100%|████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:35<00:00, 35.72s/it, gutenberg_de]

{
    "gutenberg_de": {
        "gutenberg_de": {
            "accuracy": 0.5,
            "f1": 0.0,
            "auroc": 1.0,
            "accuracy_calibrated": 1.0,
            "f1_calibrated": 1.0,
            "auroc_calibrated": 1.0
        }
    }
}





In [6]:
dataset

DatasetDict({
    train: Dataset({
        features: ['agent', 'label', 'label_str', 'text', 'input_ids', 'attention_mask', 'length'],
        num_rows: 160
    })
    test: Dataset({
        features: ['agent', 'label', 'label_str', 'text', 'input_ids', 'attention_mask', 'length'],
        num_rows: 40
    })
})

### RADAR

In [None]:
# Modified from: RAID, Dugan et al. 2024
# > https://github.com/liamdugan/raid/blob/main/detectors/models/radar/radar.py

import torch
import torch.nn.functional as F
from tqdm import tqdm
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers.tokenization_utils_base import BatchEncoding


class Radar:
    def __init__(self, device="cuda" if torch.cuda.is_available() else "cpu"):
        self.device = torch.device(device)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "TrustSafeAI/RADAR-Vicuna-7B",  # cache_dir=os.environ["CACHE_DIR"]
        )
        self.tokenizer = AutoTokenizer.from_pretrained("TrustSafeAI/RADAR-Vicuna-7B")
        self.model.eval()
        self.model.to(self.device)

    def tokenize(self, texts: list[str]) -> BatchEncoding:
        return self.tokenizer(
            texts,
            padding=False,
            truncation=True,
            max_length=512,
            return_length=True,
        )

    @torch.inference_mode()
    def predict(self, inputs: dict) -> list[float]:
        encoding = self.tokenizer.pad(inputs, return_tensors="pt").to(self.device)
        outputs = self.model(**encoding)
        output_probs = F.log_softmax(outputs.logits, -1)[:, 0].exp().tolist()
        return output_probs

    def process(self, inputs: dict) -> dict[str, list[float]]:
        return {
            "prediction": self.predict(
                {
                    "input_ids": inputs["input_ids"],
                    "attention_mask": inputs["attention_mask"],
                }
            )
        }

    @torch.inference_mode()
    def inference(self, texts: list) -> list:
        predictions = []
        for text in tqdm(texts):
            with torch.no_grad():
                inputs = self.tokenizer(
                    [text],
                    padding=True,
                    truncation=True,
                    max_length=512,
                    return_tensors="pt",
                )
                inputs = {k: v.to(self.device) for k, v in inputs.items()}
                output_probs = (
                    F.log_softmax(self.model(**inputs).logits, -1)[:, 0].exp().tolist()
                )
            predictions.append(output_probs[0])
        return predictions

In [None]:
def f():
    results = run_detector(Radar(device="cuda:3"), datasets)
    gc.collect()
    torch.cuda.synchronize()
    torch.cuda.empty_cache()
    return results


scores_radar = f()
scores_radar

### Binoculars

In [None]:
# Source: RAID, Dugan et al. 2024
# > https://github.com/liamdugan/raid/blob/main/detectors/models/binoculars/utils/metrics.py

import numpy as np
import torch
import transformers

ce_loss_fn = torch.nn.CrossEntropyLoss(reduction="none")
softmax_fn = torch.nn.Softmax(dim=-1)


def perplexity(
    encoding: transformers.BatchEncoding,
    logits: torch.Tensor,
    median: bool = False,
    temperature: float = 1.0,
):
    shifted_logits = logits[..., :-1, :].contiguous() / temperature
    shifted_labels = encoding.input_ids[..., 1:].contiguous()
    shifted_attention_mask = encoding.attention_mask[..., 1:].contiguous()

    if median:
        ce_nan = ce_loss_fn(shifted_logits.transpose(1, 2), shifted_labels).masked_fill(
            ~shifted_attention_mask.bool(), float("nan")
        )
        ppl = np.nanmedian(ce_nan.cpu().float().numpy(), 1)

    else:
        ppl = (
            ce_loss_fn(shifted_logits.transpose(1, 2), shifted_labels)
            * shifted_attention_mask
        ).sum(1) / shifted_attention_mask.sum(1)
        ppl = ppl.to("cpu").float().numpy()

    return ppl


def entropy(
    p_logits: torch.Tensor,
    q_logits: torch.Tensor,
    encoding: transformers.BatchEncoding,
    pad_token_id: int,
    median: bool = False,
    sample_p: bool = False,
    temperature: float = 1.0,
):
    vocab_size = p_logits.shape[-1]
    total_tokens_available = q_logits.shape[-2]
    p_scores, q_scores = p_logits / temperature, q_logits / temperature

    p_proba = softmax_fn(p_scores).view(-1, vocab_size)

    if sample_p:
        p_proba = torch.multinomial(
            p_proba.view(-1, vocab_size), replacement=True, num_samples=1
        ).view(-1)

    q_scores = q_scores.view(-1, vocab_size)

    ce = ce_loss_fn(input=q_scores, target=p_proba).view(-1, total_tokens_available)
    padding_mask = (encoding.input_ids != pad_token_id).type(torch.uint8)

    if median:
        ce_nan = ce.masked_fill(~padding_mask.bool(), float("nan"))
        agg_ce = np.nanmedian(ce_nan.cpu().float().numpy(), 1)
    else:
        agg_ce = (
            ((ce * padding_mask).sum(1) / padding_mask.sum(1)).to("cpu").float().numpy()
        )

    return agg_ce

In [None]:
# Modified from: RAID, Dugan et al. 2024
# > https://github.com/liamdugan/raid/blob/main/detectors/models/binoculars/binoculars.py

import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

torch.set_grad_enabled(False)

GLOBAL_BINOCULARS_THRESHOLD = (
    0.9015310749276843  # selected using Falcon-7B and Falcon-7B-Instruct at bfloat16
)
# DEVICE_1 = "cuda:0" if torch.cuda.is_available() else "cpu"
# DEVICE_2 = "cuda:1" if torch.cuda.device_count() > 1 else DEVICE_1
DEVICE_1 = "cuda:2"
DEVICE_2 = "cuda:3"


class Binoculars(object):
    def __init__(
        self,
        observer_name_or_path: str = "tiiuae/falcon-7b",
        performer_name_or_path: str = "tiiuae/falcon-7b-instruct",
        use_bfloat16: bool = True,
        max_token_observed: int = 512,
    ) -> None:
        # assert_tokenizer_consistency(observer_name_or_path, performer_name_or_path)

        self.observer_model = AutoModelForCausalLM.from_pretrained(
            observer_name_or_path,
            device_map={"": DEVICE_1},
            trust_remote_code=True,
            # cache_dir=os.environ["CACHE_DIR"],
            torch_dtype=torch.bfloat16 if use_bfloat16 else torch.float32,
        )
        self.performer_model = AutoModelForCausalLM.from_pretrained(
            performer_name_or_path,
            device_map={"": DEVICE_2},
            trust_remote_code=True,
            # cache_dir=os.environ["CACHE_DIR"],
            torch_dtype=torch.bfloat16 if use_bfloat16 else torch.float32,
        )

        self.observer_model.eval()
        self.performer_model.eval()

        self.tokenizer = AutoTokenizer.from_pretrained(observer_name_or_path)
        if not self.tokenizer.pad_token:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        self.max_token_observed = max_token_observed

    def tokenize(self, texts: list[str]) -> BatchEncoding:
        return self.tokenizer(
            texts,
            padding=False,
            truncation=True,
            max_length=self.max_token_observed,
            return_length=True,
            return_token_type_ids=False,
        )

    @torch.inference_mode()
    def _get_logits(
        self, encodings: transformers.BatchEncoding
    ) -> tuple[torch.Tensor, torch.Tensor]:
        observer_logits = self.observer_model(
            **encodings.to(self.observer_model.device)
        ).logits
        performer_logits = self.performer_model(
            **encodings.to(self.performer_model.device)
        ).logits
        if DEVICE_1 != "cpu":
            torch.cuda.synchronize()
        return observer_logits, performer_logits

    @torch.inference_mode()
    def predict(self, inputs: dict) -> list[float]:
        encodings = self.tokenizer.pad(inputs, return_tensors="pt")
        observer_logits, performer_logits = self._get_logits(encodings)
        ppl = perplexity(encodings, performer_logits)
        x_ppl = entropy(
            observer_logits.to(DEVICE_1),
            performer_logits.to(DEVICE_1),
            encodings.to(DEVICE_1),
            self.tokenizer.pad_token_id,
        )
        binoculars_scores = ppl / x_ppl
        return binoculars_scores.tolist()

    def process(self, inputs: dict) -> dict[str, list[float]]:
        return {
            "prediction": self.predict(
                {
                    "input_ids": inputs["input_ids"],
                    "attention_mask": inputs["attention_mask"],
                }
            )
        }

In [None]:
from datasets import disable_caching

disable_caching()

In [None]:
def f():
    results = run_detector(Binoculars(), datasets, batch_size=16)
    torch.cuda.synchronize()
    torch.cuda.empty_cache()
    gc.collect()
    torch.cuda.empty_cache()
    return results


scores_binoculars = f()
scores_binoculars

### E5-Small LoRA


In [None]:
# Modified from: RAID, Dugan et al. 2024
# > https://github.com/liamdugan/raid/blob/main/detectors/models/radar/radar.py

import torch
import torch.nn.functional as F
from tqdm import tqdm
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers.tokenization_utils_base import BatchEncoding


class E5Lora:
    def __init__(self, device="cuda" if torch.cuda.is_available() else "cpu"):
        self.device = torch.device(device)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            "MayZhou/e5-small-lora-ai-generated-detector"
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            "MayZhou/e5-small-lora-ai-generated-detector"
        )
        self.model.eval()
        self.model.to(self.device)

    def tokenize(self, texts: list[str]) -> BatchEncoding:
        return self.tokenizer(
            texts,
            padding=False,
            truncation=True,
            max_length=512,
            return_length=True,
        )

    @torch.inference_mode()
    def predict(self, inputs: dict) -> list[float]:
        encoding = self.tokenizer.pad(inputs, return_tensors="pt").to(self.device)
        outputs = self.model(**encoding)
        output_probs = F.log_softmax(outputs.logits, -1)[:, 0].exp().tolist()
        return output_probs

    def process(self, inputs: dict) -> dict[str, list[float]]:
        return {
            "prediction": self.predict(
                {
                    "input_ids": inputs["input_ids"],
                    "attention_mask": inputs["attention_mask"],
                }
            )
        }

    @torch.inference_mode()
    def inference(self, texts: list) -> list:
        predictions = []
        for text in tqdm(texts):
            with torch.no_grad():
                inputs = self.tokenizer(
                    [text],
                    padding=True,
                    truncation=True,
                    max_length=512,
                    return_tensors="pt",
                )
                inputs = {k: v.to(self.device) for k, v in inputs.items()}
                output_probs = (
                    F.log_softmax(self.model(**inputs).logits, -1)[:, 0].exp().tolist()
                )
            predictions.append(output_probs[0])
        return predictions

In [None]:
import json


def f():
    results = run_detector(E5Lora(device="cuda:0"), datasets)
    gc.collect()
    torch.cuda.synchronize()
    torch.cuda.empty_cache()
    return results


scores_e5 = f()

with open("../logs/e5-small-lora.json", "w") as fp:
    json.dump(scores_e5, fp, indent=4)

print(json.dumps(scores_e5, indent=4))

In [11]:
domains = [
    "Web Blogs",
    "Essays",
    "CNN",
    "ECHR",
    "HoC",
    "arXiv",
    "Gutenberg$_{en}$",
    "Bundestag$_{de}$",
    "Spiegel$_{de}$",
    "Gutenberg$_{de}$",
    "All$_{en}$",
    "All$_{de}$",
]

name_map = {
    "blog_authorship_corpus": "Web Blogs",
    "student_essays": "Essays",
    "cnn_news": "CNN",
    "euro_court_cases": "ECHR",
    "house_of_commons": "HoC",
    "arxiv_papers": "arXiv",
    "gutenberg_en": "Gutenberg$_{en}$",
    "bundestag": "Bundestag$_{de}$",
    "spiegel_articles": "Spiegel$_{de}$",
    "gutenberg_de": "Gutenberg$_{de}$",
    "en": "All$_{en}$",
    "de": "All$_{de}$",
}

In [17]:
import pandas as pd
import json
from pathlib import Path

from collections import defaultdict

results = defaultdict(dict)
for path in Path("../logs/").iterdir():
    if path.suffix == ".json":
        with path.open("r") as fp:
            data = json.load(fp)
        model_name = path.stem
        for domain, scores in data.items():
            results[name_map[domain]].update(
                {
                    model_name + "_f1": scores["f1"],
                    model_name + "_accuracy": scores["accuracy"],
                    model_name + "_auroc": scores["auroc"],
                }
            )

model_name = "roberta-ft"
for domain, name in name_map.items():
    path = Path("../logs/chatgpt-detector-roberta/") / (domain + ".json")
    with (path).open("r") as fp:
        data = json.load(fp)
    results[name].update(
        {
            model_name + "_f1": data[domain]["f1"],
            model_name + "_accuracy": data[domain]["accuracy"],
            model_name + "_auroc": data[domain]["auroc"],
        }
    )

metric_df = (
    pd.DataFrame([{"domain": domain} | dd for domain, dd in results.items()])
    .set_index("domain")
    .sort_index(key=lambda x: list(map(domains.index, x)))
)
print(metric_df.to_latex(float_format="%.3f", index=True))
metric_df

\begin{tabular}{lrrrrrrrrrrrrrrr}
\toprule
 & roberta_f1 & roberta_accuracy & roberta_auroc & binoculars_f1 & binoculars_accuracy & binoculars_auroc & e5-small-lora_f1 & e5-small-lora_accuracy & e5-small-lora_auroc & radar_f1 & radar_accuracy & radar_auroc & roberta-ft_f1 & roberta-ft_accuracy & roberta-ft_auroc \\
domain &  &  &  &  &  &  &  &  &  &  &  &  &  &  &  \\
\midrule
Web Blogs & 0.481 & 0.658 & 0.998 & 0.667 & 0.500 & 0.126 & 0.022 & 0.328 & 0.147 & 0.670 & 0.587 & 0.524 & 0.996 & 0.996 & 1.000 \\
Essays & 0.311 & 0.555 & 0.672 & 0.667 & 0.500 & 0.000 & 0.003 & 0.407 & 0.135 & 0.765 & 0.752 & 0.815 & 1.000 & 1.000 & 1.000 \\
CNN & 0.487 & 0.644 & 0.923 & 0.666 & 0.500 & 0.000 & 0.001 & 0.360 & 0.016 & 0.954 & 0.955 & 0.991 & 0.999 & 0.999 & 1.000 \\
ECHR & 0.125 & 0.533 & 0.747 & 0.652 & 0.484 & 0.001 & 0.000 & 0.482 & 0.038 & 0.975 & 0.975 & 0.992 & 0.998 & 0.998 & 1.000 \\
HoC & 0.334 & 0.600 & 0.944 & 0.652 & 0.484 & 0.027 & 0.000 & 0.178 & 0.006 & 0.884 & 0.868 & 0.907 &

Unnamed: 0_level_0,roberta_f1,roberta_accuracy,roberta_auroc,binoculars_f1,binoculars_accuracy,binoculars_auroc,e5-small-lora_f1,e5-small-lora_accuracy,e5-small-lora_auroc,radar_f1,radar_accuracy,radar_auroc,roberta-ft_f1,roberta-ft_accuracy,roberta-ft_auroc
domain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Web Blogs,0.480929,0.658297,0.99795,0.666667,0.5,0.126145,0.02224,0.328057,0.146573,0.670153,0.58679,0.523931,0.995619,0.995633,0.999977
Essays,0.311258,0.555188,0.672401,0.666667,0.5,0.00023,0.003018,0.407492,0.134682,0.765422,0.751587,0.814824,0.999862,0.999862,1.0
CNN,0.486569,0.644096,0.92327,0.666479,0.499788,0.000113,0.001321,0.360135,0.015566,0.954192,0.955142,0.99115,0.999365,0.999365,0.999995
ECHR,0.124533,0.533201,0.747456,0.652349,0.484064,0.001084,0.0,0.482072,0.038128,0.9747,0.974768,0.992053,0.998004,0.998008,0.999986
HoC,0.333566,0.599749,0.943618,0.652358,0.484074,0.027122,0.0,0.178122,0.005816,0.883617,0.868399,0.906709,0.993684,0.993713,0.999434
arXiv,0.124183,0.533101,0.809661,0.663873,0.496864,6.4e-05,0.002356,0.409756,0.019123,0.954002,0.955749,0.996115,0.995101,0.995122,0.998943
Gutenberg$_{en}$,0.183486,0.550505,0.980864,0.666667,0.5,0.003356,0.010753,0.070707,0.006031,0.849673,0.845118,0.917639,0.947917,0.949495,0.989071
Bundestag$_{de}$,0.0,0.5,0.891183,0.666667,0.5,0.00981,0.371777,0.564115,0.665523,0.127273,0.499006,0.753252,0.983187,0.98335,0.998422
Spiegel$_{de}$,0.003259,0.500816,0.613273,0.666485,0.499796,0.008735,0.426394,0.347409,0.273244,0.044392,0.50816,0.628063,0.983894,0.983884,0.995226
Gutenberg$_{de}$,0.0,0.5,1.0,0.666667,0.5,0.03,0.391304,0.3,0.3275,0.536585,0.525,0.5575,0.0,0.5,1.0


In [19]:
results = defaultdict(dict)

model_name = "roberta-ft"
for domain, name in name_map.items():
    path = Path("../logs/chatgpt-detector-roberta/") / (domain + ".json")
    with (path).open("r") as fp:
        data = json.load(fp)
    results[name].update(
        {
            model_name + "_f1": data[domain]["f1"],
            # model_name + "_accuracy": data[domain]["accuracy"],
            model_name + "_auroc": data[domain]["auroc"],
        }
    )

metric_df = (
    pd.DataFrame([{"domain": domain} | dd for domain, dd in results.items()])
    .set_index("domain")
    .sort_index(key=lambda x: list(map(domains.index, x)))
)
print(metric_df.to_latex(float_format="%.3f", index=True))
metric_df

\begin{tabular}{lrr}
\toprule
 & roberta-ft_f1 & roberta-ft_auroc \\
domain &  &  \\
\midrule
Web Blogs & 0.996 & 1.000 \\
Essays & 1.000 & 1.000 \\
CNN & 0.999 & 1.000 \\
ECHR & 0.998 & 1.000 \\
HoC & 0.994 & 0.999 \\
arXiv & 0.995 & 0.999 \\
Gutenberg$_{en}$ & 0.948 & 0.989 \\
Bundestag$_{de}$ & 0.983 & 0.998 \\
Spiegel$_{de}$ & 0.984 & 0.995 \\
Gutenberg$_{de}$ & 0.000 & 1.000 \\
All$_{en}$ & 0.999 & 1.000 \\
All$_{de}$ & 0.994 & 1.000 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,roberta-ft_f1,roberta-ft_auroc
domain,Unnamed: 1_level_1,Unnamed: 2_level_1
Web Blogs,0.995619,0.999977
Essays,0.999862,1.0
CNN,0.999365,0.999995
ECHR,0.998004,0.999986
HoC,0.993684,0.999434
arXiv,0.995101,0.998943
Gutenberg$_{en}$,0.947917,0.989071
Bundestag$_{de}$,0.983187,0.998422
Spiegel$_{de}$,0.983894,0.995226
Gutenberg$_{de}$,0.0,1.0
