# Luminar

## Minimal Code

In [None]:
from typing import Iterable, NamedTuple

import torch
from datasets import Dataset
from torch import nn
from torch.utils.data import DataLoader, Subset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizer,
)

### Luminar Encoder

1. Pre-Process Inputs: tokenize and pass through LLM, recording hidden states
2. Calculate _Intermediate Likelihoods_: pass each hidden state through the model's LM head

In [None]:
class LuminarEncoder:
    """Encode texts as per-layer "intermediate likelihood" features.

    Texts are tokenized and passed through a causal language model with
    ``output_hidden_states=True``. Every returned hidden state is projected
    through the model's LM head, and the softmax probability assigned to the
    actual next token is recorded for each position.

    Args:
        max_length: Maximum number of tokens per text (longer texts are
            truncated by the tokenizer).
        model_name_or_path: Name or path handed to ``from_pretrained``.
        device: Device the language model is moved to.

    Raises:
        ValueError: If no ``lm_head`` can be found on the loaded model.
    """

    def __init__(
        self,
        max_length: int = 256,
        model_name_or_path: str = "gpt2",
        device: str = ("cuda" if torch.cuda.is_available() else "cpu"),
    ):
        self.max_length = max_length
        self.device = torch.device(device)

        self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path
        )
        # Tokenizers without a pad token fall back to the EOS token so that
        # batches can be padded.
        if getattr(self.tokenizer, "pad_token", None) is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.pad_token_id = self.tokenizer.pad_token_id

        self.model: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
            model_name_or_path
        )
        self.model = self.model.to(self.device)

        self.model_lm_head: nn.Linear = self._resolve_lm_head(self.model)

    @staticmethod
    def _resolve_lm_head(model: nn.Module) -> nn.Module:
        """Return the ``lm_head`` of ``model`` or of ``model.model``.

        Raises:
            ValueError: If neither object exposes an ``lm_head``. This also
                covers models that have no ``model`` attribute at all.
        """
        for candidate in (model, getattr(model, "model", None)):
            head = getattr(candidate, "lm_head", None)
            if head is not None:
                return head
        raise ValueError("Could not find lm_head in model")

    def __call__(self, batch: dict[str, list[str]]) -> dict[str, list[torch.Tensor]]:
        """Encode ``batch["text"]`` and return the result under ``"features"``."""
        return {"features": self.process(batch["text"])}

    def process(self, batch: list[str]) -> list[torch.Tensor]:
        """Tokenize ``batch`` and compute one feature tensor per text.

        Each feature tensor is the result of
        :meth:`compute_intermediate_likelihoods`, cut to the ``length`` value
        the tokenizer reports for that sample.
        """
        encoding = self.tokenizer(
            batch,
            padding=True,
            truncation=True,
            max_length=self.max_length,
            return_length=True,
            return_tensors="pt",
        )
        batch_hidden_states = self.forward(encoding.input_ids, encoding.attention_mask)

        return [
            self.compute_intermediate_likelihoods(input_ids, hidden_states)[:length]
            for input_ids, length, hidden_states in zip(
                encoding.input_ids, encoding.length, batch_hidden_states
            )
        ]

    @torch.inference_mode()
    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
    ) -> Iterable[tuple[torch.Tensor, ...]]:
        """Run the language model and regroup its hidden states per sample.

        Returns:
            An iterable with one tuple per input sequence; each tuple holds
            that sequence's slice of every hidden state the model returned.
        """
        outputs = self.model(
            input_ids=input_ids.to(self.device),
            attention_mask=attention_mask.to(self.device),
            output_hidden_states=True,
        )

        # unpack hidden states to get one list of tensors per input sequence,
        # instead of one hidden state per layer in the model
        return zip(*outputs.hidden_states)  # type: ignore

    @torch.inference_mode()
    def compute_intermediate_likelihoods(
        self,
        input_ids: torch.Tensor,
        hidden_states: tuple[torch.Tensor, ...],
    ) -> torch.Tensor:
        """Compute next-token likelihoods from every hidden state of one sample.

        Args:
            input_ids: Token ids of a single (padded) sequence.
            hidden_states: One hidden-state tensor per layer for that
                sequence, indexed by position along the first dimension.

        Returns:
            A CPU tensor of shape ``(max_length, len(hidden_states))``;
            positions past the detected sequence length are zero.
        """
        # The hidden state at position i predicts token i + 1, so the targets
        # are the input ids shifted by one.
        labels = input_ids[1:].view(-1, 1)

        # NOTE(review): the length is the *count* of non-pad targets. When the
        # pad token is aliased to EOS (see __init__), an EOS inside the text is
        # counted as padding as well -- consider passing the attention mask.
        seq_length = min(int(labels.ne(self.pad_token_id).sum()), self.max_length)
        labels = labels[:seq_length].to(self.device)

        # zeros appended when the sequence is shorter than max_length
        padding = torch.zeros(self.max_length - seq_length)

        intermediate_likelihoods = []
        for hs in hidden_states:
            il = (
                # get layer logits
                self.model_lm_head(hs[:seq_length].to(self.device))
                # calculate likelihoods
                .softmax(-1)
                # get likelihoods of input tokens
                .gather(-1, labels)
                .squeeze(-1)
                .cpu()
            )

            if seq_length < self.max_length:
                il = torch.cat([il, padding])

            intermediate_likelihoods.append(il)

        # stack intermediate likelihoods to get tensor of shape (max_length, num_layers)
        return torch.stack(intermediate_likelihoods, dim=1)

### Luminar Classifier

CNN-based classifier using _Intermediate Likelihoods_ as input features.
Here, we treat these inherently 2D features (`seq_len × num_layers`) as 1D sequences whose second dimension (the layers) serves as the input channels.

In [None]:
class ConvolutionalLayerSpec(NamedTuple):
    channels: int
    kernel_size: int | tuple[int, int]
    stride: int = 1

    @property
    def kernel_size_1d(self):
        if isinstance(self.kernel_size, int):
            return self.kernel_size
        return self.kernel_size[0]

    @property
    def kernel_size_2d(self):
        if isinstance(self.kernel_size, int):
            return (self.kernel_size, self.kernel_size)
        return self.kernel_size

    @property
    def padding(self) -> int:
        return (self.kernel_size_1d - 1) // 2

    def __repr__(self):
        return repr(tuple(self))


# Default stack of (channels, kernel_size) pairs for the classifier's
# convolutional layers.
DEFAULT_CONV_LAYER_SHAPES = ((64, 5), (128, 3), (128, 3), (128, 3), (64, 3))


class LuminarClassifier(nn.Module):
    """1D-CNN that maps intermediate-likelihood features to a single logit.

    Args:
        conv_layer_shapes: One entry per convolutional layer; each entry is
            unpacked into a ``ConvolutionalLayerSpec``.
        projection_dim: If truthy, a linear projection of this size followed by
            a LeakyReLU is inserted between the convolutions and the
            classifier; otherwise the flattened features are used directly.
    """

    def __init__(
        self,
        conv_layer_shapes: Iterable[ConvolutionalLayerSpec] = DEFAULT_CONV_LAYER_SHAPES,
        projection_dim: int | None = None,
    ):
        super().__init__()
        self.conv_layers = nn.Sequential()
        for conv in conv_layer_shapes:
            spec = ConvolutionalLayerSpec(*conv)
            self.conv_layers.append(
                nn.LazyConv1d(
                    spec.channels,
                    # A 1D convolution needs a scalar kernel size. Use the same
                    # 1D view of the spec that the padding is derived from, so
                    # tuple kernel sizes do not break the layer.
                    spec.kernel_size_1d,
                    spec.stride,
                    spec.padding,
                ),
            )
            self.conv_layers.append(nn.LeakyReLU())
        self.conv_layers.append(nn.Flatten())

        if projection_dim:
            self.projection = nn.Sequential(
                nn.LazyLinear(projection_dim), nn.LeakyReLU()
            )
        else:
            self.projection = nn.Identity()

        self.classifier = nn.LazyLinear(1)

    def forward(self, features: torch.Tensor):
        """Return one logit per sample for a 3D ``features`` tensor."""
        # We are using 2D features (so `features` is a 3D tensor)
        # but we want to treat the second feature dimension as channels.
        # Thus, we need to transpose the tensor here
        features = features.transpose(1, 2)

        # `conv_layers` ends with nn.Flatten, so its output is already 2D.
        features = self.conv_layers(features)

        return self.classifier(self.projection(features))


## Example

### Prepare Data

In [None]:
from pathlib import Path
from tqdm.auto import tqdm
from datasets import load_dataset

from luminar.utils import get_matched_datasets

# Hugging Face access token, read from `~/.hf_token`.
HF_TOKEN = Path.home().joinpath(".hf_token").read_text().strip()

agent = "gpt_4o_mini"
other_agents = "gemma2_9b"
num_proc = 32

# Maps each domain name to the datasets returned by `get_matched_datasets`,
# with the unmatched remainder stored under the "unmatched" key.
datasets = {}

domains = [
    "blog_authorship_corpus",
    "student_essays",
    "cnn_news",
    "euro_court_cases",
    "house_of_commons",
    "arxiv_papers",
    "gutenberg_en",
    "bundestag",
    "spiegel_articles",
    # "gutenberg_de",
    "en",
    "de",
]
for domain in tqdm(domains):
    datset_config_name = f"{domain}-fulltext"
    dataset_split_name = f"human+{agent}+{other_agents}"

    dataset: Dataset = load_dataset(
        "liberi-luminaris/PrismAI",
        datset_config_name,
        split=dataset_split_name,
        token=HF_TOKEN,
    )  # type: ignore
    dataset = dataset.rename_column("label", "labels")
    # drop samples whose text is empty or whitespace-only
    dataset = dataset.filter(
        lambda text: len(text.strip()) > 0,
        input_columns=["text"],
        num_proc=num_proc,
    )

    datasets_matched, dataset_unmatched = get_matched_datasets(
        dataset, agent, num_proc=num_proc
    )
    datasets_matched["unmatched"] = dataset_unmatched
    datasets[domain] = datasets_matched

# the per-domain working copy is no longer needed
del dataset
datasets

### Encode Samples

In [None]:
# GPT-2 based encoder with a 512-token limit, placed on the fourth CUDA device.
encoder = LuminarEncoder(
    max_length=512,
    model_name_or_path="gpt2",
    device="cuda:3",
)

In [None]:
# Encode every prepared dataset and write the result to disk. The encoded
# datasets are only persisted; they are not collected in memory.
tq = tqdm(datasets.items(), desc="Encoding datasets")
for config, dataset in tq:
    tq.set_description_str(f"Encoding {config}")

    path = f"/storage/projects/stoeckel/prismai/encoded/fulltext/{'_'.join([agent, other_agents])}/gpt2_{encoder.max_length}/{config}/"
    Path(path).mkdir(parents=True, exist_ok=True)

    encoded = dataset.map(encoder, batched=True, batch_size=128)
    encoded.save_to_disk(path)

### Run Training

In [None]:
# The classifier emits a single logit per sample, hence the logits-based
# binary cross-entropy loss.
criterion = nn.BCEWithLogitsLoss()
model = LuminarClassifier()
# NOTE(review): the model is built from lazy layers; PyTorch's lazy-module docs
# recommend a dry-run forward pass before creating the optimizer -- confirm
# that constructing it up front is fine for the torch version in use.
optimizer = torch.optim.Adam(params=model.parameters())

In [None]:
from tqdm import tqdm


# `dataset` still refers to the *un-encoded* data of the last domain: the
# encoding loop only writes its `map` result to disk. Load the encoded copy
# back from `path` (the directory written last) so "features" exists.
# The notebook rebinds the name `datasets` to a dict; the import statement
# below still resolves the installed package.
from datasets import load_from_disk

encoded_dataset = load_from_disk(path)
# NOTE(review): assumes the saved data has a "train" split -- confirm against
# what `get_matched_datasets` returns.
train_split = encoded_dataset["train"]

# The label column is renamed to "labels" during data preparation; fall back
# to "label" in case the stored data still uses the original name.
label_column = "labels" if "labels" in train_split.column_names else "label"
train_dataset = train_split.with_format("torch", ["features", label_column])

model.train()
for batch in tqdm(DataLoader(train_dataset, 32)):
    optimizer.zero_grad()
    features = batch["features"]
    # BCEWithLogitsLoss expects float targets shaped like the predictions
    labels = batch[label_column].float().unsqueeze(-1)

    preds = model(features)

    loss = criterion(preds, labels)

    loss.backward()
    optimizer.step()

In [None]:
import numpy as np
from sklearn.metrics import f1_score


# `dataset` from the encoding loop is the *un-encoded* data; the encoded copy
# only exists on disk, so load it back from `path` (the directory written
# last). The notebook rebinds the name `datasets` to a dict; the import
# statement below still resolves the installed package.
from datasets import load_from_disk

encoded_dataset = load_from_disk(path)
# NOTE(review): assumes the saved data has a "test" split -- confirm against
# what `get_matched_datasets` returns.
test_split = encoded_dataset["test"]

# The label column is renamed to "labels" during data preparation; fall back
# to "label" in case the stored data still uses the original name.
label_column = "labels" if "labels" in test_split.column_names else "label"
test_dataset = test_split.with_format("torch", ["features", label_column])

y_pred, y_truth, losses = [], [], []
model.eval()
with torch.no_grad():
    for batch in tqdm(DataLoader(test_dataset, 32)):
        features = batch["features"]
        labels = batch[label_column].float().unsqueeze(-1)
        preds = model(features)

        # squeeze only the trailing dimension: a bare squeeze() turns a final
        # batch of size 1 into a scalar, and extend() then fails on a float
        y_pred.extend(preds.sigmoid().round().squeeze(-1).tolist())
        y_truth.extend(labels.squeeze(-1).tolist())

        loss = criterion(preds, labels)
        losses.append(loss.item())

# mean of the per-batch losses (not weighted by batch size)
print(f"loss={np.mean(losses)}")
print(f"f1={f1_score(y_truth, y_pred)}")