# Luminar

## Minimal Code

In [13]:
import os.path

import torch

from typing import Iterable, NamedTuple
from pathlib import Path
from datasets import Dataset
from torch import nn
from torch.utils.data import DataLoader, Subset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizer,
)

### Luminar Encoder

1. Pre-Process Inputs: tokenize and pass through LLM, recording hidden states
2. Calculate _Intermediate Likelihoods_: pass each hidden state through the models LM head

In [14]:
class LuminarEncoder:
    def __init__(
        self,
        feature_dim: int = 256,
        model_name_or_path: str = "gpt2",
        device: str = ("cuda:0" if torch.cuda.is_available() else "cpu"),
    ):
        self.feature_dim = feature_dim
        self.device = torch.device(device)

        self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path
        )
        if not hasattr(self.tokenizer, "pad_token") or self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.pad_token_id = self.tokenizer.pad_token_id

        self.model: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
            model_name_or_path
        )
        self.model = self.model.to(self.device)

        if hasattr(self.model, "lm_head"):
            self.model_lm_head: nn.Linear = self.model.lm_head
        elif hasattr(self.model.model, "lm_head"):
            self.model_lm_head: nn.Linear = self.model.model.lm_head
        else:
            raise ValueError("Could not find lm_head in model")

    def __call__(self, batch: dict[str, list[str]]) -> dict[str, list[torch.Tensor]]:
        return {"features": self.process(batch["text"])}

    def process(self, batch: list[str]) -> list[torch.Tensor]:
        encoding = self.tokenizer(
            batch,
            padding=True,
            truncation=True,
            max_length=self.feature_dim,
            return_tensors="pt",
        )
        batch_hidden_states = self.forward(encoding.input_ids, encoding.attention_mask)

        intermediate_likelihoods = []
        for input_ids, hidden_states in zip(encoding.input_ids, batch_hidden_states):
            intermediate_likelihoods.append(
                self.compute_intermediate_likelihoods(input_ids, hidden_states)
            )

        return intermediate_likelihoods

    @torch.no_grad()
    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
    ) -> Iterable[tuple[torch.Tensor, ...]]:
        outputs = self.model(
            input_ids=input_ids.to(self.device),
            attention_mask=attention_mask.to(self.device),
            output_hidden_states=True,
        )

        # unpack hidden states to get one list of tensors per input sequence,
        # instead of one hidden state per layer in the model
        return zip(*outputs.hidden_states)  # type: ignore

    @torch.no_grad()
    def compute_intermediate_likelihoods(
        self,
        input_ids: torch.Tensor,
        hidden_states: tuple[torch.Tensor],
    ) -> torch.Tensor:
        labels = input_ids[1:].view(-1, 1)

        seq_length = min(labels.ne(self.pad_token_id).sum(), self.feature_dim)
        labels = labels[:seq_length].to(self.device)

        intermediate_likelihoods = []
        for hs in hidden_states:
            hs: torch.Tensor = hs[:seq_length].to(self.device)
            il = (
                # get layer logits
                self.model_lm_head(hs)
                # calculate likelihoods
                .softmax(-1)
                # get likelihoods of input tokens
                .gather(-1, labels)
                .squeeze(-1)
                .cpu()
            )
            del hs

            # pad with zeros if sequence is shorter than required feature_dim
            if seq_length < self.feature_dim:
                il = torch.cat([il, torch.zeros(self.feature_dim - seq_length)])

            intermediate_likelihoods.append(il)

        # stack intermediate likelihoods to get tensor of shape (feature_dim, num_layers)
        return torch.stack(intermediate_likelihoods, dim=1)

### Luminar Classifier

CNN-based classifier using _Intermediate Likelihoods_ as input features.
Here, we utilize these inherently 2D values (`seq_len * num_layers`) as 1D inputs where the second dimension is treated as input channels.

In [15]:
class ConvolutionalLayerSpec(NamedTuple):
    channels: int
    kernel_size: int | tuple[int, int]
    stride: int = 1

    @property
    def kernel_size_1d(self):
        if isinstance(self.kernel_size, int):
            return self.kernel_size
        return self.kernel_size[0]

    @property
    def kernel_size_2d(self):
        if isinstance(self.kernel_size, int):
            return (self.kernel_size, self.kernel_size)
        return self.kernel_size

    @property
    def padding(self) -> int:
        return (self.kernel_size_1d - 1) // 2

    def __repr__(self):
        return repr(tuple(self))


DEFAULT_CONV_LAYER_SHAPES = ((64, 5), (128, 3), (128, 3), (128, 3), (64, 3))


class LuminarClassifier(nn.Module):
    def __init__(
        self,
        conv_layer_shapes: Iterable[ConvolutionalLayerSpec] = DEFAULT_CONV_LAYER_SHAPES,
        projection_dim: int | None = None,
    ):
        super().__init__()
        self.conv_layers = nn.Sequential()
        for conv in conv_layer_shapes:
            conv = ConvolutionalLayerSpec(*conv)
            self.conv_layers.append(
                nn.LazyConv1d(
                    conv.channels,
                    conv.kernel_size,  # type: ignore
                    conv.stride,
                    conv.padding,
                ),
            )
            self.conv_layers.append(
                nn.LeakyReLU(),
            )
        self.conv_layers.append(nn.Flatten())

        if projection_dim:
            self.projection = nn.Sequential(
                nn.LazyLinear(projection_dim), nn.LeakyReLU()
            )
        else:
            self.projection = nn.Identity()

        self.classifier = nn.LazyLinear(1)

    def forward(self, features: torch.Tensor):
        # We are using 2D features (so `features` is a 3D tensor)
        # but we want to treat the second feature dimension as channels.
        # Thus, we need to transpose the tensor here
        features = features.transpose(1, 2)

        for layer in self.conv_layers:
            features = layer(features)

        return self.classifier(self.projection(features.flatten(1)))


### Encode Samples

In [16]:
encoder = LuminarEncoder(128, model_name_or_path="gpt2")

In [17]:
notebook_dir = Path.cwd()
project_root = notebook_dir.parent.parent

MODEL_VERSION = 'luminar_classifier_RAID.pt'
MODEL_PATH = project_root / 'luminar' / 'models' / MODEL_VERSION
print(MODEL_PATH)


/home/staff_homes/kboenisc/home/prismAI/PrismAI/src/luminar/models/luminar_classifier_RAID.pt


In [18]:
model = LuminarClassifier()
model.load_state_dict(torch.load(MODEL_PATH))
model = model.to(encoder.device)
model.eval()

LuminarClassifier(
  (conv_layers): Sequential(
    (0): LazyConv1d(0, 64, kernel_size=(5,), stride=(1,), padding=(2,))
    (1): LeakyReLU(negative_slope=0.01)
    (2): LazyConv1d(0, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (3): LeakyReLU(negative_slope=0.01)
    (4): LazyConv1d(0, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (5): LeakyReLU(negative_slope=0.01)
    (6): LazyConv1d(0, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (7): LeakyReLU(negative_slope=0.01)
    (8): LazyConv1d(0, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (9): LeakyReLU(negative_slope=0.01)
    (10): Flatten(start_dim=1, end_dim=-1)
  )
  (projection): Identity()
  (classifier): LazyLinear(in_features=0, out_features=1, bias=True)
)

In [19]:
from raid import run_detection, run_evaluation
from raid.utils import load_data

In [20]:
from tqdm import tqdm

ROUND_PREDICTIONS = True

def my_detector(texts: list[str], batch_size: int = 128) -> list[float]:
    y_pred = []

    for i in tqdm(range(0, len(texts), batch_size), desc="Detecting"):
        batch_texts = texts[i:i+batch_size]

        with torch.no_grad():
            encoded = encoder({"text": batch_texts})
            features: list[torch.Tensor] = encoded["features"]
            features_batch = torch.stack(features).to(encoder.device)

            logits = model(features_batch)
            if ROUND_PREDICTIONS:
                pred = logits.sigmoid().round().squeeze(-1)
            else:
                pred = logits.sigmoid().squeeze(-1)

            y_pred.extend(pred.tolist())

    return y_pred

In [21]:
# Load RAID test
INCLUDE_ADVERSARIAL = True

test_df = load_data(split="test", include_adversarial=INCLUDE_ADVERSARIAL)#, include_adversarial=False)
test_df

Unnamed: 0,id,generation
0,64005577-3d63-4583-8945-7541d3e53e7d,"The Sunspot Number, created by R.Wolf in 184..."
1,c2b9df67-4e29-45ca-bdcc-7065fb907b77,We present several analogies between convex ...
2,07904f22-8530-4d3b-bf49-6bd1642d89f7,Let H be a homology theory for algebraic var...
3,dc5aa023-6f57-4f9c-833a-c0f322a994fa,"The two parallel concepts of ""small"" sets of..."
4,1b1ab19b-fe6f-458d-a666-06bbc1791534,We present new solutions to the strong explo...
...,...,...
671995,b2694dd7-1c4d-4bef-8e52-0c1e13d54130,R​o​n​n​i​e​ ​N​u​n​n​ ​i​s​ ​a​n​ ​A​m​e​r​i​...
671996,9e5c1a37-9305-4ca7-8dc0-ab1ed763231e,L​e​s​t​e​r​ ​M​.​ ​W​o​l​f​s​o​n​ ​w​a​s​ ​a​...
671997,a233aa5d-b375-423f-ad7a-ffc5045398c5,L​e​s​t​e​r​ ​M​.​ ​W​o​l​f​s​o​n​ ​w​a​s​ ​a​...
671998,e267ebb4-b1f7-4af4-b68b-ecd4ba565f93,J​u​s​t​i​n​i​a​n​ ​R​w​e​y​e​m​a​m​u​ ​(​b​o​...


In [22]:
predictions = run_detection(my_detector, test_df)
predictions

Detecting: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 5250/5250 [1:40:01<00:00,  1.14s/it]


[{'id': '64005577-3d63-4583-8945-7541d3e53e7d', 'score': 1.0},
 {'id': 'c2b9df67-4e29-45ca-bdcc-7065fb907b77', 'score': 1.0},
 {'id': '07904f22-8530-4d3b-bf49-6bd1642d89f7', 'score': 1.0},
 {'id': 'dc5aa023-6f57-4f9c-833a-c0f322a994fa', 'score': 1.0},
 {'id': '1b1ab19b-fe6f-458d-a666-06bbc1791534', 'score': 1.0},
 {'id': 'a5dd2abc-eac8-485a-ab60-ee5fa52e745d', 'score': 1.0},
 {'id': 'f3f0cbdb-3722-4638-8802-a69f02348f8b', 'score': 1.0},
 {'id': 'ca55de02-a8d3-41f8-b427-949ae9e244b4', 'score': 1.0},
 {'id': '62b5f5fb-bf37-481d-9783-c40c4344b8e0', 'score': 1.0},
 {'id': '3be2c087-53e6-43c8-9b0e-7b1455d8b01e', 'score': 1.0},
 {'id': '6741c3b6-9384-48b5-8718-cad2b1aeaeef', 'score': 1.0},
 {'id': '84eeb1ae-e149-4511-816b-e6e43b118c75', 'score': 1.0},
 {'id': 'b7008344-ed7a-4a03-b00a-389bce40eb4a', 'score': 1.0},
 {'id': '56eb816c-028e-4e62-99f9-ebc486639d8f', 'score': 1.0},
 {'id': '4d85935c-4200-45ce-beed-1fb40ca9b8bf', 'score': 1.0},
 {'id': '731602fe-9553-4a6a-a172-3dad84cc7641', 'score'

In [23]:
import json
import os

filename = f"RAID_predictions_{'with' if INCLUDE_ADVERSARIAL else 'no'}_adversarial{'_rounded' if ROUND_PREDICTIONS else ''}.json"

directory = os.path.splitext(MODEL_VERSION)[0]
if not os.path.exists(directory):
    os.makedirs(directory)

output = os.path.join(directory, filename)
print(output)
with open(output, 'w') as f:
    json.dump(predictions, f)

luminar_classifier_RAID/RAID_predictions_with_adversarial_rounded.json
