In [1]:
import copy
import importlib
import os
import warnings
from itertools import product

import torch
from dotenv import load_dotenv
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch import loggers as pl_loggers
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.tuner import Tuner
from tqdm.auto import tqdm

load_dotenv("../env")

warnings.filterwarnings("ignore", ".*does not have many workers.*")

In [2]:
import luminar.document.data
import luminar.document.model
import luminar.features
import luminar.mongo

importlib.reload(luminar.document.data)
importlib.reload(luminar.document.model)
importlib.reload(luminar.features)
importlib.reload(luminar.mongo)

<module 'luminar.mongo' from '/nvme/projects/PrismAI/PrismAI/src/luminar/mongo.py'>

In [3]:
from luminar.document.data import (
    DocumentClassificationDataModule,
    FeatureDataset,
    PaddingDataloader,
    n_way_split,
)
from luminar.document.model import ConvolutionalLayerSpec, DocumentClassficationModel
from luminar.features import FeatureExtractor, OneDimFeatures, Slicer, TwoDimFeatures
from luminar.mongo import PrismaiDataset

In [4]:
domains = {
    "Blog Authorship": {"domain": "blog_authorship_corpus"},
    "Student Essays": {"domain": "student_essays"},
    "CNN News": {"domain": "cnn_news"},
    "Euro Court Cases": {"domain": "euro_court_cases"},
    "House of Commons": {"domain": "house_of_commons"},
    "ArXiv Papers": {"domain": "arxiv_papers"},
    "Gutenberg": {"domain": "gutenberg", "lang": "en-EN"},
    "Bundestag [DE]": {"domain": "bundestag"},
    "Spiegel [DE]": {"domain": "spiegel_articles"},
    # "Gutenberg [DE]": {"domain": "gutenberg", "lang": "de-DE"},
}

In [5]:
dmap = {
    "Blog Authorship": "Blog Authorship",
    "Student Essays": "Student Essays",
    "CNN News": "CNN News",
    "Euro Court Cases": "Euro Court Cases",
    "House of Commons": "House of Commons",
    "ArXiv Papers": "ArXiv Papers",
    "Gutenberg [EN]": "Gutenberg",
    "Bundestag": "Bundestag [DE]",
    "Spiegel": "Spiegel [DE]",
    # "Gutenberg [DE]": 
}

In [6]:
config = {
    "seed": 1337,
    "eval_split": 0.1,
    "test_split": 0.1,
    "feature_model": "gpt2",
    "synth_agent": "gpt-4o-mini",
    # "synth_agent": "gemma2:9b"
}

In [7]:
datasets = {
    domain: PrismaiDataset(
        mongo_db_connection=os.environ.get("MONGO_DB_CONNECTION"),
        database="prismai",
        collection="features_prismai",
        feature_model=config["feature_model"],
        synth_agent=config["synth_agent"],
        **kwargs,
    )
    for domain, kwargs in domains.items()
}

In [8]:
train_splits = {}
eval_splits = {}
test_splits = {}
for domain, dataset in datasets.items():
    seed_everything(config["seed"])
    train_dataset, eval_dataset, test_dataset = n_way_split(
        dataset,
        config["eval_split"],
        config["test_split"],
        infer_first=True,
    )
    train_splits[domain] = train_dataset
    eval_splits[domain] = eval_dataset
    test_splits[domain] = test_dataset

Seed set to 1337
[PrismaiDataset] Loading Documents from MongoDB: 1500it [00:03, 464.53it/s]


[PrismaiDataset] Writing Cache File /nvme/.cache/luminar/ef0bf4ac00353b4c3cf35f1d81719f7c755a0aa74285110bc6d705aeab0f4275.pkl


Seed set to 1337
[PrismaiDataset] Loading Documents from MongoDB: 1500it [00:12, 124.47it/s]


[PrismaiDataset] Writing Cache File /nvme/.cache/luminar/5486290edc393c9f30480a59dd534ea751ae8a31b48bfd03b5f098c4f8bdc1cc.pkl


Seed set to 1337
[PrismaiDataset] Loading Documents from MongoDB: 1500it [00:18, 82.97it/s]


[PrismaiDataset] Writing Cache File /nvme/.cache/luminar/2982044d5332b295e5e8e3a5a6291d2757ee5843d7d6cbba05f6a53d57500b50.pkl


Seed set to 1337
[PrismaiDataset] Loading Documents from MongoDB: 1500it [00:20, 72.62it/s]


[PrismaiDataset] Writing Cache File /nvme/.cache/luminar/22643d0366260c134eb83cdacb95ab8caf23fbaf0150d3f2f7ae2cea07101987.pkl


Seed set to 1337
[PrismaiDataset] Loading Documents from MongoDB: 1500it [00:19, 78.72it/s]


[PrismaiDataset] Writing Cache File /nvme/.cache/luminar/1725afa23038757da3571b5b33d988ce03bef64b78aa936623b059be17e4b941.pkl


Seed set to 1337
[PrismaiDataset] Loading Documents from MongoDB: 1628it [00:23, 70.73it/s]


[PrismaiDataset] Writing Cache File /nvme/.cache/luminar/cf046e8f2421ad8948f68de42e767365337f656a51ce9f9bf2a508f41d1e99db.pkl


Seed set to 1337
[PrismaiDataset] Loading Documents from MongoDB: 1500it [00:27, 55.18it/s]


[PrismaiDataset] Writing Cache File /nvme/.cache/luminar/f38cb33ea5c3f40856a4db07115946d7a99c95fca0b54ce148415fb5063be266.pkl


Seed set to 1337
[PrismaiDataset] Loading Documents from MongoDB: 1492it [00:26, 57.04it/s]


[PrismaiDataset] Writing Cache File /nvme/.cache/luminar/281b270f73261fde5b5517abe476f7282592e6e4c0c4161857d3fd83c33aaf9b.pkl


Seed set to 1337
[PrismaiDataset] Loading Documents from MongoDB: 1500it [00:19, 78.75it/s]


[PrismaiDataset] Writing Cache File /nvme/.cache/luminar/50f2f9ce260cd153c5bca0c86708073a7221aad4c39b121601fdbb01dc3e6d31.pkl


In [9]:
# feature_dim = OneDimFeatures(256)
# featurizer = FeatureExtractor.Likelihood()
# featurizer = FeatureExtractor.LogLikelihoodLogRankRatio()
# config["second_dim_as_channels"] = False
feature_dim = TwoDimFeatures(256, 13)
# featurizer = FeatureExtractor.LikelihoodTopkLikelihoodRatio(13)
featurizer = FeatureExtractor.IntermediateLogits(13)
config["second_dim_as_channels"] = True

# slicer = Slicer.Random(feature_dim[0])
slicer = Slicer.RandomMultiple(feature_dim[0] // 4, multiple=4, stride=16)
# slicer = Slicer.RandomMultiple(feature_dim[0] // 4, 4)

config["feature_dim"] = feature_dim
config["featurizer"] = repr(featurizer)
config["slicer"] = repr(slicer)

config["num_samples"] = None


def featurize(dataset) -> FeatureDataset:
    return FeatureDataset(
        dataset, slicer, featurizer, num_samples=config["num_samples"]
    )

In [10]:
train_datasets, eval_datasets, test_datasets = {}, {}, {}
for domain, dataset in tqdm(datasets.items()):
    train_datasets[domain] = featurize(train_splits[domain])
    eval_datasets[domain] = featurize(eval_splits[domain])
    test_datasets[domain] = featurize(test_splits[domain])

  0%|          | 0/9 [00:00<?, ?it/s]

## In-Domain Training & Evaluation

In [11]:
config |= {
    "projection_dim": None,
    "learning_rate": 0.0001,
    "warmup_steps": 66,
    "max_epochs": 50,
    "gradient_clip_val": 0.5,
    "batch_size": 32,
}

# SeqXGPT Layer Configuration
config["conv_layer_shapes"] = [
    ConvolutionalLayerSpec(64, 5),
    *[ConvolutionalLayerSpec(128, 3)] * 3,
    ConvolutionalLayerSpec(64, 3),
]

In [12]:
from torch.utils.data import ConcatDataset


# capturing config from "closure"
def get_dataloader(*dataset, **kwargs) -> PaddingDataloader:
    if len(dataset) == 1:
        dataset = dataset[0]
    else:
        dataset = ConcatDataset(dataset)
    return PaddingDataloader(
        dataset,
        feature_dim=config["feature_dim"],
        batch_size=config["batch_size"],
        **kwargs,
    )

In [13]:
results_in_domain = {}
for domain in domains:
    seed_everything(config["seed"])

    train_dataloader = get_dataloader(train_datasets[domain], shuffle=True)
    eval_dataloader = get_dataloader(eval_datasets[domain])

    model = DocumentClassficationModel(**config)
    trainer = Trainer(
        max_epochs=config["max_epochs"],
        logger=pl_loggers.TensorBoardLogger(
            save_dir=f"logs/in_domain/{type(featurizer).__name__}",
            name=domain,
        ),
        gradient_clip_val=config["gradient_clip_val"],
        callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=3)],
        deterministic=True,
    )

    trainer.fit(
        model,
        train_dataloaders=train_dataloader,
        val_dataloaders=eval_dataloader,
    )

    trainer.progress_bar_callback.disable()
    metrics = []
    for other, dataset in test_datasets.items():
        trainer.validate(model, get_dataloader(eval_datasets[other]), verbose=False)
        metrics.append(
            {
                "other": other,
            }
            | trainer.test(
                model,
                get_dataloader(dataset),
                verbose=False,
            )[0]
        )
    trainer.progress_bar_callback.enable()

    results_in_domain[domain] = {
        "domain": domain,
        "config": copy.deepcopy(config),
        "metrics": metrics,
    }
    print(domain, metrics)


Seed set to 1337
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes  
--------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?          
1 | projection  | Identity          | 0      | train | [32, 16384] | [32, 16384]
2 | classifier  | Linear            | 16.4 K | train | [32, 16384] | [32, 1]    
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?          
--------------------------------------------------------------------------------------
168 K     Trainable params
0         Non-trainable params
168 K     Total params
0.674     Total estimated model params size (MB)
15        Modules in train mode
0         Module

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Seed set to 1337


Blog Authorship [{'other': 'Blog Authorship', 'test_loss': 0.12927396595478058, 'test_acc@0.5': 0.9540635943412781, 'test_acc@best': 0.9540635943412781, 'test_roc_auc': 0.9905263185501099}, {'other': 'Student Essays', 'test_loss': 1.524064064025879, 'test_acc@0.5': 0.5600000023841858, 'test_acc@best': 0.6133333444595337, 'test_roc_auc': 0.6703110933303833}, {'other': 'CNN News', 'test_loss': 1.4044389724731445, 'test_acc@0.5': 0.5133333206176758, 'test_acc@best': 0.6966666579246521, 'test_roc_auc': 0.7703555822372437}, {'other': 'Euro Court Cases', 'test_loss': 2.6178324222564697, 'test_acc@0.5': 0.503333330154419, 'test_acc@best': 0.6600000262260437, 'test_roc_auc': 0.7273111343383789}, {'other': 'House of Commons', 'test_loss': 1.8245090246200562, 'test_acc@0.5': 0.5033556818962097, 'test_acc@best': 0.6610738039016724, 'test_roc_auc': 0.6809459328651428}, {'other': 'ArXiv Papers', 'test_loss': 1.4874645471572876, 'test_acc@0.5': 0.5017064809799194, 'test_acc@best': 0.7542662024497986

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes  
--------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?          
1 | projection  | Identity          | 0      | train | [32, 16384] | [32, 16384]
2 | classifier  | Linear            | 16.4 K | train | [32, 16384] | [32, 1]    
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?          
--------------------------------------------------------------------------------------
168 K     Trainable params
0         Non-trainable params
168 K     Total params
0.674     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Seed set to 1337


Student Essays [{'other': 'Blog Authorship', 'test_loss': 6.6242289543151855, 'test_acc@0.5': 0.6007066965103149, 'test_acc@best': 0.6077738404273987, 'test_roc_auc': 0.5453634262084961}, {'other': 'Student Essays', 'test_loss': 0.16484086215496063, 'test_acc@0.5': 0.9133333563804626, 'test_acc@best': 0.949999988079071, 'test_roc_auc': 0.9873777627944946}, {'other': 'CNN News', 'test_loss': 1.0666855573654175, 'test_acc@0.5': 0.7266666889190674, 'test_acc@best': 0.7833333611488342, 'test_roc_auc': 0.8714222311973572}, {'other': 'Euro Court Cases', 'test_loss': 1.1247600317001343, 'test_acc@0.5': 0.6966666579246521, 'test_acc@best': 0.800000011920929, 'test_roc_auc': 0.8642666935920715}, {'other': 'House of Commons', 'test_loss': 1.9349571466445923, 'test_acc@0.5': 0.7651006579399109, 'test_acc@best': 0.8020133972167969, 'test_roc_auc': 0.7896396517753601}, {'other': 'ArXiv Papers', 'test_loss': 1.8645610809326172, 'test_acc@0.5': 0.7576791644096375, 'test_acc@best': 0.7713310718536377,

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes  
--------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?          
1 | projection  | Identity          | 0      | train | [32, 16384] | [32, 16384]
2 | classifier  | Linear            | 16.4 K | train | [32, 16384] | [32, 1]    
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?          
--------------------------------------------------------------------------------------
168 K     Trainable params
0         Non-trainable params
168 K     Total params
0.674     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Seed set to 1337


CNN News [{'other': 'Blog Authorship', 'test_loss': 2.6755316257476807, 'test_acc@0.5': 0.5194346308708191, 'test_acc@best': 0.5300353169441223, 'test_roc_auc': 0.24295739829540253}, {'other': 'Student Essays', 'test_loss': 1.0468742847442627, 'test_acc@0.5': 0.7099999785423279, 'test_acc@best': 0.7166666388511658, 'test_roc_auc': 0.7954666614532471}, {'other': 'CNN News', 'test_loss': 0.20822416245937347, 'test_acc@0.5': 0.9133333563804626, 'test_acc@best': 0.9433333277702332, 'test_roc_auc': 0.9815999865531921}, {'other': 'Euro Court Cases', 'test_loss': 0.37343406677246094, 'test_acc@0.5': 0.8500000238418579, 'test_acc@best': 0.8766666650772095, 'test_roc_auc': 0.9469777941703796}, {'other': 'House of Commons', 'test_loss': 0.6511669158935547, 'test_acc@0.5': 0.8456375598907471, 'test_acc@best': 0.8389261960983276, 'test_roc_auc': 0.846486508846283}, {'other': 'ArXiv Papers', 'test_loss': 0.7939708828926086, 'test_acc@0.5': 0.8054607510566711, 'test_acc@best': 0.8395904302597046, 't

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes  
--------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?          
1 | projection  | Identity          | 0      | train | [32, 16384] | [32, 16384]
2 | classifier  | Linear            | 16.4 K | train | [32, 16384] | [32, 1]    
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?          
--------------------------------------------------------------------------------------
168 K     Trainable params
0         Non-trainable params
168 K     Total params
0.674     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Seed set to 1337


Euro Court Cases [{'other': 'Blog Authorship', 'test_loss': 3.750967264175415, 'test_acc@0.5': 0.46289753913879395, 'test_acc@best': 0.5300353169441223, 'test_roc_auc': 0.26654136180877686}, {'other': 'Student Essays', 'test_loss': 1.239440679550171, 'test_acc@0.5': 0.75, 'test_acc@best': 0.7433333396911621, 'test_roc_auc': 0.8228444457054138}, {'other': 'CNN News', 'test_loss': 0.4872305989265442, 'test_acc@0.5': 0.8533333539962769, 'test_acc@best': 0.8366666436195374, 'test_roc_auc': 0.9318666458129883}, {'other': 'Euro Court Cases', 'test_loss': 0.12037040293216705, 'test_acc@0.5': 0.95333331823349, 'test_acc@best': 0.9633333086967468, 'test_roc_auc': 0.9942666888237}, {'other': 'House of Commons', 'test_loss': 0.5555992126464844, 'test_acc@0.5': 0.8523489832878113, 'test_acc@best': 0.8456375598907471, 'test_roc_auc': 0.9129729866981506}, {'other': 'ArXiv Papers', 'test_loss': 0.5808747410774231, 'test_acc@0.5': 0.8839590549468994, 'test_acc@best': 0.8771331310272217, 'test_roc_auc'

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes  
--------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?          
1 | projection  | Identity          | 0      | train | [32, 16384] | [32, 16384]
2 | classifier  | Linear            | 16.4 K | train | [32, 16384] | [32, 1]    
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?          
--------------------------------------------------------------------------------------
168 K     Trainable params
0         Non-trainable params
168 K     Total params
0.674     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Seed set to 1337


House of Commons [{'other': 'Blog Authorship', 'test_loss': 1.6250951290130615, 'test_acc@0.5': 0.4098939895629883, 'test_acc@best': 0.5123674869537354, 'test_roc_auc': 0.4286716878414154}, {'other': 'Student Essays', 'test_loss': 0.5336195826530457, 'test_acc@0.5': 0.8266666531562805, 'test_acc@best': 0.8199999928474426, 'test_roc_auc': 0.9189333319664001}, {'other': 'CNN News', 'test_loss': 0.5140358805656433, 'test_acc@0.5': 0.8066666722297668, 'test_acc@best': 0.7933333516120911, 'test_roc_auc': 0.9035999774932861}, {'other': 'Euro Court Cases', 'test_loss': 0.6002725958824158, 'test_acc@0.5': 0.8299999833106995, 'test_acc@best': 0.8100000023841858, 'test_roc_auc': 0.9213777780532837}, {'other': 'House of Commons', 'test_loss': 0.23081710934638977, 'test_acc@0.5': 0.9161073565483093, 'test_acc@best': 0.9228187799453735, 'test_roc_auc': 0.9741891622543335}, {'other': 'ArXiv Papers', 'test_loss': 0.3865963816642761, 'test_acc@0.5': 0.8976109027862549, 'test_acc@best': 0.8941979408264

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes  
--------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?          
1 | projection  | Identity          | 0      | train | [32, 16384] | [32, 16384]
2 | classifier  | Linear            | 16.4 K | train | [32, 16384] | [32, 1]    
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?          
--------------------------------------------------------------------------------------
168 K     Trainable params
0         Non-trainable params
168 K     Total params
0.674     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Seed set to 1337


ArXiv Papers [{'other': 'Blog Authorship', 'test_loss': 3.663987159729004, 'test_acc@0.5': 0.5017668008804321, 'test_acc@best': 0.5441696047782898, 'test_roc_auc': 0.5705012679100037}, {'other': 'Student Essays', 'test_loss': 0.6074722409248352, 'test_acc@0.5': 0.800000011920929, 'test_acc@best': 0.79666668176651, 'test_roc_auc': 0.8910222053527832}, {'other': 'CNN News', 'test_loss': 0.47591787576675415, 'test_acc@0.5': 0.8100000023841858, 'test_acc@best': 0.8233333230018616, 'test_roc_auc': 0.8979555368423462}, {'other': 'Euro Court Cases', 'test_loss': 0.6954304575920105, 'test_acc@0.5': 0.7599999904632568, 'test_acc@best': 0.8266666531562805, 'test_roc_auc': 0.9099110960960388}, {'other': 'House of Commons', 'test_loss': 0.6852285265922546, 'test_acc@0.5': 0.8389261960983276, 'test_acc@best': 0.8590604066848755, 'test_roc_auc': 0.8872522711753845}, {'other': 'ArXiv Papers', 'test_loss': 0.1830557882785797, 'test_acc@0.5': 0.9317406415939331, 'test_acc@best': 0.9215016961097717, 'te

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes  
--------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?          
1 | projection  | Identity          | 0      | train | [32, 16384] | [32, 16384]
2 | classifier  | Linear            | 16.4 K | train | [32, 16384] | [32, 1]    
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?          
--------------------------------------------------------------------------------------
168 K     Trainable params
0         Non-trainable params
168 K     Total params
0.674     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Seed set to 1337


Gutenberg [{'other': 'Blog Authorship', 'test_loss': 1.7988485097885132, 'test_acc@0.5': 0.4840989410877228, 'test_acc@best': 0.5265017747879028, 'test_roc_auc': 0.36812031269073486}, {'other': 'Student Essays', 'test_loss': 0.6884621381759644, 'test_acc@0.5': 0.7233333587646484, 'test_acc@best': 0.8133333325386047, 'test_roc_auc': 0.893822193145752}, {'other': 'CNN News', 'test_loss': 0.53090500831604, 'test_acc@0.5': 0.7900000214576721, 'test_acc@best': 0.79666668176651, 'test_roc_auc': 0.861822247505188}, {'other': 'Euro Court Cases', 'test_loss': 1.000800371170044, 'test_acc@0.5': 0.6299999952316284, 'test_acc@best': 0.7400000095367432, 'test_roc_auc': 0.8449777960777283}, {'other': 'House of Commons', 'test_loss': 0.35946178436279297, 'test_acc@0.5': 0.8255033493041992, 'test_acc@best': 0.8355704545974731, 'test_roc_auc': 0.9254504442214966}, {'other': 'ArXiv Papers', 'test_loss': 0.3020920753479004, 'test_acc@0.5': 0.8703071475028992, 'test_acc@best': 0.8771331310272217, 'test_ro

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes  
--------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?          
1 | projection  | Identity          | 0      | train | [32, 16384] | [32, 16384]
2 | classifier  | Linear            | 16.4 K | train | [32, 16384] | [32, 1]    
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?          
--------------------------------------------------------------------------------------
168 K     Trainable params
0         Non-trainable params
168 K     Total params
0.674     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Seed set to 1337


Bundestag [DE] [{'other': 'Blog Authorship', 'test_loss': 7.39722204208374, 'test_acc@0.5': 0.16961130499839783, 'test_acc@best': 0.5300353169441223, 'test_roc_auc': 0.19576440751552582}, {'other': 'Student Essays', 'test_loss': 8.450758934020996, 'test_acc@0.5': 0.5299999713897705, 'test_acc@best': 0.5799999833106995, 'test_roc_auc': 0.6588444709777832}, {'other': 'CNN News', 'test_loss': 5.898632049560547, 'test_acc@0.5': 0.5299999713897705, 'test_acc@best': 0.6633333563804626, 'test_roc_auc': 0.81086665391922}, {'other': 'Euro Court Cases', 'test_loss': 11.601397514343262, 'test_acc@0.5': 0.49000000953674316, 'test_acc@best': 0.5099999904632568, 'test_roc_auc': 0.5934666395187378}, {'other': 'House of Commons', 'test_loss': 5.868144512176514, 'test_acc@0.5': 0.47651007771492004, 'test_acc@best': 0.5973154306411743, 'test_roc_auc': 0.6883333325386047}, {'other': 'ArXiv Papers', 'test_loss': 5.0874199867248535, 'test_acc@0.5': 0.4948805570602417, 'test_acc@best': 0.6484641432762146, '

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes  
--------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?          
1 | projection  | Identity          | 0      | train | [32, 16384] | [32, 16384]
2 | classifier  | Linear            | 16.4 K | train | [32, 16384] | [32, 1]    
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?          
--------------------------------------------------------------------------------------
168 K     Trainable params
0         Non-trainable params
168 K     Total params
0.674     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Spiegel [DE] [{'other': 'Blog Authorship', 'test_loss': 1.899004578590393, 'test_acc@0.5': 0.5300353169441223, 'test_acc@best': 0.554770290851593, 'test_roc_auc': 0.3709774315357208}, {'other': 'Student Essays', 'test_loss': 1.9004229307174683, 'test_acc@0.5': 0.6133333444595337, 'test_acc@best': 0.6233333349227905, 'test_roc_auc': 0.6586666703224182}, {'other': 'CNN News', 'test_loss': 1.2783634662628174, 'test_acc@0.5': 0.6633333563804626, 'test_acc@best': 0.6700000166893005, 'test_roc_auc': 0.740066647529602}, {'other': 'Euro Court Cases', 'test_loss': 1.8682434558868408, 'test_acc@0.5': 0.6033333539962769, 'test_acc@best': 0.6800000071525574, 'test_roc_auc': 0.7562888860702515}, {'other': 'House of Commons', 'test_loss': 1.1434385776519775, 'test_acc@0.5': 0.6677852272987366, 'test_acc@best': 0.6744966506958008, 'test_roc_auc': 0.7288288474082947}, {'other': 'ArXiv Papers', 'test_loss': 0.9927137494087219, 'test_acc@0.5': 0.7337883710861206, 'test_acc@best': 0.7406143546104431, 'te

In [14]:
import pandas as pd

results = []
for domain in domains:
    results.append(
        [results_in_domain[domain]["metrics"][i]["test_roc_auc"] for i in range(len(domains))]
    )

df = pd.DataFrame(results, columns=domains, index=domains)
df

Unnamed: 0,Blog Authorship,Student Essays,CNN News,Euro Court Cases,House of Commons,ArXiv Papers,Gutenberg,Bundestag [DE],Spiegel [DE]
Blog Authorship,0.990526,0.670311,0.770356,0.727311,0.680946,0.818656,0.830402,0.585027,0.671365
Student Essays,0.545363,0.987378,0.871422,0.864267,0.78964,0.765632,0.87461,0.712686,0.776286
CNN News,0.242957,0.795467,0.9816,0.946978,0.846487,0.856025,0.904823,0.728708,0.656823
Euro Court Cases,0.266541,0.822844,0.931867,0.994267,0.912973,0.919765,0.871915,0.569288,0.50094
House of Commons,0.428672,0.918933,0.9036,0.921378,0.974189,0.935887,0.882033,0.713489,0.65387
ArXiv Papers,0.570501,0.891022,0.897956,0.909911,0.887252,0.979499,0.911111,0.603696,0.497852
Gutenberg,0.36812,0.893822,0.861822,0.844978,0.92545,0.944041,0.977116,0.718074,0.716868
Bundestag [DE],0.195764,0.658844,0.810867,0.593467,0.688333,0.751864,0.787612,0.954627,0.927696
Spiegel [DE],0.370977,0.658667,0.740067,0.756289,0.728829,0.787205,0.687092,0.892476,0.953468


## Cross-Domain Training & Evaluation

In [15]:
seed_everything(config["seed"])
train_dataloader = get_dataloader(*train_datasets.values(), shuffle=True)
eval_dataloader = get_dataloader(*eval_datasets.values())
test_dataloader = get_dataloader(*test_datasets.values())

model = DocumentClassficationModel(**config)
trainer = Trainer(
    max_epochs=config["max_epochs"],
    logger=pl_loggers.TensorBoardLogger(
        save_dir=f"logs/all_domains/{type(featurizer).__name__}",
    ),
    gradient_clip_val=config["gradient_clip_val"],
    callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=3)],
    deterministic=True,
)
trainer.fit(
    model,
    train_dataloaders=train_dataloader,
    val_dataloaders=eval_dataloader,
)

Seed set to 1337
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes  
--------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?          
1 | projection  | Identity          | 0      | train | [32, 16384] | [32, 16384]
2 | classifier  | Linear            | 16.4 K | train | [32, 16384] | [32, 1]    
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?          
--------------------------------------------------------------------------------------
168 K     Trainable params
0         Non-trainable params
168 K     Total params
0.674     Total estimated model params size (MB)
15        Modules in train mode
0         Module

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

In [16]:
trainer.progress_bar_callback.disable()
metrics_cross_domain = []
for other, dataset in test_datasets.items():
    trainer.validate(model, get_dataloader(eval_datasets[domain]), verbose=False)
    metrics_cross_domain.append(
        {"other": other}
        | trainer.test(
            model,
            get_dataloader(dataset),
            verbose=False,
        )[0]
    )
trainer.validate(model, eval_dataloader, verbose=False)
metrics_cross_domain += [
    {
        "other": "ALL",
        **trainer.test(
            model,
            test_dataloader,
            verbose=False,
        )[0],
    }
]
trainer.progress_bar_callback.enable()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


In [17]:
import pandas as pd


def df_to_latex_heatmap(_df: pd.DataFrame):
    print(
        "\\plotHeatmap{"
        + ",".join(_df.index)
        + "}{%\n    "
        + ",%\n    ".join(
            [
                "{"
                + ",".join(f"{val:.4f}/{round(val, 2):.2f}" for val in row[1:])
                + "}"
                for row in _df.reset_index().values
            ]
        )
        + "%\n}{"
        + ",".join(_df.columns)
        + "}"
    )


In [18]:
# _metric = "test_roc_auc"
_metric = "test_acc"

results = []
for domain in domains:
    results.append(
        [
            results_in_domain[domain]["metrics"][i][_metric]
            for i in range(len(domains))
        ]
    )
results.append([m[_metric] for m in metrics_cross_domain[:len(domains)]])

df = pd.DataFrame(results, columns=domains, index=list(domains) + ["ALL"])
df["AVG"] = df.mean(axis=1)
df["AVG"][-1] = metrics_cross_domain[-1][_metric]
df

KeyError: 'test_acc'

In [None]:
df_to_latex_heatmap(df)

In [None]:
config

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

ax = sns.heatmap(
    df,
    annot=True,
    fmt=".2f",
    vmax=1.0,
    vmin=0.0,
    cmap=sns.cubehelix_palette(rot=-0.2, as_cmap=True),
    yticklabels=list(domains) + ["ALL"],
    xticklabels=list(domains) + ["AVG"],
    square=True,
    # reduce annotation font size
    annot_kws={"fontsize": 8},
    cbar=False,
)

# rotate x-axis labels by 45 degrees
# anchored at the right edge of the axes
for tick in ax.get_xticklabels():
    tick.set_rotation(45)
    tick.set_horizontalalignment("right")

plt.tight_layout()
# plt.savefig(
#     "../figures/evaluation-trained_in_domain-test_0.1-gpt2_256-rand_4-il_13_as_channels.pdf",
#     dpi=300,
# )
plt.show()

### LLR on Whole Datasets

In [None]:
from sklearn.metrics import auc, roc_curve

from luminar.baselines import llr_from_transition_scores
from simple_dataset import Dataset as SimpleDataset
from transition_scores.data import TransitionScores

results_llr = []
for domain, split in datasets.items():
    dataset_test = (
        SimpleDataset(split)
        .flat_map(lambda doc: doc["features"])
        .map(
            lambda x: {
                "llr": llr_from_transition_scores(
                    TransitionScores(**x["transition_scores"])
                ),
                "labels": int(x["type"] != "source"),
            },
            in_place=False,
        )
    )
    fpr, tpr, _ = roc_curve(dataset_test["labels"], dataset_test["llr"])
    auroc = auc(fpr, tpr)

    preds = np.array(dataset_test["llr"])
    labels = np.array(dataset_test["labels"])

    mean_0 = float(np.mean(preds[labels == 0]))
    mean_1 = float(np.mean(preds[labels == 1]))

    thresholds = np.linspace(round(mean_0, 1) - 0.2, round(mean_1, 1) + 0.3, 5001)
    preds_thresholded: np.ndarray = preds > thresholds.reshape(-1, 1)
    acc_thresholded = np.mean((preds_thresholded == labels), axis=1)
    idx = np.argmax(acc_thresholded)
    best_threshold = thresholds[idx]
    best_acc = acc_thresholded[idx]

    results_llr.append({
        "domain": domain,
        "auroc": auroc,
        "best_acc": best_acc,
        "best_threshold": best_threshold,
    })

pd.DataFrame(results_llr)

### LLR on Test Splits

In [None]:
from sklearn.metrics import auc, roc_curve

from luminar.baselines import llr_from_transition_scores
from simple_dataset import Dataset as SimpleDataset
from transition_scores.data import TransitionScores

results_llr = []
for domain, split in test_splits.items():
    dataset_test = (
        SimpleDataset(split)
        .flat_map(lambda doc: doc["features"])
        .map(
            lambda x: {
                "llr": llr_from_transition_scores(
                    TransitionScores(**x["transition_scores"])
                ),
                "labels": int(x["type"] != "source"),
            },
            in_place=False,
        )
    )
    fpr, tpr, _ = roc_curve(dataset_test["labels"], dataset_test["llr"])
    auroc = auc(fpr, tpr)

    preds = np.array(dataset_test["llr"])
    labels = np.array(dataset_test["labels"])

    mean_0 = float(np.mean(preds[labels == 0]))
    mean_1 = float(np.mean(preds[labels == 1]))

    thresholds = np.linspace(round(mean_0, 1) - 0.2, round(mean_1, 1) + 0.3, 1001)
    preds_thresholded: np.ndarray = preds > thresholds.reshape(-1, 1)
    acc_thresholded = np.mean((preds_thresholded == labels), axis=1)
    idx = np.argmax(acc_thresholded)
    best_threshold = thresholds[idx]
    best_acc = acc_thresholded[idx]

    results_llr.append({
        "domain": domain,
        "auroc": auroc,
        "best_acc": best_acc,
        "best_threshold": best_threshold,
    })

pd.DataFrame(results_llr)

In [None]:
raise RuntimeError("STOP")

In [None]:
sample = train_datasets["CNN News"][0]["features"].numpy().squeeze()
train_datasets["CNN News"][0]["labels"]

In [None]:
fig = plt.imshow(sample.T, cmap=sns.cubehelix_palette(rot=-0.2, as_cmap=True))
fig.axes.set_axis_off()
plt.show()

In [None]:
sample = train_datasets["CNN News"][1]["features"].numpy().squeeze()
train_datasets["CNN News"][1]["labels"]

In [None]:
fig = plt.imshow(sample.T, cmap=sns.cubehelix_palette(rot=-0.2, as_cmap=True))
fig.axes.set_axis_off()
plt.show()

In [None]:
raise RuntimeError("STOP")

In [None]:
from sklearn.metrics import auc, roc_curve

from luminar.baselines import llr_from_transition_scores

dataset_test = dm._dataset_test.map(
    lambda x: {
        "llr": llr_from_transition_scores(x["features"]),
        "labels": x["labels"],
    },
    in_place=False,
)
fpr, tpr, _ = roc_curve(dataset_test["labels"], dataset_test["llr"])
auc(fpr, tpr)