In [1]:
import copy
import importlib
import os
import warnings
from itertools import product

import numpy as np
import pandas as pd
import torch
from dotenv import load_dotenv
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch import loggers as pl_loggers
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from tqdm.auto import tqdm, trange

from luminar.document.data import (
    FeatureDataset,
    PaddingDataloader,
    n_way_split,
)
from luminar.document.model import CNNDocumentClassficationModel, ConvolutionalLayerSpec
from luminar.features import FeatureExtractor, OneDimFeatures, Slicer, TwoDimFeatures
from luminar.mongo import PrismaiDataset

load_dotenv("../env")

warnings.filterwarnings("ignore", ".*does not have many workers.*")

In [2]:
domains = {
    "Blog Authorship": {"domain": "blog_authorship_corpus"},
    "Student Essays": {"domain": "student_essays"},
    "CNN News": {"domain": "cnn_news"},
    "Euro Court Cases": {"domain": "euro_court_cases"},
    "House of Commons": {"domain": "house_of_commons"},
    "ArXiv Papers": {"domain": "arxiv_papers"},
    "Gutenberg": {"domain": "gutenberg", "lang": "en-EN"},
    "Bundestag$_{de}$": {"domain": "bundestag"},
    "Spiegel$_{de}$": {"domain": "spiegel_articles"},
    # "Gutenberg [DE]": {"domain": "gutenberg", "lang": "de-DE"},
}

In [10]:
config = {
    "eval_split": 0.1,
    "test_split": 0.2,
    "feature_model": "gpt2",
    # "feature_model": "meta-llama/Llama-3.2-1B",
    "synth_agent": "gpt-4o-mini",
    # "synth_agent": "gemma2:9b"
    "document_type": "chunk",
    "additional_match_conditions": {
        "$or": [
            {"document.synth_token_end": {"$exists": False}},
            {
                "document.synth_token_end": {"$lt": 1024},
            },
        ]
    },
}

## Features

In [11]:
# feature_dim = OneDimFeatures(256)
# featurizer = FeatureExtractor.Likelihood()
# featurizer = FeatureExtractor.LogLikelihoodLogRankRatio()
# config["second_dim_as_channels"] = False
feature_dim = TwoDimFeatures(256, 13)
# featurizer = FeatureExtractor.LikelihoodTopkLikelihoodRatio(16)
featurizer = FeatureExtractor.IntermediateLikelihood(13)
# config["second_dim_as_channels"] = False
config["second_dim_as_channels"] = True

# slicer = Slicer.First(feature_dim[0])
# slicer = Slicer.Random(feature_dim[0])
slicer = Slicer.RandomMultiple(feature_dim[0] // 4, multiple=4, stride=16)
# slicer = Slicer.RandomMultiple(feature_dim[0] // 4, multiple=4, stride=64)
# slicer = Slicer.RandomMultiple(feature_dim[0] // 4, 4)

config["feature_dim"] = feature_dim
config["featurizer"] = repr(featurizer)
config["slicer"] = repr(slicer)

config["num_samples"] = None


def featurize(dataset) -> FeatureDataset:
    return FeatureDataset(
        tqdm(dataset, position=1, leave=False),
        slicer,
        featurizer,
        num_samples=config["num_samples"],
    )

In [12]:
config |= {
    "projection_dim": 32,
    "learning_rate": 0.0001,
    "warmup_steps": 66,
    "max_epochs": 50,
    "gradient_clip_val": 1.0,
    "batch_size": 32,
}

# SeqXGPT Layer Configuration
config["conv_layer_shapes"] = [
    ConvolutionalLayerSpec(64, 5),
    *[ConvolutionalLayerSpec(128, 3)] * 3,
    ConvolutionalLayerSpec(64, 3),
]

In [13]:
from torch.utils.data import ConcatDataset


# capturing config from "closure"
def get_dataloader(*dataset, **kwargs) -> PaddingDataloader:
    if len(dataset) == 1:
        dataset = dataset[0]
    else:
        dataset = ConcatDataset(dataset)
    return PaddingDataloader(
        dataset,
        feature_dim=config["feature_dim"],
        batch_size=config["batch_size"],
        **kwargs,
    )

In [14]:
config["seed"] = 42

sizes = [0.1] * 10
splits = {}
for domain, kwargs in tqdm(domains.items(), desc="Domains", position=0):
    seed_everything(config["seed"])
    splits[domain] = [
        featurize(subset)
        for subset in n_way_split(
            PrismaiDataset(
                mongo_db_connection="mongodb://prismai:prismai@isengart.hucompute.org:27123/?retryWrites=true&loadBalanced=false&serverSelectionTimeoutMS=5000&connectTimeoutMS=10000&authSource=admin&authMechanism=SCRAM-SHA-256",
                database="prismai",
                collection="features_prismai",
                feature_model=config["feature_model"],
                synth_agent=config["synth_agent"],
                document_type=config["document_type"],
                additional_match_conditions=config.get("additional_match_conditions", {}),
                **kwargs,
                update_cache=True,
            ),
            *sizes,
        )
    ]

Domains:   0%|          | 0/9 [00:00<?, ?it/s]

Seed set to 42


[PrismaiDataset] Loading Documents from MongoDB: 0it [00:00, ?it/s]

[PrismaiDataset] Writing Cache File /tmp/luminar/features_prismai/313cffda7f723067b5d129b1bdcbb88d139ae01b54f1513fd581ef14109deef5.pkl


  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

Seed set to 42


[PrismaiDataset] Loading Documents from MongoDB: 0it [00:00, ?it/s]

[PrismaiDataset] Writing Cache File /tmp/luminar/features_prismai/3bcd7a7f5018a8afed3d2890f31214f90112817fa855bd214e20a4b6dc8ed103.pkl


  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

Seed set to 42


[PrismaiDataset] Loading Documents from MongoDB: 0it [00:00, ?it/s]

[PrismaiDataset] Writing Cache File /tmp/luminar/features_prismai/ebfd82554ae2401c2212407ebe1ea99f0fcaa0070ece1c99f8237f506c6aa72e.pkl


  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

Seed set to 42


[PrismaiDataset] Loading Documents from MongoDB: 0it [00:00, ?it/s]

[PrismaiDataset] Writing Cache File /tmp/luminar/features_prismai/34aafb3f48ea852b98e7617c0e2b578d721b5713bf8a432be9006002e3984c20.pkl


  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

Seed set to 42


[PrismaiDataset] Loading Documents from MongoDB: 0it [00:00, ?it/s]

[PrismaiDataset] Writing Cache File /tmp/luminar/features_prismai/f8336c0ca6401d4c148cfa8a0dc018bd5307179625f75792ab575ca5639505ae.pkl


  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

Seed set to 42


[PrismaiDataset] Loading Documents from MongoDB: 0it [00:00, ?it/s]

[PrismaiDataset] Writing Cache File /tmp/luminar/features_prismai/0b92f19e4324ebaae4d9bdf9b31158ef58b60f90d103bc8ead4ddb289083a64d.pkl


  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/152 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

  0%|          | 0/151 [00:00<?, ?it/s]

Seed set to 42


[PrismaiDataset] Loading Documents from MongoDB: 0it [00:00, ?it/s]

[PrismaiDataset] Writing Cache File /tmp/luminar/features_prismai/be7aa10d80ce4be197108eb2f8c9c8e1ae55038ddc1d4c9c10096998f12f741a.pkl


  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:04<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

Seed set to 42


[PrismaiDataset] Loading Documents from MongoDB: 0it [00:00, ?it/s]

[PrismaiDataset] Writing Cache File /tmp/luminar/features_prismai/e7761383a24ac8612987a08307acb8c8f19e7018ffdc2ce6d395615982f4f656.pkl


  0%|          | 0/146 [00:00<?, ?it/s]

  0%|          | 0/146 [00:00<?, ?it/s]

  0%|          | 0/145 [00:00<?, ?it/s]

  0%|          | 0/145 [00:00<?, ?it/s]

  0%|          | 0/145 [00:00<?, ?it/s]

  0%|          | 0/145 [00:00<?, ?it/s]

  0%|          | 0/145 [00:00<?, ?it/s]

  0%|          | 0/145 [00:00<?, ?it/s]

  0%|          | 0/145 [00:00<?, ?it/s]

  0%|          | 0/145 [00:00<?, ?it/s]

Seed set to 42


[PrismaiDataset] Loading Documents from MongoDB: 0it [00:00, ?it/s]

[PrismaiDataset] Writing Cache File /tmp/luminar/features_prismai/a7e01bb679ad57d67d47ce77a9b60efb1dc98d95edd110825ad707a7a71e25a1.pkl


  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/150 [00:00<?, ?it/s]

## In-Domain Training & Evaluation

In [15]:
from collections import defaultdict

metrics_in_domain = defaultdict(list)
for domain, subsets in tqdm(splits.items()):
    for _ in trange(5, desc=domain, position=1):
        seed_everything(config["seed"])
        eval_dataset = subsets.pop(0)
        test_dataloader = get_dataloader(*subsets[:2])
        train_dataloader = get_dataloader(*subsets[2:], shuffle=True)
        eval_dataloader = get_dataloader(eval_dataset)
        subsets.append(eval_dataset)

        model = CNNDocumentClassficationModel(**config)
        trainer = Trainer(
            max_epochs=config["max_epochs"],
            logger=pl_loggers.TensorBoardLogger(
                save_dir=f"logs/in_domain/{type(featurizer).__name__}",
                name=domain,
            ),
            gradient_clip_val=config["gradient_clip_val"],
            callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=3)],
            deterministic=True,
        )
        trainer.progress_bar_callback.disable()

        trainer.fit(
            model,
            train_dataloaders=train_dataloader,
            val_dataloaders=eval_dataloader,
        )
        (metrics,) = trainer.test(model, test_dataloader, verbose=False)
        metrics_in_domain[domain].append(metrics)

        print(domain, metrics)


  0%|          | 0/9 [00:00<?, ?it/s]

Blog Authorship:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (48) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
----------------------------------------------------------------------------------

Blog Authorship {'test_loss': 0.31109312176704407, 'test_f1@0.5': 0.6634615659713745, 'test_f1@best': 0.6567164063453674, 'test_f1_threshold': 0.5299000144004822, 'test_acc@0.5': 0.8275862336158752, 'test_acc@best': 0.8300492763519287, 'test_acc_threshold': 0.5299000144004822, 'test_roc_auc': 0.9113836288452148}


/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (48) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params si

Blog Authorship {'test_loss': 0.3319130837917328, 'test_f1@0.5': 0.6666666865348816, 'test_f1@best': 0.6331658363342285, 'test_f1_threshold': 0.5916000008583069, 'test_acc@0.5': 0.8253588676452637, 'test_acc@best': 0.8253588676452637, 'test_acc_threshold': 0.5916000008583069, 'test_roc_auc': 0.9012147188186646}


/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (47) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params si

Blog Authorship {'test_loss': 0.3500365614891052, 'test_f1@0.5': 0.7184466123580933, 'test_f1@best': 0.5435897707939148, 'test_f1_threshold': 0.7365000247955322, 'test_acc@0.5': 0.7981438636779785, 'test_acc@best': 0.8004640340805054, 'test_acc_threshold': 0.7099999785423279, 'test_roc_auc': 0.899541974067688}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.


Blog Authorship {'test_loss': 0.49172213673591614, 'test_f1@0.5': 0.588744580745697, 'test_f1@best': 0.6641790866851807, 'test_f1_threshold': 0.337799996137619, 'test_acc@0.5': 0.7795823812484741, 'test_acc@best': 0.8004640340805054, 'test_acc_threshold': 0.2599000036716461, 'test_roc_auc': 0.8768193125724792}


/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (47) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params si

Blog Authorship {'test_loss': 0.4483988881111145, 'test_f1@0.5': 0.6484641432762146, 'test_f1@best': 0.6600660085678101, 'test_f1_threshold': 0.4650000035762787, 'test_acc@0.5': 0.7653758525848389, 'test_acc@best': 0.7653758525848389, 'test_acc_threshold': 0.4650000035762787, 'test_roc_auc': 0.8532134294509888}


Student Essays:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

Student Essays {'test_loss': 0.41431087255477905, 'test_f1@0.5': 0.8252426981925964, 'test_f1@best': 0.8122866749763489, 'test_f1_threshold': 0.6704000234603882, 'test_acc@0.5': 0.8131487965583801, 'test_acc@best': 0.8096885681152344, 'test_acc_threshold': 0.6704000234603882, 'test_roc_auc': 0.8935012221336365}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

Student Essays {'test_loss': 0.41541966795921326, 'test_f1@0.5': 0.8239316344261169, 'test_f1@best': 0.774319052696228, 'test_f1_threshold': 0.7871999740600586, 'test_acc@0.5': 0.821490466594696, 'test_acc@best': 0.8232235908508301, 'test_acc_threshold': 0.5590000152587891, 'test_roc_auc': 0.9006498456001282}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

Student Essays {'test_loss': 0.3999391198158264, 'test_f1@0.5': 0.8566827774047852, 'test_f1@best': 0.7963302731513977, 'test_f1_threshold': 0.8281999826431274, 'test_acc@0.5': 0.8457539081573486, 'test_acc@best': 0.8076256513595581, 'test_acc_threshold': 0.8281999826431274, 'test_roc_auc': 0.898158848285675}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Student Essays {'test_loss': 0.3307008445262909, 'test_f1@0.5': 0.8473967909812927, 'test_f1@best': 0.8941176533699036, 'test_f1_threshold': 0.262800008058548, 'test_acc@0.5': 0.8524305820465088, 'test_acc@best': 0.890625, 'test_acc_threshold': 0.262800008058548, 'test_roc_auc': 0.9206521511077881}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Student Essays {'test_loss': 0.691491425037384, 'test_f1@0.5': 0.0, 'test_f1@best': 0.007462686393409967, 'test_f1_threshold': 0.4756999909877777, 'test_acc@0.5': 0.5291005373001099, 'test_acc@best': 0.5167548656463623, 'test_acc_threshold': 0.47540000081062317, 'test_roc_auc': 0.5550374388694763}


CNN News:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

CNN News {'test_loss': 0.7133493423461914, 'test_f1@0.5': 0.05181347206234932, 'test_f1@best': 0.02139037474989891, 'test_f1_threshold': 0.541700005531311, 'test_acc@0.5': 0.6195425987243652, 'test_acc@best': 0.6195425987243652, 'test_acc_threshold': 0.4677000045776367, 'test_roc_auc': 0.6002209782600403}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

CNN News {'test_loss': 0.708859920501709, 'test_f1@0.5': 0.3496503531932831, 'test_f1@best': 0.033149171620607376, 'test_f1_threshold': 0.7523999810218811, 'test_acc@0.5': 0.6092436909675598, 'test_acc@best': 0.6407563090324402, 'test_acc_threshold': 0.6175000071525574, 'test_roc_auc': 0.5746591091156006}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

CNN News {'test_loss': 0.6917824149131775, 'test_f1@0.5': 0.0, 'test_f1@best': 0.0, 'test_f1_threshold': 0.5051000118255615, 'test_acc@0.5': 0.6057494878768921, 'test_acc@best': 0.6119096279144287, 'test_acc_threshold': 0.44999998807907104, 'test_roc_auc': 0.6206417083740234}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

CNN News {'test_loss': 0.6235486268997192, 'test_f1@0.5': 0.4833836853504181, 'test_f1@best': 0.08955223858356476, 'test_f1_threshold': 0.7337999939918518, 'test_acc@0.5': 0.6474226713180542, 'test_acc@best': 0.6391752362251282, 'test_acc_threshold': 0.6894000172615051, 'test_roc_auc': 0.6735495328903198}


Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


CNN News {'test_loss': 0.6376870274543762, 'test_f1@0.5': 0.38686132431030273, 'test_f1@best': 0.0628272220492363, 'test_f1_threshold': 0.6833999752998352, 'test_acc@0.5': 0.6485355496406555, 'test_acc@best': 0.6129707098007202, 'test_acc_threshold': 0.362199991941452, 'test_roc_auc': 0.6522097587585449}


Euro Court Cases:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

Euro Court Cases {'test_loss': 0.2144184410572052, 'test_f1@0.5': 0.8997669219970703, 'test_f1@best': 0.9178082346916199, 'test_f1_threshold': 0.4302999973297119, 'test_acc@0.5': 0.9158512949943542, 'test_acc@best': 0.9295498728752136, 'test_acc_threshold': 0.4302999973297119, 'test_roc_auc': 0.9698420166969299}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Euro Court Cases {'test_loss': 0.2599661648273468, 'test_f1@0.5': 0.9068322777748108, 'test_f1@best': 0.9188033938407898, 'test_f1_threshold': 0.725600004196167, 'test_acc@0.5': 0.913957953453064, 'test_acc@best': 0.9273422360420227, 'test_acc_threshold': 0.725600004196167, 'test_roc_auc': 0.9637070298194885}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

Euro Court Cases {'test_loss': 0.195577010512352, 'test_f1@0.5': 0.9197397232055664, 'test_f1@best': 0.9142857193946838, 'test_f1_threshold': 0.0869000032544136, 'test_acc@0.5': 0.9296578168869019, 'test_acc@best': 0.9201520681381226, 'test_acc_threshold': 0.0869000032544136, 'test_roc_auc': 0.9725516438484192}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Euro Court Cases {'test_loss': 0.24325589835643768, 'test_f1@0.5': 0.9113401770591736, 'test_f1@best': 0.9106029272079468, 'test_f1_threshold': 0.5920000076293945, 'test_acc@0.5': 0.9180952310562134, 'test_acc@best': 0.9180952310562134, 'test_acc_threshold': 0.5920000076293945, 'test_roc_auc': 0.9586370587348938}



  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Euro Court Cases {'test_loss': 0.24581556022167206, 'test_f1@0.5': 0.8957871198654175, 'test_f1@best': 0.9060542583465576, 'test_f1_threshold': 0.21649999916553497, 'test_acc@0.5': 0.9104762077331543, 'test_acc@best': 0.9142857193946838, 'test_acc_threshold': 0.21649999916553497, 'test_roc_auc': 0.9613184928894043}


House of Commons:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

House of Commons {'test_loss': 0.6613554954528809, 'test_f1@0.5': 0.5875706076622009, 'test_f1@best': 0.46226415038108826, 'test_f1_threshold': 0.9304999709129333, 'test_acc@0.5': 0.6791208982467651, 'test_acc@best': 0.7120879292488098, 'test_acc_threshold': 0.6327999830245972, 'test_roc_auc': 0.7391613125801086}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

House of Commons {'test_loss': 0.5768523812294006, 'test_f1@0.5': 0.4220183491706848, 'test_f1@best': 0.36538460850715637, 'test_f1_threshold': 0.5523999929428101, 'test_acc@0.5': 0.7301927208900452, 'test_acc@best': 0.7173447608947754, 'test_acc_threshold': 0.5523999929428101, 'test_roc_auc': 0.763113796710968}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

House of Commons {'test_loss': 0.5371496677398682, 'test_f1@0.5': 0.6530612111091614, 'test_f1@best': 0.5815602540969849, 'test_f1_threshold': 0.6428999900817871, 'test_acc@0.5': 0.7468085289001465, 'test_acc@best': 0.7510638236999512, 'test_acc_threshold': 0.6355000138282776, 'test_roc_auc': 0.7878235578536987}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

House of Commons {'test_loss': 0.5549911856651306, 'test_f1@0.5': 0.5454545617103577, 'test_f1@best': 0.5461254715919495, 'test_f1_threshold': 0.46970000863075256, 'test_acc@0.5': 0.7457627058029175, 'test_acc@best': 0.7266949415206909, 'test_acc_threshold': 0.28380000591278076, 'test_roc_auc': 0.7562790513038635}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


House of Commons {'test_loss': 0.5857731699943542, 'test_f1@0.5': 0.3619047701358795, 'test_f1@best': 0.53125, 'test_f1_threshold': 0.3758000135421753, 'test_acc@0.5': 0.7124463319778442, 'test_acc@best': 0.7424892783164978, 'test_acc_threshold': 0.3758000135421753, 'test_roc_auc': 0.7518072128295898}


ArXiv Papers:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (36) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
----------------------------------------------------------------------------------

ArXiv Papers {'test_loss': 0.2912268340587616, 'test_f1@0.5': 0.0, 'test_f1@best': 0.0, 'test_f1_threshold': 0.0763000026345253, 'test_acc@0.5': 0.9233129024505615, 'test_acc@best': 0.9202454090118408, 'test_acc_threshold': 0.0763000026345253, 'test_roc_auc': 0.7722259163856506}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (36) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K  

ArXiv Papers {'test_loss': 0.2828540503978729, 'test_f1@0.5': 0.0, 'test_f1@best': 0.19512194395065308, 'test_f1_threshold': 0.4634000062942505, 'test_acc@0.5': 0.918181836605072, 'test_acc@best': 0.8999999761581421, 'test_acc_threshold': 0.4634000062942505, 'test_roc_auc': 0.7536975741386414}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (36) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K  

ArXiv Papers {'test_loss': 0.23089370131492615, 'test_f1@0.5': 0.0, 'test_f1@best': 0.0, 'test_f1_threshold': 0.4880000054836273, 'test_acc@0.5': 0.9294478297233582, 'test_acc@best': 0.8895705342292786, 'test_acc_threshold': 0.23309999704360962, 'test_roc_auc': 0.7246376872062683}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (36) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K  

ArXiv Papers {'test_loss': 0.31243982911109924, 'test_f1@0.5': 0.0, 'test_f1@best': 0.0, 'test_f1_threshold': 0.09120000153779984, 'test_acc@0.5': 0.9121212363243103, 'test_acc@best': 0.8999999761581421, 'test_acc_threshold': 0.09120000153779984, 'test_roc_auc': 0.7740863561630249}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


ArXiv Papers {'test_loss': 0.27988988161087036, 'test_f1@0.5': 0.0, 'test_f1@best': 0.0, 'test_f1_threshold': 0.4805000126361847, 'test_acc@0.5': 0.903323233127594, 'test_acc@best': 0.903323233127594, 'test_acc_threshold': 0.4805000126361847, 'test_roc_auc': 0.7692307829856873}


Gutenberg:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (35) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
----------------------------------------------------------------------------------

Gutenberg {'test_loss': 0.12161829322576523, 'test_f1@0.5': 0.0, 'test_f1@best': 0.3076923191547394, 'test_f1_threshold': 0.05389999970793724, 'test_acc@0.5': 0.9708737730979919, 'test_acc@best': 0.9708737730979919, 'test_acc_threshold': 0.05389999970793724, 'test_roc_auc': 0.6918518543243408}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (35) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K  

Gutenberg {'test_loss': 0.1526263803243637, 'test_f1@0.5': 0.0, 'test_f1@best': 0.0, 'test_f1_threshold': 0.07739999890327454, 'test_acc@0.5': 0.9646302461624146, 'test_acc@best': 0.9517685174942017, 'test_acc_threshold': 0.04800000041723251, 'test_roc_auc': 0.7027272582054138}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (35) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K  

Gutenberg {'test_loss': 0.13417141139507294, 'test_f1@0.5': 0.0, 'test_f1@best': 0.3529411852359772, 'test_f1_threshold': 0.10429999977350235, 'test_acc@0.5': 0.9646302461624146, 'test_acc@best': 0.9646302461624146, 'test_acc_threshold': 0.10429999977350235, 'test_roc_auc': 0.6572727560997009}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (35) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K  

Gutenberg {'test_loss': 0.15537889301776886, 'test_f1@0.5': 0.0, 'test_f1@best': 0.1428571492433548, 'test_f1_threshold': 0.4505000114440918, 'test_acc@0.5': 0.9615384340286255, 'test_acc@best': 0.9615384340286255, 'test_acc_threshold': 0.3366999924182892, 'test_roc_auc': 0.5980555415153503}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (35) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | 

Gutenberg {'test_loss': 0.12684474885463715, 'test_f1@0.5': 0.1666666716337204, 'test_f1@best': 0.1818181872367859, 'test_f1_threshold': 0.6624000072479248, 'test_acc@0.5': 0.9677419066429138, 'test_acc@best': 0.9709677696228027, 'test_acc_threshold': 0.6624000072479248, 'test_roc_auc': 0.6579999923706055}


Bundestag$_{de}$:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
----------------------------------------------------------------------------------

Bundestag$_{de}$ {'test_loss': 0.6375802755355835, 'test_f1@0.5': 0.0, 'test_f1@best': 0.014388489536941051, 'test_f1_threshold': 0.4740000069141388, 'test_acc@0.5': 0.6752941012382507, 'test_acc@best': 0.677647054195404, 'test_acc_threshold': 0.4740000069141388, 'test_roc_auc': 0.6036535501480103}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (44) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K  

Bundestag$_{de}$ {'test_loss': 0.6502804756164551, 'test_f1@0.5': 0.12987013161182404, 'test_f1@best': 0.13245032727718353, 'test_f1_threshold': 0.5202999711036682, 'test_acc@0.5': 0.6778846383094788, 'test_acc@best': 0.6322115659713745, 'test_acc_threshold': 0.38960000872612, 'test_roc_auc': 0.5170084834098816}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | 

Bundestag$_{de}$ {'test_loss': 0.6334179043769836, 'test_f1@0.5': 0.16249999403953552, 'test_f1@best': 0.0, 'test_f1_threshold': 0.6571999788284302, 'test_acc@0.5': 0.6607595086097717, 'test_acc@best': 0.6936708688735962, 'test_acc_threshold': 0.5652999877929688, 'test_roc_auc': 0.4905690550804138}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K  

Bundestag$_{de}$ {'test_loss': 0.5808930993080139, 'test_f1@0.5': 0.22058823704719543, 'test_f1@best': 0.0, 'test_f1_threshold': 0.6901000142097473, 'test_acc@0.5': 0.7260981798171997, 'test_acc@best': 0.7364341020584106, 'test_acc_threshold': 0.6901000142097473, 'test_roc_auc': 0.5600637197494507}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.
/home/staff_homes/mastoeck/Projects/PrismAI/.venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (46) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | 

Bundestag$_{de}$ {'test_loss': 0.598507821559906, 'test_f1@0.5': 0.0, 'test_f1@best': 0.032786883413791656, 'test_f1_threshold': 0.31859999895095825, 'test_acc@0.5': 0.7174999713897705, 'test_acc@best': 0.7124999761581421, 'test_acc_threshold': 0.3179999887943268, 'test_roc_auc': 0.5963738560676575}


Spiegel$_{de}$:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

Spiegel$_{de}$ {'test_loss': 0.671820342540741, 'test_f1@0.5': 0.2741433084011078, 'test_f1@best': 0.0401606410741806, 'test_f1_threshold': 0.5475000143051147, 'test_acc@0.5': 0.5693160891532898, 'test_acc@best': 0.5767098069190979, 'test_acc_threshold': 0.44760000705718994, 'test_roc_auc': 0.6006085872650146}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Spiegel$_{de}$ {'test_loss': 0.586842954158783, 'test_f1@0.5': 0.7181328535079956, 'test_f1@best': 0.5948718190193176, 'test_f1_threshold': 0.7799999713897705, 'test_acc@0.5': 0.705440878868103, 'test_acc@best': 0.7298311591148376, 'test_acc_threshold': 0.675599992275238, 'test_roc_auc': 0.795379102230072}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWit

Spiegel$_{de}$ {'test_loss': 0.5470783114433289, 'test_f1@0.5': 0.6792452931404114, 'test_f1@best': 0.5856353640556335, 'test_f1_threshold': 0.6912999749183655, 'test_acc@0.5': 0.742911159992218, 'test_acc@best': 0.7580340504646301, 'test_acc_threshold': 0.3716999888420105, 'test_roc_auc': 0.8182387351989746}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.


Spiegel$_{de}$ {'test_loss': 0.5947065949440002, 'test_f1@0.5': 0.6168674826622009, 'test_f1@best': 0.5743589997291565, 'test_f1_threshold': 0.5881999731063843, 'test_acc@0.5': 0.701127827167511, 'test_acc@best': 0.7086465954780579, 'test_acc_threshold': 0.22869999706745148, 'test_roc_auc': 0.792945384979248}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Spiegel$_{de}$ {'test_loss': 0.6120734810829163, 'test_f1@0.5': 0.7028112411499023, 'test_f1@best': 0.6896551847457886, 'test_f1_threshold': 0.609000027179718, 'test_acc@0.5': 0.7228464484214783, 'test_acc@best': 0.7247191071510315, 'test_acc_threshold': 0.5781999826431274, 'test_roc_auc': 0.8011680841445923}


In [16]:
config

{'eval_split': 0.1,
 'test_split': 0.2,
 'feature_model': 'gpt2',
 'synth_agent': 'gpt-4o-mini',
 'document_type': 'chunk',
 'additional_match_conditions': {'$or': [{'document.synth_token_end': {'$exists': False}},
   {'document.synth_token_end': {'$lt': 1024}}]},
 'second_dim_as_channels': True,
 'feature_dim': TwoDimFeatures(width=256, height=13),
 'featurizer': 'IntermediateLikelihood(last_n=13)',
 'slicer': 'SliceRandomMultiple(size=64, multiple=4, stride=16, sort=False)',
 'num_samples': None,
 'projection_dim': 32,
 'learning_rate': 0.0001,
 'warmup_steps': 66,
 'max_epochs': 50,
 'gradient_clip_val': 1.0,
 'batch_size': 32,
 'conv_layer_shapes': [(64, 5, 1),
  (128, 3, 1),
  (128, 3, 1),
  (128, 3, 1),
  (64, 3, 1)],
 'seed': 42}

In [17]:
df = pd.DataFrame(
    [
        {
            "domain": domain,
            **{
                "test_roc_auc": metric["test_roc_auc"],
                "test_f1@0.5": metric["test_f1@0.5"],
            },
        }
        for domain in domains
        for metric in metrics_in_domain[domain]
    ]
)
df = df.groupby("domain").mean().sort_index(key=lambda i: list(map(list(domains.keys()).index, i)))
print(
    df.to_latex(
        # float_format="\\np{%.3f}",
        float_format="%.3f",
        index=False,
    )
)
df

\begin{tabular}{rr}
\toprule
test_roc_auc & test_f1@0.5 \\
\midrule
0.888 & 0.657 \\
0.834 & 0.671 \\
0.624 & 0.254 \\
0.965 & 0.907 \\
0.760 & 0.514 \\
0.759 & 0.000 \\
0.662 & 0.033 \\
0.554 & 0.103 \\
0.762 & 0.598 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,test_roc_auc,test_f1@0.5
domain,Unnamed: 1_level_1,Unnamed: 2_level_1
Blog Authorship,0.888435,0.657157
Student Essays,0.8336,0.670651
CNN News,0.624256,0.254342
Euro Court Cases,0.965211,0.906693
House of Commons,0.759637,0.514002
ArXiv Papers,0.758776,0.0
Gutenberg,0.661581,0.033333
Bundestag$_{de}$,0.553534,0.102592
Spiegel$_{de}$,0.761668,0.59824


## Out-of-Domain

In [18]:
from collections import defaultdict

metrics_out_of_domain = defaultdict(list)
for domain in tqdm(splits.keys()):
    for _ in trange(5, desc=domain, position=1):
        seed_everything(config["seed"])
        train_subsets = []
        eval_subsets = []
        for other, subsets in splits.items():
            if other == domain:
                test_dataset = subsets.pop(0)
                subsets.append(test_dataset)
            else:
                eval_dataset = subsets.pop(0)
                eval_subsets.append(eval_dataset)
                train_subsets.extend(subsets[2:])
                subsets.append(eval_dataset)

        train_dataloader = get_dataloader(*train_subsets, shuffle=True)
        eval_dataloader = get_dataloader(*eval_subsets)
        test_dataloader = get_dataloader(test_dataset)

        model = CNNDocumentClassficationModel(**config)
        trainer = Trainer(
            max_epochs=config["max_epochs"],
            logger=pl_loggers.TensorBoardLogger(
                save_dir=f"logs/in_domain/{type(featurizer).__name__}",
                name=domain,
            ),
            gradient_clip_val=config["gradient_clip_val"],
            callbacks=[EarlyStopping(monitor="val_loss", mode="min", patience=3)],
            deterministic=True,
        )
        trainer.progress_bar_callback.disable()

        trainer.fit(
            model,
            train_dataloaders=train_dataloader,
            val_dataloaders=eval_dataloader,
        )
        (metrics,) = trainer.test(model, test_dataloader, verbose=False)
        metrics_out_of_domain[domain].append(metrics)

        print(domain, metrics)

  0%|          | 0/9 [00:00<?, ?it/s]

Blog Authorship:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

Blog Authorship {'test_loss': 0.7014198899269104, 'test_f1@0.5': 0.1411764770746231, 'test_f1@best': 0.2291666716337204, 'test_f1_threshold': 0.47369998693466187, 'test_acc@0.5': 0.65887850522995, 'test_acc@best': 0.5420560836791992, 'test_acc_threshold': 0.39890000224113464, 'test_roc_auc': 0.4534375071525574}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.


Blog Authorship {'test_loss': 0.8140376210212708, 'test_f1@0.5': 0.29931971430778503, 'test_f1@best': 0.25, 'test_f1_threshold': 0.774399995803833, 'test_acc@0.5': 0.5422222018241882, 'test_acc@best': 0.5822222232818604, 'test_acc_threshold': 0.527899980545044, 'test_roc_auc': 0.4947555661201477}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Blog Authorship {'test_loss': 0.7915789484977722, 'test_f1@0.5': 0.13483145833015442, 'test_f1@best': 0.09638553857803345, 'test_f1_threshold': 0.5360999703407288, 'test_acc@0.5': 0.646789014339447, 'test_acc@best': 0.6009174585342407, 'test_acc_threshold': 0.38690000772476196, 'test_roc_auc': 0.36764705181121826}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Blog Authorship {'test_loss': 0.6675863862037659, 'test_f1@0.5': 0.2448979616165161, 'test_f1@best': 0.07894736528396606, 'test_f1_threshold': 0.6592000126838684, 'test_acc@0.5': 0.6636363863945007, 'test_acc@best': 0.6863636374473572, 'test_acc_threshold': 0.6191999912261963, 'test_roc_auc': 0.4841904640197754}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Blog Authorship {'test_loss': 0.637465238571167, 'test_f1@0.5': 0.14705882966518402, 'test_f1@best': 0.1492537260055542, 'test_f1_threshold': 0.5192000269889832, 'test_acc@0.5': 0.723809540271759, 'test_acc@best': 0.7142857313156128, 'test_acc_threshold': 0.4212999939918518, 'test_roc_auc': 0.46577778458595276}


Student Essays:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

Student Essays {'test_loss': 1.2043603658676147, 'test_f1@0.5': 0.4535315930843353, 'test_f1@best': 0.23999999463558197, 'test_f1_threshold': 0.7828999757766724, 'test_acc@0.5': 0.4842105209827423, 'test_acc@best': 0.46315789222717285, 'test_acc_threshold': 0.6481000185012817, 'test_roc_auc': 0.41772839426994324}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

Student Essays {'test_loss': 1.7886734008789062, 'test_f1@0.5': 0.44859811663627625, 'test_f1@best': 0.21929824352264404, 'test_f1_threshold': 0.9186999797821045, 'test_acc@0.5': 0.3896551728248596, 'test_acc@best': 0.3965517282485962, 'test_acc_threshold': 0.8992999792098999, 'test_roc_auc': 0.3464285731315613}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Student Essays {'test_loss': 1.1458072662353516, 'test_f1@0.5': 0.1318681389093399, 'test_f1@best': 0.223300963640213, 'test_f1_threshold': 0.3995000123977661, 'test_acc@0.5': 0.4513888955116272, 'test_acc@best': 0.4444444477558136, 'test_acc_threshold': 0.3995000123977661, 'test_roc_auc': 0.38473430275917053}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

Student Essays {'test_loss': 1.5203402042388916, 'test_f1@0.5': 0.21674877405166626, 'test_f1@best': 0.21463415026664734, 'test_f1_threshold': 0.4875999987125397, 'test_acc@0.5': 0.4498269855976105, 'test_acc@best': 0.43252596259117126, 'test_acc_threshold': 0.41929998993873596, 'test_roc_auc': 0.4064987897872925}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Student Essays {'test_loss': 1.3974796533584595, 'test_f1@0.5': 0.33834585547447205, 'test_f1@best': 0.21052631735801697, 'test_f1_threshold': 0.6668999791145325, 'test_acc@0.5': 0.3888888955116272, 'test_acc@best': 0.3854166567325592, 'test_acc_threshold': 0.6031000018119812, 'test_roc_auc': 0.34869566559791565}


CNN News:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

CNN News {'test_loss': 0.7425375580787659, 'test_f1@0.5': 0.3025210201740265, 'test_f1@best': 0.32786884903907776, 'test_f1_threshold': 0.48179998993873596, 'test_acc@0.5': 0.6527196764945984, 'test_acc@best': 0.6569037437438965, 'test_acc_threshold': 0.48179998993873596, 'test_roc_auc': 0.6259925365447998}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

CNN News {'test_loss': 0.7579054236412048, 'test_f1@0.5': 0.359375, 'test_f1@best': 0.3720930218696594, 'test_f1_threshold': 0.49300000071525574, 'test_acc@0.5': 0.6569037437438965, 'test_acc@best': 0.6610878705978394, 'test_acc_threshold': 0.49300000071525574, 'test_roc_auc': 0.6128090023994446}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

CNN News {'test_loss': 0.7100023627281189, 'test_f1@0.5': 0.09638553857803345, 'test_f1@best': 0.0941176488995552, 'test_f1_threshold': 0.4722999930381775, 'test_acc@0.5': 0.6696035265922546, 'test_acc@best': 0.6696035265922546, 'test_acc_threshold': 0.40459999442100525, 'test_roc_auc': 0.5982683897018433}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

CNN News {'test_loss': 0.718602180480957, 'test_f1@0.5': 0.2539682686328888, 'test_f1@best': 0.2068965584039688, 'test_f1_threshold': 0.6237999796867371, 'test_acc@0.5': 0.6194332242012024, 'test_acc@best': 0.6113360524177551, 'test_acc_threshold': 0.46230000257492065, 'test_roc_auc': 0.623711347579956}



  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


CNN News {'test_loss': 0.8547036647796631, 'test_f1@0.5': 0.16326530277729034, 'test_f1@best': 0.2931034564971924, 'test_f1_threshold': 0.41200000047683716, 'test_acc@0.5': 0.6554622054100037, 'test_acc@best': 0.651260495185852, 'test_acc_threshold': 0.40700000524520874, 'test_roc_auc': 0.6317424178123474}


Euro Court Cases:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

Euro Court Cases {'test_loss': 0.61805260181427, 'test_f1@0.5': 0.5297297239303589, 'test_f1@best': 0.10000000149011612, 'test_f1_threshold': 0.7832000255584717, 'test_acc@0.5': 0.6692014932632446, 'test_acc@best': 0.6425855755805969, 'test_acc_threshold': 0.6881999969482422, 'test_roc_auc': 0.7129203677177429}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Euro Court Cases {'test_loss': 0.6455636620521545, 'test_f1@0.5': 0.5876288414001465, 'test_f1@best': 0.2945736348628998, 'test_f1_threshold': 0.8432999849319458, 'test_acc@0.5': 0.6850393414497375, 'test_acc@best': 0.6771653294563293, 'test_acc_threshold': 0.742900013923645, 'test_roc_auc': 0.7197436094284058}



  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU insid

Euro Court Cases {'test_loss': 0.9466021656990051, 'test_f1@0.5': 0.18644067645072937, 'test_f1@best': 0.1551724076271057, 'test_f1_threshold': 0.5609999895095825, 'test_acc@0.5': 0.6264591217041016, 'test_acc@best': 0.6498054265975952, 'test_acc_threshold': 0.43939998745918274, 'test_roc_auc': 0.6992523074150085}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

Euro Court Cases {'test_loss': 0.9628025889396667, 'test_f1@0.5': 0.1515151560306549, 'test_f1@best': 0.13846154510974884, 'test_f1_threshold': 0.5414000153541565, 'test_acc@0.5': 0.5789473652839661, 'test_acc@best': 0.5789473652839661, 'test_acc_threshold': 0.4867999851703644, 'test_roc_auc': 0.6371264457702637}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Euro Court Cases {'test_loss': 0.6437916159629822, 'test_f1@0.5': 0.5161290168762207, 'test_f1@best': 0.316546767950058, 'test_f1_threshold': 0.7802000045776367, 'test_acc@0.5': 0.6538461446762085, 'test_acc@best': 0.642307698726654, 'test_acc_threshold': 0.6804999709129333, 'test_roc_auc': 0.699696958065033}


House of Commons:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

House of Commons {'test_loss': 0.9836200475692749, 'test_f1@0.5': 0.4864864945411682, 'test_f1@best': 0.30463576316833496, 'test_f1_threshold': 0.8252999782562256, 'test_acc@0.5': 0.44117647409439087, 'test_acc@best': 0.4663865566253662, 'test_acc_threshold': 0.5397999882698059, 'test_roc_auc': 0.521060585975647}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

House of Commons {'test_loss': 1.0783066749572754, 'test_f1@0.5': 0.44999998807907104, 'test_f1@best': 0.3380281627178192, 'test_f1_threshold': 0.8446999788284302, 'test_acc@0.5': 0.42105263471603394, 'test_acc@best': 0.49561402201652527, 'test_acc_threshold': 0.5805000066757202, 'test_roc_auc': 0.5243589878082275}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

House of Commons {'test_loss': 0.9628301858901978, 'test_f1@0.5': 0.5223880410194397, 'test_f1@best': 0.4431818127632141, 'test_f1_threshold': 0.7924000024795532, 'test_acc@0.5': 0.47755101323127747, 'test_acc@best': 0.5224489569664001, 'test_acc_threshold': 0.6176000237464905, 'test_roc_auc': 0.5834385752677917}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

House of Commons {'test_loss': 0.9430379867553711, 'test_f1@0.5': 0.5204461216926575, 'test_f1@best': 0.44329896569252014, 'test_f1_threshold': 0.7028999924659729, 'test_acc@0.5': 0.45798319578170776, 'test_acc@best': 0.5378151535987854, 'test_acc_threshold': 0.6376000046730042, 'test_roc_auc': 0.5543939471244812}



  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


House of Commons {'test_loss': 0.9465743899345398, 'test_f1@0.5': 0.4146341383457184, 'test_f1@best': 0.3733333349227905, 'test_f1_threshold': 0.6043000221252441, 'test_acc@0.5': 0.5675675868988037, 'test_acc@best': 0.5585585832595825, 'test_acc_threshold': 0.5414000153541565, 'test_roc_auc': 0.5163888931274414}


ArXiv Papers:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

ArXiv Papers {'test_loss': 0.5493637919425964, 'test_f1@0.5': 0.06060606241226196, 'test_f1@best': 0.0, 'test_f1_threshold': 0.574999988079071, 'test_acc@0.5': 0.8132529854774475, 'test_acc@best': 0.8072289228439331, 'test_acc_threshold': 0.4925999939441681, 'test_roc_auc': 0.5112612843513489}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

ArXiv Papers {'test_loss': 1.0853912830352783, 'test_f1@0.5': 0.125, 'test_f1@best': 0.13333334028720856, 'test_f1_threshold': 0.8877000212669373, 'test_acc@0.5': 0.4749999940395355, 'test_acc@best': 0.8062499761581421, 'test_acc_threshold': 0.8313999772071838, 'test_roc_auc': 0.6013333201408386}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

ArXiv Papers {'test_loss': 0.36529844999313354, 'test_f1@0.5': 0.3199999928474426, 'test_f1@best': 0.2222222238779068, 'test_f1_threshold': 0.5781999826431274, 'test_acc@0.5': 0.8975903391838074, 'test_acc@best': 0.9096385836601257, 'test_acc_threshold': 0.5088000297546387, 'test_roc_auc': 0.6366446018218994}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

ArXiv Papers {'test_loss': 0.5475499629974365, 'test_f1@0.5': 0.1666666716337204, 'test_f1@best': 0.1875, 'test_f1_threshold': 0.5647000074386597, 'test_acc@0.5': 0.8170731663703918, 'test_acc@best': 0.7682926654815674, 'test_acc_threshold': 0.43220001459121704, 'test_roc_auc': 0.5904605388641357}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


ArXiv Papers {'test_loss': 0.3853815495967865, 'test_f1@0.5': 0.10526315867900848, 'test_f1@best': 0.10526315867900848, 'test_f1_threshold': 0.4740999937057495, 'test_acc@0.5': 0.895061731338501, 'test_acc@best': 0.8395061492919922, 'test_acc_threshold': 0.3075999915599823, 'test_roc_auc': 0.4136062562465668}


Gutenberg:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

Gutenberg {'test_loss': 0.7401473522186279, 'test_f1@0.5': 0.03076923079788685, 'test_f1@best': 0.0476190485060215, 'test_f1_threshold': 0.6570000052452087, 'test_acc@0.5': 0.6012658476829529, 'test_acc@best': 0.7405063509941101, 'test_acc_threshold': 0.6453999876976013, 'test_roc_auc': 0.32749998569488525}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Gutenberg {'test_loss': 0.7313435673713684, 'test_f1@0.5': 0.0, 'test_f1@best': 0.0, 'test_f1_threshold': 0.6765999794006348, 'test_acc@0.5': 0.6118420958518982, 'test_acc@best': 0.8157894611358643, 'test_acc_threshold': 0.6765999794006348, 'test_roc_auc': 0.24666666984558105}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

Gutenberg {'test_loss': 0.8225980997085571, 'test_f1@0.5': 0.0615384615957737, 'test_f1@best': 0.0, 'test_f1_threshold': 0.8223000168800354, 'test_acc@0.5': 0.6139240264892578, 'test_acc@best': 0.7911392450332642, 'test_acc_threshold': 0.7656999826431274, 'test_roc_auc': 0.3166666626930237}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

Gutenberg {'test_loss': 0.35263168811798096, 'test_f1@0.5': 0.0, 'test_f1@best': 0.0, 'test_f1_threshold': 0.482699990272522, 'test_acc@0.5': 0.9108280539512634, 'test_acc@best': 0.8407643437385559, 'test_acc_threshold': 0.37220001220703125, 'test_roc_auc': 0.41428571939468384}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Gutenberg {'test_loss': 0.732973575592041, 'test_f1@0.5': 0.0317460335791111, 'test_f1@best': 0.0, 'test_f1_threshold': 0.6771000027656555, 'test_acc@0.5': 0.6038960814476013, 'test_acc@best': 0.5714285969734192, 'test_acc_threshold': 0.4771000146865845, 'test_roc_auc': 0.3466666638851166}


Bundestag$_{de}$:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

Bundestag$_{de}$ {'test_loss': 0.6607787609100342, 'test_f1@0.5': 0.3300970792770386, 'test_f1@best': 0.3801652789115906, 'test_f1_threshold': 0.38260000944137573, 'test_acc@0.5': 0.6567164063453674, 'test_acc@best': 0.49751242995262146, 'test_acc_threshold': 0.27250000834465027, 'test_roc_auc': 0.5249999761581421}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

Bundestag$_{de}$ {'test_loss': 0.7382518649101257, 'test_f1@0.5': 0.0731707289814949, 'test_f1@best': 0.1041666641831398, 'test_f1_threshold': 0.3984000086784363, 'test_acc@0.5': 0.644859790802002, 'test_acc@best': 0.5887850522994995, 'test_acc_threshold': 0.3043999969959259, 'test_roc_auc': 0.5251984000205994}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Bundestag$_{de}$ {'test_loss': 0.7441845536231995, 'test_f1@0.5': 0.3612903356552124, 'test_f1@best': 0.028169013559818268, 'test_f1_threshold': 0.8187999725341797, 'test_acc@0.5': 0.5308057069778442, 'test_acc@best': 0.649289071559906, 'test_acc_threshold': 0.7416999936103821, 'test_roc_auc': 0.5100313425064087}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside 

Bundestag$_{de}$ {'test_loss': 0.6705532670021057, 'test_f1@0.5': 0.2526315748691559, 'test_f1@best': 0.3050847351551056, 'test_f1_threshold': 0.3935000002384186, 'test_acc@0.5': 0.6536585092544556, 'test_acc@best': 0.5121951103210449, 'test_acc_threshold': 0.305400013923645, 'test_roc_auc': 0.515001118183136}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Bundestag$_{de}$ {'test_loss': 0.7784735560417175, 'test_f1@0.5': 0.26771652698516846, 'test_f1@best': 0.03999999910593033, 'test_f1_threshold': 0.8260999917984009, 'test_acc@0.5': 0.5105262994766235, 'test_acc@best': 0.7105262875556946, 'test_acc_threshold': 0.7089999914169312, 'test_roc_auc': 0.4316321909427643}


Spiegel$_{de}$:   0%|          | 0/5 [00:00<?, ?it/s]

Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?  

Spiegel$_{de}$ {'test_loss': 0.7532823085784912, 'test_f1@0.5': 0.09022556245326996, 'test_f1@best': 0.0, 'test_f1_threshold': 0.7141000032424927, 'test_acc@0.5': 0.5551470518112183, 'test_acc@best': 0.5514705777168274, 'test_acc_threshold': 0.6700999736785889, 'test_roc_auc': 0.5569945573806763}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Seed set to 42
Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.


Spiegel$_{de}$ {'test_loss': 0.7750298380851746, 'test_f1@0.5': 0.14173229038715363, 'test_f1@best': 0.0, 'test_f1_threshold': 0.8518000245094299, 'test_acc@0.5': 0.5839694738388062, 'test_acc@best': 0.5687022805213928, 'test_acc_threshold': 0.5989000201225281, 'test_roc_auc': 0.5307738184928894}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Spiegel$_{de}$ {'test_loss': 0.7461361885070801, 'test_f1@0.5': 0.131386861205101, 'test_f1@best': 0.0, 'test_f1_threshold': 0.7450000047683716, 'test_acc@0.5': 0.5576208233833313, 'test_acc@best': 0.5576208233833313, 'test_acc_threshold': 0.6240000128746033, 'test_roc_auc': 0.5484594106674194}


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK

Spiegel$_{de}$ {'test_loss': 0.7965099811553955, 'test_f1@0.5': 0.016393441706895828, 'test_f1@best': 0.0, 'test_f1_threshold': 0.5058000087738037, 'test_acc@0.5': 0.5522388219833374, 'test_acc@best': 0.5522388219833374, 'test_acc_threshold': 0.5027999877929688, 'test_roc_auc': 0.5754802227020264}


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
Loading `train_dataloader` to estimate number of stepping batches.

  | Name        | Type              | Params | Mode  | In sizes    | Out sizes
------------------------------------------------------------------------------------
0 | conv_layers | Sequential        | 152 K  | train | ?           | ?        
1 | projection  | Sequential        | 524 K  | train | [32, 16384] | [32, 32] 
2 | classifier  | Linear            | 33     | train | [32, 32]    | [32, 1]  
3 | criterion   | BCEWithLogitsLoss | 0      | train | ?           | ?        
------------------------------------------------------------------------------------
676 K     Trainable params
0         Non-trainable params
676 K     Total params
2.706     Total estimated model params size (MB)
17        Modules in train mode
0         Modules in eval mode
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Spiegel$_{de}$ {'test_loss': 0.8041268587112427, 'test_f1@0.5': 0.0, 'test_f1@best': 0.0, 'test_f1_threshold': 0.4325000047683716, 'test_acc@0.5': 0.5454545617103577, 'test_acc@best': 0.5381818413734436, 'test_acc_threshold': 0.3434000015258789, 'test_roc_auc': 0.5606933236122131}


In [19]:
df = pd.DataFrame(
    [
        {
            "domain": domain,
            **{
                "test_roc_auc": metric["test_roc_auc"],
                "test_f1@0.5": metric["test_f1@0.5"],
            },
        }
        for domain in domains
        for metric in metrics_out_of_domain[domain]
    ]
)
df = df.groupby("domain").mean().sort_index(key=lambda i: list(map(list(domains.keys()).index, i)))
print(
    df.to_latex(
        # float_format="\\np{%.3f}",
        float_format="%.3f",
        index=False,
    )
)
df

\begin{tabular}{rr}
\toprule
test_roc_auc & test_f1@0.5 \\
\midrule
0.453 & 0.193 \\
0.381 & 0.318 \\
0.619 & 0.235 \\
0.694 & 0.394 \\
0.540 & 0.479 \\
0.551 & 0.156 \\
0.330 & 0.025 \\
0.501 & 0.257 \\
0.554 & 0.076 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,test_roc_auc,test_f1@0.5
domain,Unnamed: 1_level_1,Unnamed: 2_level_1
Blog Authorship,0.453162,0.193457
Student Essays,0.380817,0.317818
CNN News,0.618505,0.235103
Euro Court Cases,0.693748,0.394289
House of Commons,0.539928,0.478791
ArXiv Papers,0.550661,0.155507
Gutenberg,0.330357,0.024811
Bundestag$_{de}$,0.501373,0.256981
Spiegel$_{de}$,0.55448,0.075948
