In [6]:
import sys
sys.path.append("../src")
import paths
import dataset
import train
import utils
import torch
import pickle
import evaluation

from datasets import load_from_disk

In [7]:
train_flag = True

In [8]:
TRAIN_DATA = paths.data/"manzoni_train_tokens.csv"
OOD_DATA = paths.data/"manzoni_dev_tokens.csv"
HF_DATA = paths.data/"prepared"
torch.set_float32_matmul_precision("high")   # enable TF32 matmuls on Ampere
torch.backends.cudnn.allow_tf32 = True 

In [9]:
import numpy as np
from collections import Counter

def check_labels(hfds_split, sample_rows=2000):
    # Concatenate labels from a subset of rows (pad to same length already handled by collator)
    n = min(sample_rows, len(hfds_split))
    cats = []
    for ex in hfds_split.select(range(n)):
        labs = np.array(ex["labels"])
        cats.append(labs)
    all_labs = np.concatenate(cats)
    visible = all_labs[all_labs != -100]
    uniq = np.unique(visible)
    print("Unique visible labels:", uniq)
    bad = [x for x in uniq if x not in (0, 1)]
    if bad:
        print("❌ Found out-of-range labels:", bad)
    else:
        print("✅ Labels look fine (only 0/1).")
    return uniq

In [10]:
if train_flag:
    import importlib
    importlib.reload(dataset)
    importlib.reload(train)

    import os
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # makes the exception point to the correct op

    results = {}
    for model_key in ["deberta", "modernbert", "bert"]:
        pairs = dataset.read_token_label_file(TRAIN_DATA)
        sents_tok, sents_lab = dataset.group_into_sentences(pairs)
        ds_full = dataset.build_hf_dataset_for_token_classification(sents_tok, sents_lab, model_key=model_key)
        split = ds_full.train_test_split(train_size=0.8, seed=69)
        train_ds = dataset.tidy(split["train"], model_key)
        val_ds   = dataset.tidy(split["test"], model_key)
        train_ds.save_to_disk(HF_DATA/f"{model_key}"/"train")
        val_ds.save_to_disk(HF_DATA/f"{model_key}"/"val")
        _ = check_labels(train_ds)  # your ModernBERT train split
        _ = check_labels(val_ds)
        print(f"\n=== Training {model_key} -> {utils.MODEL_SPECS[model_key].name} ===")
        out_dir = str(paths.chekpoints / model_key)
        results[model_key] = train.train_token_splitter(
            train_ds, val_ds,
            model_key=model_key, out_dir=out_dir,
            lr=5e-5, batch_size=8, epochs=3,
        )
    with open(paths.results/"token_class_eval.pkl", "wb") as f:
        pickle.dump(results, f)
else:
    with open(paths.results/"token_class_eval.pkl", "rb") as f:
        results = pickle.load(f)

print(results)

microsoft/deberta-v3-base


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]



Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/298 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/238 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/60 [00:00<?, ? examples/s]

Unique visible labels: [0 1]
✅ Labels look fine (only 0/1).
Unique visible labels: [0 1]
✅ Labels look fine (only 0/1).

=== Training deberta -> microsoft/deberta-v3-base ===


pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

Some weights of DebertaV2ForTokenClassification were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.004829,0.978261,0.985401,0.981818,0.998844
2,0.114600,0.001354,0.994526,0.994526,0.994526,0.999653
3,0.114600,0.001019,0.998168,0.994526,0.996344,0.999769


[deberta] Validation: {'eval_loss': 0.0010188599117100239, 'eval_precision': 0.9981684981684982, 'eval_recall': 0.9945255474452555, 'eval_f1': 0.9963436928702011, 'eval_accuracy': 0.9997688529326784, 'eval_runtime': 0.5369, 'eval_samples_per_second': 111.753, 'eval_steps_per_second': 14.9, 'epoch': 3.0}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


answerdotai/ModernBERT-base


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/67 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/53 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/14 [00:00<?, ? examples/s]

Unique visible labels: [0 1]
✅ Labels look fine (only 0/1).
Unique visible labels: [0 1]
✅ Labels look fine (only 0/1).

=== Training modernbert -> answerdotai/ModernBERT-base ===


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/599M [00:00<?, ?B/s]

Some weights of ModernBertForTokenClassification were not initialized from the model checkpoint at answerdotai/ModernBERT-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': None, 'bos_token_id': None}.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.06417,0.876289,0.419408,0.567297,0.976423
2,No log,0.026599,0.924125,0.78125,0.846702,0.989575
3,No log,0.01283,0.951827,0.942434,0.947107,0.996121


[modernbert] Validation: {'eval_loss': 0.01282982062548399, 'eval_precision': 0.9518272425249169, 'eval_recall': 0.9424342105263158, 'eval_f1': 0.947107438016529, 'eval_accuracy': 0.9961209770289109, 'eval_runtime': 0.3558, 'eval_samples_per_second': 39.345, 'eval_steps_per_second': 5.621, 'epoch': 3.0}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


bert-base-multilingual-cased


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/264 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/211 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/53 [00:00<?, ? examples/s]

Unique visible labels: [0 1]
✅ Labels look fine (only 0/1).
Unique visible labels: [0 1]
✅ Labels look fine (only 0/1).

=== Training bert -> bert-base-multilingual-cased ===


model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,0.017423,0.846715,0.993151,0.914106,0.99369
2,0.080700,0.004065,0.954098,0.996575,0.974874,0.998263
3,0.080700,0.003142,0.989726,0.989726,0.989726,0.999305


[bert] Validation: {'eval_loss': 0.0031423659529536963, 'eval_precision': 0.9897260273972602, 'eval_recall': 0.9897260273972602, 'eval_f1': 0.9897260273972602, 'eval_accuracy': 0.9993053143452588, 'eval_runtime': 0.2373, 'eval_samples_per_second': 223.383, 'eval_steps_per_second': 29.503, 'epoch': 3.0}


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


{'deberta': {'eval_loss': 0.0010188599117100239, 'eval_precision': 0.9981684981684982, 'eval_recall': 0.9945255474452555, 'eval_f1': 0.9963436928702011, 'eval_accuracy': 0.9997688529326784, 'eval_runtime': 0.5369, 'eval_samples_per_second': 111.753, 'eval_steps_per_second': 14.9, 'epoch': 3.0}, 'modernbert': {'eval_loss': 0.01282982062548399, 'eval_precision': 0.9518272425249169, 'eval_recall': 0.9424342105263158, 'eval_f1': 0.947107438016529, 'eval_accuracy': 0.9961209770289109, 'eval_runtime': 0.3558, 'eval_samples_per_second': 39.345, 'eval_steps_per_second': 5.621, 'epoch': 3.0}, 'bert': {'eval_loss': 0.0031423659529536963, 'eval_precision': 0.9897260273972602, 'eval_recall': 0.9897260273972602, 'eval_f1': 0.9897260273972602, 'eval_accuracy': 0.9993053143452588, 'eval_runtime': 0.2373, 'eval_samples_per_second': 223.383, 'eval_steps_per_second': 29.503, 'epoch': 3.0}}


In [16]:
import pandas as pd
pd.DataFrame(results).T.sort_values("eval_f1", ascending=False)

KeyError: 'eval_f1'

In [15]:
import importlib
importlib.reload(evaluation)
best_key = max(results, key=lambda k: results[k]["eval_f1"])
model_dir = paths.chekpoints/best_key
best_trainer = evaluation.load_trainer_for_eval(model_dir, HF_DATA/best_key/"val")
val_ds = load_from_disk(HF_DATA/best_key/"val")

pred = best_trainer.predict(val_ds)  # logits + label_ids as np arrays
logits = pred.predictions
label_ids = pred.label_ids
tok = best_trainer.tokenizer

def sentences_from_word_seq(words, y_pred):
    sents, cur = [], []
    for w, b in zip(words, y_pred):
        cur.append(w)
        if b == 1:
            sents.append(cur); cur = []
    if cur: sents.append(cur)
    return sents

for i in range(min(3, len(val_ds))):
    ids = val_ds[i]["input_ids"]
    words = tok.convert_ids_to_tokens(ids)

    mask = (label_ids[i] != -100)          # np.bool_ array
    y_pred = logits[i].argmax(-1)[mask]    # predicted boundary labels at visible positions
    visible_words = [w for w, m in zip(words, mask.tolist()) if m]

    sents = sentences_from_word_seq(visible_words, y_pred)
    print(f"\nWindow {i} — predicted {len(sents)} sentences:")
    print(" | ".join([" ".join(s) for s in sents]))


evaluation.preview_predictions(best_trainer, val_ds, k=3)
evaluation.preview_full_sentences(best_trainer, val_ds, n_examples=2)
evaluation.preview_pred_vs_gold(best_trainer, val_ds, [1,2,3])

TypeError: 'int' object is not subscriptable

In [17]:
def error_examples(trainer, ds, max_show=10):
    out = trainer.predict(ds)
    preds = out.predictions.argmax(-1)
    labels = out.label_ids
    mask = labels != -100
    ids = ds["input_ids"]
    tok = trainer.tokenizer
    shown = 0
    results = []
    for i in range(len(ds)):
        m = mask[i]
        if not m.any(): continue
        y_true = labels[i][m]
        y_pred = preds[i][m]
        if (y_true != y_pred).any():
            results.append(i)
            words = tok.convert_ids_to_tokens(ds[i]["input_ids"])
            visible_words = [w for w,mm in zip(words, m) if mm]
            # mark predicted boundaries with "▌"
            pieces = []
            for w, b, t in zip(visible_words, y_pred, y_true):
                mark = "▌" if b==1 else ""
                pieces.append(w+mark)
            print(" ".join(pieces))
            shown += 1
            if shown >= max_show: break
    return results

results = error_examples(best_trainer, val_ds, max_show=5)
evaluation.preview_pred_vs_gold(best_trainer, val_ds, results)



AttributeError: `AcceleratorState` object has no attribute `distributed_type`. This happens if `AcceleratorState._reset_state()` was called and an `Accelerator` or `PartialState` was not reinitialized.

In [18]:
# OOD
ood_results = {}
importlib.reload(dataset)

for model_key in ["deberta", "modernbert", "bert"]:
    pairs = dataset.read_token_label_file(OOD_DATA)
    sents_tok, sents_lab = dataset.group_into_sentences(pairs)
    ds_full = dataset.build_hf_dataset_for_token_classification(sents_tok, sents_lab, model_key=model_key)
    ds_full.save_to_disk(HF_DATA/f"{model_key}"/"ood")

# -- OOD evaluation loop:
ood_results = {}
for model_key in ["deberta", "modernbert", "bert"]:
    model_dir = paths.chekpoints / model_key
    trainer = evaluation.load_trainer_for_eval(model_dir, HF_DATA / model_key / "ood")
    pred = trainer.predict(trainer.eval_dataset)  # use their own OOD eval set
    logits = pred.predictions
    labels = pred.label_ids

    metrics = evaluation.compute_prf(logits, labels)
    ood_results[model_key] = metrics
    print(f"{model_key} OOD Results:", metrics)

microsoft/deberta-v3-base




Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/37 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/37 [00:00<?, ? examples/s]

answerdotai/ModernBERT-base


Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/9 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/9 [00:00<?, ? examples/s]

bert-base-multilingual-cased


Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/33 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/33 [00:00<?, ? examples/s]

  trainer = Trainer(


deberta OOD Results: {'precision': 0.9916666666666667, 'recall': 0.9972067039106145, 'f1': 0.9944289693593314, 'accuracy': 0.9996245893946504}


  trainer = Trainer(


modernbert OOD Results: {'precision': 0.9763313609467456, 'recall': 0.990990990990991, 'f1': 0.9836065573770492, 'accuracy': 0.998860103626943}


  trainer = Trainer(


bert OOD Results: {'precision': 0.9895287958115183, 'recall': 0.9973614775725593, 'f1': 0.9934296977660972, 'accuracy': 0.9995317036620773}


In [None]:
pd.DataFrame(ood_results).T.sort_values("f1", ascending=False)

Unnamed: 0,precision,recall,f1,accuracy
bert,0.992084,0.992084,0.992084,0.999438
modernbert,0.979351,0.996997,0.988095,0.999171
deberta,0.1875,0.00838,0.016043,0.965462


In [19]:
best_key = max(ood_results, key=lambda k: ood_results[k]["f1"])
model_dir = paths.chekpoints/best_key

ood_best_trainer = evaluation.load_trainer_for_eval(model_dir, HF_DATA / best_key / "ood")
ood_ds = load_from_disk(HF_DATA/best_key/"val")
results = error_examples(ood_best_trainer, ood_ds, max_show=5)

evaluation.preview_predictions(ood_best_trainer, ood_ds, k=3)
evaluation.preview_full_sentences(ood_best_trainer, ood_ds, n_examples=2)
evaluation.preview_pred_vs_gold(ood_best_trainer, ood_ds, results)

  trainer = Trainer(


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


▁più ▁aff ▁" ▁Feder ▁: ▁« ▁voi ▁a ▁una ▁bu ▁nu ▁da ▁dar ▁" ▁e ▁me ▁la ▁fate ▁tanto ▁so ▁? ▁»▌ ▁« ▁Una ▁bu ▁nu ▁" ▁io ▁?▌ ▁Ho ▁l ▁inferno ▁nel ▁cu ▁; ▁e ▁vi ▁dar ▁una ▁bu ▁nu ▁?▌ ▁Dit ▁voi ▁" ▁se ▁lo ▁sap ▁" ▁qual ▁è ▁quest ▁bu ▁nu ▁che ▁as ▁da ▁un ▁par ▁mio ▁. ▁»▌ ▁« ▁Che ▁Dio ▁v ▁ha ▁to ▁il ▁cu ▁" ▁e ▁vu ▁far ▁suo ▁" ▁» ▁ris ▁pa ▁il ▁cardinal ▁.▌ ▁« ▁Dio ▁! ▁Dio ▁! ▁Dio ▁!▌ ▁Se ▁lo ▁ved ▁!▌ ▁Se ▁lo ▁sent ▁!▌ ▁Dov ▁è ▁questo ▁Dio ▁? ▁»▌ ▁« ▁Voi ▁me ▁lo ▁do ▁? ▁voi ▁?▌ ▁E ▁chi ▁più ▁di ▁voi ▁l ▁ha ▁vic ▁?▌ ▁Non ▁ve ▁lo ▁sent ▁in ▁cu ▁" ▁che ▁v ▁op ▁" ▁che ▁v ▁a ▁" ▁che ▁non ▁vi ▁las ▁stare ▁" ▁e ▁n ▁st ▁tempo ▁v ▁at ▁" ▁vi ▁fa ▁present ▁una ▁s ▁di ▁quiet ▁" ▁di ▁cons ▁" ▁d ▁una ▁cons ▁che ▁sar ▁pie ▁" ▁i ▁" ▁sub ▁che ▁voi ▁lo ▁rico ▁" ▁lo ▁confess ▁" ▁l ▁impl ▁? ▁»▌ ▁« ▁Oh ▁" ▁cer ▁! ▁ho ▁qui ▁qual ▁cosa ▁che ▁m ▁op ▁" ▁che ▁mi ▁rode ▁!▌ ▁Ma ▁Dio ▁!▌ ▁Se ▁c ▁è ▁questo ▁Dio ▁" ▁se ▁è ▁quell ▁che ▁di ▁" ▁cosa ▁vol ▁che ▁f ▁di ▁me ▁? ▁»▌ ▁Quest ▁parole ▁fur ▁de ▁con ▁un ▁accent ▁disp ▁; ▁m

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.



Example 0 — 6 predicted sentences:
 • a cui dove tir il collo " per il ban di dome " e port ; per non bis mai and con le mani v da que sign .
 • Rac tutto l acc ; e ved che vi di " su due pie " di quell co che a no non ver in test " a pens un an . »
 • Renzo ab molto vol questo pare ; Lucia l app ; e Agnes " superb d a da " lev " a una a una " le po best dalla st " ri le loro gamb " come se faces un m di fi " le av e le st con uno spa " e le cons in man a Renzo ; il qual " date e rice parole di s " us dalla parte dell or " per non ved da rag " che gli corre diet " grid : lo spo ! lo spo ! Co "
 • at i camp o " come di col " i lu " se n and per vi " fre " ripen all sua dis " e rum il disc da fare al do Azz . Las poi
 • pens al let " come doves stare in via quell po best " cos leg e ten per le za " a capo all in gi " nella man d un u il qual " a da t passion " accom col gest i pens che gli pass a tumult per la men . Ora st
 • il bra per colle " or l a per dis " or lo di in a "

Example 

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.



### Example 0 — 6 predicted sentences

 • a cui dovevo tirare il collo "," per il banchetto di domenica "," e portateglieli ; perché non bisogna mai andar con le mani vòte da que' signori . Rac
 • contategli tutto l' accaduto ; e vedrete che vi dirà "," su due piedi "," di quelle cose che a noi non verrebbero in testa "," a pensarci un anno . » Renzo
 • abbracciò molto volentieri questo parere ; Lucia l' approvò ; e Agnese "," superba d' averlo dato "," levò "," a una a una "," le povere bestie dalla stìa "," riunì le loro otto gambe "," come se facesse un mazzetto di fiori "," le avvolse e le strinse con uno spago "," e le consegnò in mano a Renzo ; il quale "," date e ricevute parole di speranza "," uscì dalla parte dell' orto "," per non esser veduto da' ragazzi "," che gli correrebber dietro "," gridando : lo sposo ! lo sposo ! Co
 • sì "," attraversando i campi o "," come dicon colà "," i luoghi "," se n' andò per viottole "," fremendo "," ripensando alla sua disgrazia "," e rumi


### Window 1
P: più affettuosamente "," Federigo : « voi avete una buona nuova da darmi "," e me la fate tanto sospirare ? » « | Una buona nuova "," io ? Ho | l' inferno nel cuore ; e vi darò una buona nuova ? Dit | emi voi "," se lo sapete "," qual è questa buona nuova che aspettate da un par mio . » « | Che Dio v' ha toccato il cuore "," e vuol farvi suo "," » rispose pacatamente il cardinale . « | Dio ! Dio ! Dio ! Se | lo vedessi ! Se | lo sentissi ! Dov | ' è questo Dio ? » « | Voi me lo domandate ? voi ? E | chi più di voi l' ha vicino ? Non | ve lo sentite in cuore "," che v' opprime "," che v' agita "," che non vi lascia stare "," e nello stesso tempo v' attira "," vi fa presentire una speranza di quiete "," di consolazione "," d' una consolazione che sarà piena "," immensa "," subito che voi lo riconosciate "," lo confessiate "," l' imploriate ? » « | Oh "," certo ! ho qui qualche cosa che m' opprime "," che mi rode ! Ma | Dio ! Se | c' è questo Dio "," se è quello che dicono

In [20]:
import importlib
import pandas as pd
import evaluation
from datasets import load_from_disk

importlib.reload(evaluation)

model_keys = ["deberta", "modernbert", "bert"]

for model_key in model_keys:
    model_dir = paths.chekpoints / model_key

    # --- Validation ---
    val_ds_path = HF_DATA / model_key / "val"
    val_ds = load_from_disk(val_ds_path)
    val_tr = evaluation.load_trainer_for_eval(model_dir, val_ds_path)
    val_out = paths.results / f"{model_key}_val_tokens.csv"   # or .parquet / .jsonl
    _, val_summary = evaluation.save_token_predictions(val_tr, val_ds, val_out, word_only=True)
    print(f"[{model_key}] saved VAL predictions:", val_summary)

    # --- OOD ---
    ood_ds_path = HF_DATA / model_key / "ood"
    ood_ds = load_from_disk(ood_ds_path)
    ood_tr = evaluation.load_trainer_for_eval(model_dir, ood_ds_path)
    ood_out = paths.results / f"{model_key}_ood_tokens.csv"   # or .parquet / .jsonl
    _, ood_summary = evaluation.save_token_predictions(ood_tr, ood_ds, ood_out, word_only=True)
    print(f"[{model_key}] saved OOD predictions:", ood_summary)


  trainer = Trainer(


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


[deberta] saved VAL predictions: {'n_samples': 60, 'n_rows': 17305, 'word_only': True, 'path': '/home/user/mnlp/notebooks/../results/deberta_val_tokens.csv'}


  trainer = Trainer(


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


[deberta] saved OOD predictions: {'n_samples': 37, 'n_rows': 10655, 'word_only': True, 'path': '/home/user/mnlp/notebooks/../results/deberta_ood_tokens.csv'}


  trainer = Trainer(


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


[modernbert] saved VAL predictions: {'n_samples': 14, 'n_rows': 16499, 'word_only': True, 'path': '/home/user/mnlp/notebooks/../results/modernbert_val_tokens.csv'}


  trainer = Trainer(


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


[modernbert] saved OOD predictions: {'n_samples': 9, 'n_rows': 9650, 'word_only': True, 'path': '/home/user/mnlp/notebooks/../results/modernbert_ood_tokens.csv'}


  trainer = Trainer(


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


[bert] saved VAL predictions: {'n_samples': 53, 'n_rows': 17274, 'word_only': True, 'path': '/home/user/mnlp/notebooks/../results/bert_val_tokens.csv'}


  trainer = Trainer(


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


[bert] saved OOD predictions: {'n_samples': 33, 'n_rows': 10677, 'word_only': True, 'path': '/home/user/mnlp/notebooks/../results/bert_ood_tokens.csv'}
