In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch
import gc
from datasets import load_dataset, DatasetDict, Dataset
import numpy as np
from sklearn.metrics import balanced_accuracy_score, precision_recall_fscore_support, accuracy_score, classification_report, matthews_corrcoef
import pandas as pd
import wandb
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
os.environ["WANDB_PROJECT"] = "DEBATE-DeBERTa-Base"

modname = "MoritzLaurer/ModernBERT-base-zeroshot-v2.0"
training_directory ='training_base'
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

In [None]:
ds = load_dataset("mlburnham/Pol_NLI")

df = ds['train'].to_pandas()
dftest = ds['test'].to_pandas()
dfval = ds['validation'].to_pandas()

def truncate(text):
    words = text.split()
    if len(words) > 450:
        return " ".join(words[:450])
    return text

df['premise'] = df['premise'].apply(truncate)
dftest['premise'] = dftest['premise'].apply(truncate)
dfval['premise'] = dfval['premise'].apply(truncate)

ds = DatasetDict({'train': Dataset.from_pandas(df, preserve_index=False), 'validation':Dataset.from_pandas(dfval, preserve_index=False), 'test':Dataset.from_pandas(dftest, preserve_index=False)})

In [None]:
tokenizer = AutoTokenizer.from_pretrained(modname)
id2label = {0: "entailment", 1: "not_entailment"}

def tokenize_function(docs):
    return tokenizer(docs['premise'], docs['augmented_hypothesis'], padding = True, truncation = True)

def model_init():
  return AutoModelForSequenceClassification.from_pretrained(modname, 
                                                           num_labels=2,
                                                           ignore_mismatched_sizes=True,
                                                           id2label = id2label)

dstok = ds.map(tokenize_function, batched = True)
dstok = dstok.rename_columns({'entailment':'label'})

training_args = TrainingArguments(output_dir=training_directory,
    logging_dir=f'{training_directory}/logs',
    lr_scheduler_type= "linear",
    group_by_length=True,
    learning_rate=9e-6 if "large" in modname else 2e-5,
    per_device_train_batch_size=4 if "large" in modname else 16,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4 if "large" in modname else 1,  
    num_train_epochs=10,
    warmup_ratio=0.06,
    weight_decay=0.01,
    fp16=True,
    eval_strategy="epoch",
    seed=1,
    save_strategy="epoch",
    dataloader_num_workers = 12,
)

def compute_metrics_standard(eval_pred, label_text_alphabetical=list(id2label.values())):
    labels = eval_pred.label_ids
    pred_logits = eval_pred.predictions
    preds_max = np.argmax(pred_logits, axis=1)

    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(labels, preds_max, average='macro')
    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(labels, preds_max, average='micro')
    acc_balanced = balanced_accuracy_score(labels, preds_max)
    acc_not_balanced = accuracy_score(labels, preds_max)
    mcc = matthews_corrcoef(labels, preds_max)

    metrics = {'MCC': mcc,
            'f1_macro': f1_macro,
            'f1_micro': f1_micro,
            'accuracy_balanced': acc_balanced,
            'accuracy': acc_not_balanced,
            'precision_macro': precision_macro,
            'recall_macro': recall_macro,
            'precision_micro': precision_micro,
            'recall_micro': recall_micro,
            }
    print("Aggregate metrics: ", {key: metrics[key] for key in metrics if key not in ["label_gold_raw", "label_predicted_raw"]} )
    print("Detailed metrics: ", classification_report(
        labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),
        target_names=label_text_alphabetical, sample_weight=None,
        digits=2, output_dict=True, zero_division='warn'),
    "\n")

    return metrics

trainer = Trainer(
    model_init=model_init,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dstok['train'],
    eval_dataset=dstok['validation'],
    compute_metrics=lambda x: compute_metrics_standard(x, label_text_alphabetical=list(id2label.values()))
)

In [15]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmlburnham[0m. Use [1m`wandb login --relogin`[0m to force relogin


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Epoch,Training Loss,Validation Loss,Mcc,F1 Macro,F1 Micro,Accuracy Balanced,Accuracy,Precision Macro,Recall Macro,Precision Micro,Recall Micro
1,0.1762,0.248969,0.864245,0.931822,0.933892,0.929424,0.933892,0.934838,0.929424,0.933892,0.933892
2,0.1171,0.238175,0.89273,0.946356,0.947725,0.945937,0.947725,0.946793,0.945937,0.947725,0.947725
3,0.0836,0.30331,0.892146,0.945888,0.947459,0.943953,0.947459,0.948202,0.943953,0.947459,0.947459
4,0.0547,0.2369,0.898425,0.949065,0.950519,0.947325,0.950519,0.951108,0.947325,0.950519,0.950519
5,0.0445,0.280559,0.900932,0.950258,0.951716,0.948189,0.951716,0.952755,0.948189,0.951716,0.951716
6,0.0229,0.348102,0.895164,0.947102,0.94879,0.943991,0.94879,0.951202,0.943991,0.94879,0.94879
7,0.0119,0.37528,0.903259,0.951047,0.952647,0.947604,0.952647,0.955691,0.947604,0.952647,0.952647
8,0.0119,0.372962,0.906126,0.952729,0.954177,0.950102,0.954177,0.956044,0.950102,0.954177,0.954177
9,0.0067,0.410298,0.910981,0.955435,0.956637,0.954346,0.956637,0.956638,0.954346,0.956637,0.956637
10,0.0025,0.436678,0.912776,0.956281,0.957502,0.954773,0.957502,0.958009,0.954773,0.957502,0.957502


Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALL

Aggregate metrics:  {'MCC': np.float64(0.8642445778380897), 'f1_macro': 0.9318219340173488, 'f1_micro': 0.9338919925512105, 'accuracy_balanced': np.float64(0.92942363455671), 'accuracy': 0.9338919925512105, 'precision_macro': 0.9348379026072793, 'recall_macro': 0.92942363455671, 'precision_micro': 0.9338919925512105, 'recall_micro': 0.9338919925512105}
Detailed metrics:  {'entailment': {'precision': 0.939772914266908, 'recall': 0.9009307461744754, 'f1-score': 0.9199420103092784, 'support': 6339.0}, 'not_entailment': {'precision': 0.9299028909476504, 'recall': 0.9579165229389445, 'f1-score': 0.9437018577254191, 'support': 8697.0}, 'accuracy': 0.9338919925512105, 'macro avg': {'precision': 0.9348379026072793, 'recall': 0.92942363455671, 'f1-score': 0.9318219340173488, 'support': 15036.0}, 'weighted avg': {'precision': 0.9340639761977684, 'recall': 0.9338919925512105, 'f1-score': 0.9336849866978242, 'support': 15036.0}} 



  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, 

Aggregate metrics:  {'MCC': np.float64(0.8927299911532632), 'f1_macro': 0.9463564738997148, 'f1_micro': 0.9477254588986432, 'accuracy_balanced': np.float64(0.9459369363295305), 'accuracy': 0.9477254588986432, 'precision_macro': 0.9467934657220103, 'recall_macro': 0.9459369363295305, 'precision_micro': 0.9477254588986432, 'recall_micro': 0.9477254588986432}
Detailed metrics:  {'entailment': {'precision': 0.9410643367752184, 'recall': 0.9345322606089288, 'f1-score': 0.9377869241728669, 'support': 6339.0}, 'not_entailment': {'precision': 0.9525225946688022, 'recall': 0.9573416120501322, 'f1-score': 0.9549260236265626, 'support': 8697.0}, 'accuracy': 0.9477254588986432, 'macro avg': {'precision': 0.9467934657220103, 'recall': 0.9459369363295305, 'f1-score': 0.9463564738997148, 'support': 15036.0}, 'weighted avg': {'precision': 0.9476919284818224, 'recall': 0.9477254588986432, 'f1-score': 0.9477003817379634, 'support': 15036.0}} 



  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, 

Aggregate metrics:  {'MCC': np.float64(0.8921456335271707), 'f1_macro': 0.9458884025312029, 'f1_micro': 0.9474594306996542, 'accuracy_balanced': np.float64(0.943953344117606), 'accuracy': 0.9474594306996542, 'precision_macro': 0.9482024079599858, 'recall_macro': 0.943953344117606, 'precision_micro': 0.9474594306996542, 'recall_micro': 0.9474594306996542}
Detailed metrics:  {'entailment': {'precision': 0.9522412387938061, 'recall': 0.9215964663196088, 'f1-score': 0.9366682700016034, 'support': 6339.0}, 'not_entailment': {'precision': 0.9441635771261656, 'recall': 0.9663102219156031, 'f1-score': 0.9551085350608024, 'support': 8697.0}, 'accuracy': 0.9474594306996542, 'macro avg': {'precision': 0.9482024079599858, 'recall': 0.943953344117606, 'f1-score': 0.9458884025312029, 'support': 15036.0}, 'weighted avg': {'precision': 0.9475690238747139, 'recall': 0.9474594306996542, 'f1-score': 0.9473343371218385, 'support': 15036.0}} 



  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, 

Aggregate metrics:  {'MCC': np.float64(0.8984247742787135), 'f1_macro': 0.9490652484443634, 'f1_micro': 0.9505187549880287, 'accuracy_balanced': np.float64(0.9473250481953812), 'accuracy': 0.9505187549880287, 'precision_macro': 0.9511076890809419, 'recall_macro': 0.9473250481953812, 'precision_micro': 0.9505187549880287, 'recall_micro': 0.9505187549880287}
Detailed metrics:  {'entailment': {'precision': 0.9543608900438525, 'recall': 0.9269600883420098, 'f1-score': 0.940460947503201, 'support': 6339.0}, 'not_entailment': {'precision': 0.9478544881180313, 'recall': 0.9676900080487525, 'f1-score': 0.9576695493855257, 'support': 8697.0}, 'accuracy': 0.9505187549880287, 'macro avg': {'precision': 0.9511076890809419, 'recall': 0.9473250481953812, 'f1-score': 0.9490652484443634, 'support': 15036.0}, 'weighted avg': {'precision': 0.9505975103186021, 'recall': 0.9505187549880287, 'f1-score': 0.9504146060939551, 'support': 15036.0}} 



  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, 

Aggregate metrics:  {'MCC': np.float64(0.9009320258022427), 'f1_macro': 0.9502581613009853, 'f1_micro': 0.9517158818834797, 'accuracy_balanced': np.float64(0.9481888021507163), 'accuracy': 0.9517158818834797, 'precision_macro': 0.9527547939736248, 'recall_macro': 0.9481888021507163, 'precision_micro': 0.9517158818834797, 'recall_micro': 0.9517158818834797}
Detailed metrics:  {'entailment': {'precision': 0.9583537481626654, 'recall': 0.9256980596308566, 'f1-score': 0.94174289841117, 'support': 6339.0}, 'not_entailment': {'precision': 0.9471558397845843, 'recall': 0.970679544670576, 'f1-score': 0.9587734241908007, 'support': 8697.0}, 'accuracy': 0.9517158818834797, 'macro avg': {'precision': 0.9527547939736248, 'recall': 0.9481888021507163, 'f1-score': 0.9502581613009853, 'support': 15036.0}, 'weighted avg': {'precision': 0.951876745690986, 'recall': 0.9517158818834797, 'f1-score': 0.9515935556807529, 'support': 15036.0}} 



  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
TOKENIZERS_PARALLELISM=(true | false)iable 
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been us

Aggregate metrics:  {'MCC': np.float64(0.8951635932244992), 'f1_macro': 0.9471015005937404, 'f1_micro': 0.9487895716945997, 'accuracy_balanced': np.float64(0.9439911092058062), 'accuracy': 0.9487895716945997, 'precision_macro': 0.9512015229696618, 'recall_macro': 0.9439911092058062, 'precision_micro': 0.9487895716945997, 'recall_micro': 0.9487895716945997}
Detailed metrics:  {'entailment': {'precision': 0.9632340708700715, 'recall': 0.9133932796971131, 'f1-score': 0.9376518218623482, 'support': 6339.0}, 'not_entailment': {'precision': 0.9391689750692521, 'recall': 0.9745889387144993, 'f1-score': 0.9565511793251326, 'support': 8697.0}, 'accuracy': 0.9487895716945997, 'macro avg': {'precision': 0.9512015229696618, 'recall': 0.9439911092058062, 'f1-score': 0.9471015005937404, 'support': 15036.0}, 'weighted avg': {'precision': 0.9493145352103398, 'recall': 0.9487895716945997, 'f1-score': 0.9485834334514567, 'support': 15036.0}} 



  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, 

Aggregate metrics:  {'MCC': np.float64(0.903258845334105), 'f1_macro': 0.9510473851079764, 'f1_micro': 0.9526469805799415, 'accuracy_balanced': np.float64(0.9476036065332731), 'accuracy': 0.9526469805799415, 'precision_macro': 0.9556914476144035, 'recall_macro': 0.9476036065332731, 'precision_micro': 0.9526469805799415, 'recall_micro': 0.9526469805799415}
Detailed metrics:  {'entailment': {'precision': 0.9705636394045827, 'recall': 0.9154440763527371, 'f1-score': 0.9421984088326026, 'support': 6339.0}, 'not_entailment': {'precision': 0.9408192558242243, 'recall': 0.9797631367138093, 'f1-score': 0.9598963613833502, 'support': 8697.0}, 'accuracy': 0.9526469805799415, 'macro avg': {'precision': 0.9556914476144035, 'recall': 0.9476036065332731, 'f1-score': 0.9510473851079764, 'support': 15036.0}, 'weighted avg': {'precision': 0.9533591366113946, 'recall': 0.9526469805799415, 'f1-score': 0.9524351136300123, 'support': 15036.0}} 



  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, 

Aggregate metrics:  {'MCC': np.float64(0.9061263005813657), 'f1_macro': 0.9527293078611576, 'f1_micro': 0.9541766427241287, 'accuracy_balanced': np.float64(0.9501021153836631), 'accuracy': 0.9541766427241287, 'precision_macro': 0.956043664616752, 'recall_macro': 0.9501021153836631, 'precision_micro': 0.9541766427241287, 'recall_micro': 0.9541766427241287}
Detailed metrics:  {'entailment': {'precision': 0.965710517639301, 'recall': 0.9241205237419151, 'f1-score': 0.9444578798871422, 'support': 6339.0}, 'not_entailment': {'precision': 0.946376811594203, 'recall': 0.976083707025411, 'f1-score': 0.9610007358351729, 'support': 8697.0}, 'accuracy': 0.9541766427241287, 'macro avg': {'precision': 0.956043664616752, 'recall': 0.9501021153836631, 'f1-score': 0.9527293078611576, 'support': 15036.0}, 'weighted avg': {'precision': 0.954527673699808, 'recall': 0.9541766427241287, 'f1-score': 0.9540264631659412, 'support': 15036.0}} 



  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, 

Aggregate metrics:  {'MCC': np.float64(0.9109814368951077), 'f1_macro': 0.9554347924712345, 'f1_micro': 0.9566374035647779, 'accuracy_balanced': np.float64(0.9543464705232876), 'accuracy': 0.9566374035647779, 'precision_macro': 0.9566378481006266, 'recall_macro': 0.9543464705232876, 'precision_micro': 0.9566374035647779, 'recall_micro': 0.9566374035647779}
Detailed metrics:  {'entailment': {'precision': 0.9566404368074514, 'recall': 0.9397381290424357, 'f1-score': 0.9481139583001751, 'support': 6339.0}, 'not_entailment': {'precision': 0.9566352593938018, 'recall': 0.9689548120041394, 'f1-score': 0.9627556266422941, 'support': 8697.0}, 'accuracy': 0.9566374035647779, 'macro avg': {'precision': 0.9566378481006266, 'recall': 0.9543464705232876, 'f1-score': 0.9554347924712345, 'support': 15036.0}, 'weighted avg': {'precision': 0.9566374421302426, 'recall': 0.9566374035647779, 'f1-score': 0.9565828722115484, 'support': 15036.0}} 



  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, 

Aggregate metrics:  {'MCC': np.float64(0.9127763985128549), 'f1_macro': 0.9562807784295166, 'f1_micro': 0.9575019952114924, 'accuracy_balanced': np.float64(0.9547730690952556), 'accuracy': 0.9575019952114924, 'precision_macro': 0.9580090655651744, 'recall_macro': 0.9547730690952556, 'precision_micro': 0.9575019952114924, 'recall_micro': 0.9575019952114924}
Detailed metrics:  {'entailment': {'precision': 0.9608667529107374, 'recall': 0.9373718252090235, 'f1-score': 0.9489738880459954, 'support': 6339.0}, 'not_entailment': {'precision': 0.9551513782196114, 'recall': 0.9721743129814878, 'f1-score': 0.9635876688130378, 'support': 8697.0}, 'accuracy': 0.9575019952114924, 'macro avg': {'precision': 0.9580090655651744, 'recall': 0.9547730690952556, 'f1-score': 0.9562807784295166, 'support': 15036.0}, 'weighted avg': {'precision': 0.957560912681373, 'recall': 0.9575019952114924, 'f1-score': 0.9574266714545461, 'support': 15036.0}} 



  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


TrainOutput(global_step=107060, training_loss=0.05955734214114431, metrics={'train_runtime': 18316.0146, 'train_samples_per_second': 93.519, 'train_steps_per_second': 5.845, 'total_flos': 2.7258210701642544e+17, 'train_loss': 0.05955734214114431, 'epoch': 10.0})