In [1]:
import pandas as pd
from transformers import pipeline
from sklearn.metrics import matthews_corrcoef, accuracy_score, f1_score, balanced_accuracy_score, precision_recall_fscore_support, classification_report
from datasets import load_dataset, DatasetDict, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import torch
import numpy as np

import os
# Project name exported for Weights & Biases logging (presumably read by the
# W&B integration when training starts -- confirm against your wandb setup).
os.environ["WANDB_PROJECT"] = "offline"

# Directory passed to TrainingArguments as output_dir (logs go in a subfolder).
training_directory ='fewshot'

# Checkpoint to fine-tune. Earlier local checkpoints are kept as commented-out
# alternatives; only the last assignment was ever effective.
#modname = "training_base/best_checkpoint_augmented"
#modname = "training_large/latest_checkpoint"
modname = 'mlburnham/Political_DEBATE_large_v1.0'
# instantiate model (its config.id2label supplies the label names used when reporting metrics)
model = AutoModelForSequenceClassification.from_pretrained(modname, num_labels = 2, ignore_mismatched_sizes=True)

def metrics(df, preds, group_by=None):
    """Score prediction columns against the ``'entailment'`` column of ``df``.

    Args:
        df: DataFrame holding the true labels in ``'entailment'`` and one
            column per entry of ``preds``.
        preds: Iterable of column names containing predicted labels.
        group_by: ``'dataset'`` or ``'task'`` to score each group of that
            column separately; any other value scores the whole frame.

    Returns:
        DataFrame with ``MCC``, ``Accuracy`` and weighted ``F1`` columns,
        indexed by ``'Column'`` (plus the capitalised group name when grouped).
    """
    label_col = 'entailment'
    grouped = group_by in ['dataset', 'task']

    def score(y_true, y_pred):
        # Key order fixes the column order of the resulting frame.
        return {
            'MCC': matthews_corrcoef(y_true, y_pred),
            'Accuracy': accuracy_score(y_true, y_pred),
            'F1': f1_score(y_true, y_pred, average='weighted'),
        }

    rows = []
    for pred_col in preds:
        if grouped:
            for key, subset in df.groupby(group_by):
                row = score(subset[label_col], subset[pred_col])
                row['Column'] = pred_col
                row[group_by.capitalize()] = key
                rows.append(row)
        else:
            row = score(df[label_col], df[pred_col])
            row['Column'] = pred_col
            rows.append(row)

    index_cols = ['Column', group_by.capitalize()] if grouped else 'Column'
    return pd.DataFrame(rows).set_index(index_cols)

def truncate(text, max_words=450):
    """Limit ``text`` to its first ``max_words`` whitespace-separated words.

    Args:
        text: String to shorten.
        max_words: Maximum number of words to keep. Defaults to 450
            (presumably chosen to stay under the model's token limit -- confirm).

    Returns:
        ``text`` unchanged when it has at most ``max_words`` words; otherwise
        the first ``max_words`` words joined by single spaces (so the original
        whitespace is normalised only when truncation actually happens).

    Raises:
        ValueError: If ``max_words`` is negative.
    """
    if max_words < 0:
        # A negative slice bound would silently drop words from the end instead.
        raise ValueError("max_words must be non-negative")
    words = text.split()
    if len(words) > max_words:
        return " ".join(words[:max_words])
    return text


def tokenize_function(docs):
    """Tokenize premise/hypothesis pairs with the notebook-level ``tokenizer``.

    Args:
        docs: Mapping (e.g. a batch from ``Dataset.map``) with ``'premise'``
            and ``'hypothesis'`` entries.

    Returns:
        Whatever ``tokenizer`` returns for the pair, requested with
        ``max_length`` padding and truncation enabled.
    """
    premises = docs['premise']
    hypotheses = docs['hypothesis']
    return tokenizer(premises, hypotheses, truncation = True, padding = 'max_length')


def compute_metrics_standard(eval_pred, label_text_alphabetical=list(model.config.id2label.values())):
    """Compute and print classification metrics for a set of predictions.

    Args:
        eval_pred: Object exposing ``label_ids`` (true labels) and
            ``predictions`` (per-class scores; argmax is taken over axis 1).
            Presumably a ``transformers.EvalPrediction`` -- confirm.
        label_text_alphabetical: Label names used as ``target_names`` in the
            detailed report. The default is captured from
            ``model.config.id2label`` when this function is defined.

    Returns:
        dict with MCC, macro/micro F1, precision and recall, plain accuracy
        and balanced accuracy.
    """
    labels = eval_pred.label_ids
    pred_logits = eval_pred.predictions
    preds_max = np.argmax(pred_logits, axis=1)

    # metrics
    precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(labels, preds_max, average='macro')
    precision_micro, recall_micro, f1_micro, _ = precision_recall_fscore_support(labels, preds_max, average='micro')
    acc_balanced = balanced_accuracy_score(labels, preds_max)
    acc_not_balanced = accuracy_score(labels, preds_max)
    mcc = matthews_corrcoef(labels, preds_max)

    metrics = {'MCC': mcc,
            'f1_macro': f1_macro,
            'f1_micro': f1_micro,
            'accuracy_balanced': acc_balanced,
            'accuracy': acc_not_balanced,
            'precision_macro': precision_macro,
            'recall_macro': recall_macro,
            'precision_micro': precision_micro,
            'recall_micro': recall_micro,
            }
    # The dict never holds raw label entries, so it is printed as-is.
    print("Aggregate metrics: ", metrics)

    # Integer codes for the label names. pandas deprecates passing a plain
    # list to factorize, so the names are converted to an ndarray first.
    label_codes = np.sort(pd.factorize(np.asarray(label_text_alphabetical), sort=True)[0])
    print("Detailed metrics: ", classification_report(
        labels, preds_max, labels=label_codes,
        target_names=label_text_alphabetical, sample_weight=None,
        digits=2, output_dict=True, zero_division='warn'),
    "\n")

    return metrics

  from .autonotebook import tqdm as notebook_tqdm


# Freedom and Rights

In [2]:
# Load the freedom-and-rights test set and shape it into premise/hypothesis/
# entailment form. Built as one chain that returns new frames at every step,
# so nothing relies on in-place mutation of a filtered slice or of a column
# selected via chained indexing (which pandas warns will stop working).
fr = (
    pd.read_csv('freedom_test.csv')
    .dropna(subset=['text'])                      # drop rows without text
    .astype({'text': str})
    .rename(columns={'text': 'premise', 'freedom_and_rights': 'entailment'})
    .drop_duplicates('premise')                   # keep first occurrence of each text
    .assign(
        hypothesis='This text is about freedom and rights.',
        # Flip the coding so 0 marks rows originally flagged 1 in
        # `freedom_and_rights` (presumably matching entailment = 0 -- confirm).
        entailment=lambda frame: frame['entailment'].replace({1: 0, 0: 1}),
    )
)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  fr['entailment'].replace({1:0, 0:1}, inplace = True)


### Few Shot

In [3]:
# Hyperparameters shared by every few-shot run in this notebook.
training_args = TrainingArguments(
    # --- output locations ---
    output_dir=training_directory,
    logging_dir=f'{training_directory}/logs',
    # --- optimisation schedule ---
    learning_rate = 9e-6,
    lr_scheduler_type= "linear",
    warmup_ratio=0.06,
    weight_decay=0.01,
    num_train_epochs=5,
    # --- batching / data loading ---
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 16,
    gradient_accumulation_steps = 1,
    group_by_length=False,
    dataloader_num_workers = 12,
    # --- half precision for training and evaluation ---
    fp16=True,
    fp16_full_eval=True,
    # --- no intermediate evaluation or checkpointing ---
    eval_strategy="no",
    save_strategy="no",
    # --- reproducibility ---
    seed=1,
)

# Tokenizer matching the checkpoint in `modname`; used by tokenize_function.
tokenizer = AutoTokenizer.from_pretrained(modname)

In [4]:
%%time
# Define a function to initialize the modelin the trainer. This will make results reproducible
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(modname, num_labels = 2, ignore_mismatched_sizes=True)

# Define the number of samples (shots) and random seeds to use
shots = [10, 25, 50, 100]
seeds = range(1,11)

# Initialize lists to store results
mcc_list = []
acc_list = []
shots_list = []

# Iterate through different shot sizes
for shot in shots:
    # Iterate through different random seeds
    for seed in seeds:
        # Sample training data based on current shot size and seed
        train = fr.sample(shot, random_state = seed)
        # Create validation set with remaining instances
        val = fr[~fr.index.isin(train.index)]
        
        # Create a DatasetDict with train and validation splits
        ds = DatasetDict({'train': Dataset.from_pandas(train, preserve_index=False), 'validation':Dataset.from_pandas(val, preserve_index=False)})
        # Tokenize the dataset
        dstok = ds.map(tokenize_function, batched = True)
        # Rename 'entailment' column to 'label'
        dstok = dstok.rename_columns({'entailment':'label'})
        # Define label mapping
        id2label = {0: "entailment", 1: "not_entailment"}
        
        # Initialize the Trainer
        trainer = Trainer(
            model_init = model_init,
            tokenizer=tokenizer,
            args=training_args,
            train_dataset=dstok['train'],
            eval_dataset=dstok['validation'],
            compute_metrics=lambda x: compute_metrics_standard(x, label_text_alphabetical=list(model.config.id2label.values()))
        )
        
        # Train the model
        trainer.train()
        # Make predictions on validation set
        res = trainer.predict(dstok['validation'])
        preds = np.argmax(res.predictions, axis=-1)
        
        # Calculate Matthews Correlation Coefficient
        mcc_res = matthews_corrcoef(val['entailment'], preds)
        mcc_list.append(mcc_res)
        # Calculate Accuracy
        acc_res = accuracy_score(val['entailment'], preds)
        acc_list.append(acc_res)
        # Store the current shot size
        shots_list.append(shot)
    
    # Print progress
    print(shot)

Map: 100%|██████████| 10/10 [00:00<00:00, 1242.90 examples/s]
Map: 100%|██████████| 7620/7620 [00:01<00:00, 5828.75 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: mlburnham. Use `wandb login --relogin` to force relogin


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7844197233481313, 'f1_macro': 0.8902844007009825, 'f1_micro': 0.9056430446194226, 'accuracy_balanced': 0.8786685590360639, 'accuracy': 0.9056430446194226, 'precision_macro': 0.9062354054056527, 'recall_macro': 0.8786685590360639, 'precision_micro': 0.9056430446194226, 'recall_micro': 0.9056430446194226}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9076647243388615, 'recall': 0.7978723404255319, 'f1-score': 0.8492346403858251, 'support': 2538.0}, 'not_entailment': {'precision': 0.9048060864724439, 'recall': 0.9594647776465959, 'f1-score': 0.9313341610161399, 'support': 5082.0}, 'accuracy': 0.9056430446194226, 'macro avg': {'precision': 0.9062354054056527, 'recall': 0.8786685590360639, 'f1-score': 0.8902844007009825, 'support': 7620.0}, 'weighted avg': {'precision': 0.9057582154625972, 'recall': 0.9056430446194226, 'f1-score': 0.9039892025699799, 'support': 7620.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1318.13 examples/s]
Map: 100%|██████████| 7620/7620 [00:01<00:00, 5263.51 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7760315945739757, 'f1_macro': 0.886929604226202, 'f1_micro': 0.9019685039370079, 'accuracy_balanced': 0.8781489008900808, 'accuracy': 0.9019685039370079, 'precision_macro': 0.8981401468836219, 'recall_macro': 0.8781489008900808, 'precision_micro': 0.9019685039370079, 'recall_micro': 0.9019685039370079}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8884548611111112, 'recall': 0.8068584942845881, 'f1-score': 0.8456930386283825, 'support': 2537.0}, 'not_entailment': {'precision': 0.9078254326561325, 'recall': 0.9494393074955735, 'f1-score': 0.9281661698240216, 'support': 5083.0}, 'accuracy': 0.9019685039370079, 'macro avg': {'precision': 0.8981401468836219, 'recall': 0.8781489008900808, 'f1-score': 0.886929604226202, 'support': 7620.0}, 'weighted avg': {'precision': 0.9013762016837286, 'recall': 0.9019685039370079, 'f1-score': 0.9007075958288331, 'support': 7620.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1414.13 examples/s]
Map: 100%|██████████| 7620/7620 [00:01<00:00, 5219.67 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.768574103227656, 'f1_macro': 0.8830808536132791, 'f1_micro': 0.8986876640419947, 'accuracy_balanced': 0.8739516582506434, 'accuracy': 0.8986876640419947, 'precision_macro': 0.8949080978244193, 'recall_macro': 0.8739516582506434, 'precision_micro': 0.8986876640419947, 'recall_micro': 0.8986876640419947}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8854030501089325, 'recall': 0.799685163321527, 'f1-score': 0.8403639371381307, 'support': 2541.0}, 'not_entailment': {'precision': 0.9044131455399061, 'recall': 0.9482181531797598, 'f1-score': 0.9257977700884276, 'support': 5079.0}, 'accuracy': 0.8986876640419947, 'macro avg': {'precision': 0.8949080978244193, 'recall': 0.8739516582506434, 'f1-score': 0.8830808536132791, 'support': 7620.0}, 'weighted avg': {'precision': 0.898073952299735, 'recall': 0.8986876640419947, 'f1-score': 0.8973086139825608, 'support': 7620.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1427.22 examples/s]
Map: 100%|██████████| 7620/7620 [00:01<00:00, 5348.12 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7772571732777682, 'f1_macro': 0.8876827485816929, 'f1_micro': 0.8980314960629922, 'accuracy_balanced': 0.8964566929133858, 'accuracy': 0.8980314960629922, 'precision_macro': 0.8809550476826804, 'recall_macro': 0.8964566929133858, 'precision_micro': 0.8980314960629922, 'recall_micro': 0.8980314960629922}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8185760751716661, 'recall': 0.8917322834645669, 'f1-score': 0.8535895986433013, 'support': 2540.0}, 'not_entailment': {'precision': 0.9433340201936946, 'recall': 0.9011811023622047, 'f1-score': 0.9217758985200846, 'support': 5080.0}, 'accuracy': 0.8980314960629922, 'macro avg': {'precision': 0.8809550476826804, 'recall': 0.8964566929133858, 'f1-score': 0.8876827485816929, 'support': 7620.0}, 'weighted avg': {'precision': 0.9017480385196852, 'recall': 0.8980314960629922, 'f1-score': 0.8990471318944901, 'support': 7620.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1350.61 examples/s]
Map: 100%|██████████| 7620/7620 [00:01<00:00, 5943.53 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7945421706220406, 'f1_macro': 0.8970004236178288, 'f1_micro': 0.9095800524934383, 'accuracy_balanced': 0.8924424905263986, 'accuracy': 0.9095800524934383, 'precision_macro': 0.9021590909090909, 'recall_macro': 0.8924424905263986, 'precision_micro': 0.9095800524934383, 'recall_micro': 0.9095800524934383}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8818181818181818, 'recall': 0.8411509657075286, 'f1-score': 0.8610046399031672, 'support': 2537.0}, 'not_entailment': {'precision': 0.9225, 'recall': 0.9437340153452686, 'f1-score': 0.9329962073324906, 'support': 5083.0}, 'accuracy': 0.9095800524934383, 'macro avg': {'precision': 0.9021590909090909, 'recall': 0.8924424905263986, 'f1-score': 0.8970004236178288, 'support': 7620.0}, 'weighted avg': {'precision': 0.908955410403245, 'recall': 0.9095800524934383, 'f1-score': 0.909027361326166, 'support': 7620.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1174.18 examples/s]
Map: 100%|██████████| 7620/7620 [00:01<00:00, 5310.27 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7937870441447499, 'f1_macro': 0.896797649171802, 'f1_micro': 0.9089238845144357, 'accuracy_balanced': 0.8940709152920017, 'accuracy': 0.9089238845144357, 'precision_macro': 0.8997363463027741, 'recall_macro': 0.8940709152920017, 'precision_micro': 0.9089238845144357, 'recall_micro': 0.9089238845144357}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8736330498177399, 'recall': 0.8495470657739267, 'f1-score': 0.8614217252396166, 'support': 2539.0}, 'not_entailment': {'precision': 0.9258396427878082, 'recall': 0.9385947648100768, 'f1-score': 0.9321735731039875, 'support': 5081.0}, 'accuracy': 0.9089238845144357, 'macro avg': {'precision': 0.8997363463027741, 'recall': 0.8940709152920017, 'f1-score': 0.896797649171802, 'support': 7620.0}, 'weighted avg': {'precision': 0.9084442963900388, 'recall': 0.9089238845144357, 'f1-score': 0.9085989088352685, 'support': 7620.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1535.48 examples/s]
Map: 100%|██████████| 7620/7620 [00:01<00:00, 5800.20 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6485111718252304, 'f1_macro': 0.8130713993088186, 'f1_micro': 0.8223097112860892, 'accuracy_balanced': 0.8418324211189521, 'accuracy': 0.8223097112860892, 'precision_macro': 0.807582541911511, 'recall_macro': 0.8418324211189521, 'precision_micro': 0.8223097112860892, 'recall_micro': 0.8223097112860892}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.674933569530558, 'recall': 0.9003544702638834, 'f1-score': 0.7715153560580493, 'support': 2539.0}, 'not_entailment': {'precision': 0.940231514292464, 'recall': 0.7833103719740209, 'f1-score': 0.8546274425595877, 'support': 5081.0}, 'accuracy': 0.8223097112860892, 'macro avg': {'precision': 0.807582541911511, 'recall': 0.8418324211189521, 'f1-score': 0.8130713993088186, 'support': 7620.0}, 'weighted avg': {'precision': 0.8518336820417449, 'recall': 0.8223097112860892, 'f1-score': 0.8269343208237075, 'support': 7620.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1657.17 examples/s]
Map: 100%|██████████| 7620/7620 [00:01<00:00, 5865.85 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7757503144808261, 'f1_macro': 0.887868951753807, 'f1_micro': 0.9005249343832021, 'accuracy_balanced': 0.8871818098594808, 'accuracy': 0.9005249343832021, 'precision_macro': 0.8885697462359522, 'recall_macro': 0.8871818098594808, 'precision_micro': 0.9005249343832021, 'recall_micro': 0.9005249343832021}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8532328441094804, 'recall': 0.8471839306813707, 'f1-score': 0.850197628458498, 'support': 2539.0}, 'not_entailment': {'precision': 0.923906648362424, 'recall': 0.9271796890375911, 'f1-score': 0.925540275049116, 'support': 5081.0}, 'accuracy': 0.9005249343832021, 'macro avg': {'precision': 0.8885697462359522, 'recall': 0.8871818098594808, 'f1-score': 0.887868951753807, 'support': 7620.0}, 'weighted avg': {'precision': 0.9003579883889038, 'recall': 0.9005249343832021, 'f1-score': 0.9004359470053392, 'support': 7620.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1460.26 examples/s]
Map: 100%|██████████| 7620/7620 [00:01<00:00, 5354.80 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.791598619919942, 'f1_macro': 0.8934394213381556, 'f1_micro': 0.9086614173228347, 'accuracy_balanced': 0.880573620308854, 'accuracy': 0.9086614173228347, 'precision_macro': 0.91163413701048, 'recall_macro': 0.880573620308854, 'precision_micro': 0.9086614173228347, 'recall_micro': 0.9086614173228347}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9186733303044071, 'recall': 0.796376526191414, 'f1-score': 0.8531645569620253, 'support': 2539.0}, 'not_entailment': {'precision': 0.9045949437165529, 'recall': 0.964770714426294, 'f1-score': 0.9337142857142857, 'support': 5081.0}, 'accuracy': 0.9086614173228347, 'macro avg': {'precision': 0.91163413701048, 'recall': 0.880573620308854, 'f1-score': 0.8934394213381556, 'support': 7620.0}, 'weighted avg': {'precision': 0.9092858916885428, 'recall': 0.9086614173228347, 'f1-score': 0.9068749469607438, 'support': 7620.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1618.11 examples/s]
Map: 100%|██████████| 7620/7620 [00:01<00:00, 5538.74 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7576980912222729, 'f1_macro': 0.8757929276173769, 'f1_micro': 0.8940944881889764, 'accuracy_balanced': 0.8618532034469317, 'accuracy': 0.8940944881889764, 'precision_macro': 0.8966431635626466, 'recall_macro': 0.8618532034469317, 'precision_micro': 0.8940944881889764, 'recall_micro': 0.8940944881889764}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9025069637883009, 'recall': 0.7650531286894924, 'f1-score': 0.8281150159744409, 'support': 2541.0}, 'not_entailment': {'precision': 0.8907793633369924, 'recall': 0.958653278204371, 'f1-score': 0.9234708392603129, 'support': 5079.0}, 'accuracy': 0.8940944881889764, 'macro avg': {'precision': 0.8966431635626466, 'recall': 0.8618532034469317, 'f1-score': 0.8757929276173769, 'support': 7620.0}, 'weighted avg': {'precision': 0.8946901025426058, 'recall': 0.8940944881889764, 'f1-score': 0.8916730509441186, 'support': 7620.0}} 

10


Map: 100%|██████████| 25/25 [00:00<00:00, 3001.68 examples/s]
Map: 100%|██████████| 7605/7605 [00:01<00:00, 6179.70 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7653995302724572, 'f1_macro': 0.8783869437885281, 'f1_micro': 0.897172912557528, 'accuracy_balanced': 0.8619329388560157, 'accuracy': 0.897172912557528, 'precision_macro': 0.9046581410861555, 'recall_macro': 0.8619329388560157, 'precision_micro': 0.897172912557528, 'recall_micro': 0.897172912557528}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.921191734742912, 'recall': 0.7562130177514793, 'f1-score': 0.8305892547660312, 'support': 2535.0}, 'not_entailment': {'precision': 0.888124547429399, 'recall': 0.9676528599605523, 'f1-score': 0.9261846328110251, 'support': 5070.0}, 'accuracy': 0.897172912557528, 'macro avg': {'precision': 0.9046581410861555, 'recall': 0.8619329388560157, 'f1-score': 0.8783869437885281, 'support': 7605.0}, 'weighted avg': {'precision': 0.8991469432005699, 'recall': 0.897172912557528, 'f1-score': 0.8943195067960271, 'support': 7605.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2487.19 examples/s]
Map: 100%|██████████| 7605/7605 [00:01<00:00, 5348.61 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7615350820546173, 'f1_macro': 0.8744010612846346, 'f1_micro': 0.895069033530572, 'accuracy_balanced': 0.8549298315858429, 'accuracy': 0.895069033530572, 'precision_macro': 0.9084861496487582, 'recall_macro': 0.8549298315858429, 'precision_micro': 0.895069033530572, 'recall_micro': 0.895069033530572}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9365878208354304, 'recall': 0.7347019344650612, 'f1-score': 0.8234513274336284, 'support': 2533.0}, 'not_entailment': {'precision': 0.8803844784620861, 'recall': 0.9751577287066246, 'f1-score': 0.9253507951356408, 'support': 5072.0}, 'accuracy': 0.895069033530572, 'macro avg': {'precision': 0.9084861496487582, 'recall': 0.8549298315858429, 'f1-score': 0.8744010612846346, 'support': 7605.0}, 'weighted avg': {'precision': 0.8991041452907095, 'recall': 0.895069033530572, 'f1-score': 0.8914111039207562, 'support': 7605.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2906.74 examples/s]
Map: 100%|██████████| 7605/7605 [00:01<00:00, 5891.63 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7379815952819703, 'f1_macro': 0.8652671400663586, 'f1_micro': 0.8857330703484549, 'accuracy_balanced': 0.8503483369616326, 'accuracy': 0.8857330703484549, 'precision_macro': 0.8886252463034838, 'recall_macro': 0.8503483369616326, 'precision_micro': 0.8857330703484549, 'recall_micro': 0.8857330703484549}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8951115329852871, 'recall': 0.744277821625888, 'f1-score': 0.812755871579401, 'support': 2534.0}, 'not_entailment': {'precision': 0.8821389596216807, 'recall': 0.9564188522973772, 'f1-score': 0.9177784085533163, 'support': 5071.0}, 'accuracy': 0.8857330703484549, 'macro avg': {'precision': 0.8886252463034838, 'recall': 0.8503483369616326, 'f1-score': 0.8652671400663586, 'support': 7605.0}, 'weighted avg': {'precision': 0.8864614449475687, 'recall': 0.8857330703484549, 'f1-score': 0.8827847058982339, 'support': 7605.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2879.28 examples/s]
Map: 100%|██████████| 7605/7605 [00:01<00:00, 6115.98 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7747886492901683, 'f1_macro': 0.8854731057746967, 'f1_micro': 0.9015121630506245, 'accuracy_balanced': 0.8739894823658932, 'accuracy': 0.9015121630506245, 'precision_macro': 0.9012796344374073, 'recall_macro': 0.8739894823658932, 'precision_micro': 0.9015121630506245, 'recall_micro': 0.9015121630506245}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9007187780772686, 'recall': 0.7915515199368338, 'f1-score': 0.8426139945366674, 'support': 2533.0}, 'not_entailment': {'precision': 0.901840490797546, 'recall': 0.9564274447949527, 'f1-score': 0.9283322170127261, 'support': 5072.0}, 'accuracy': 0.9015121630506245, 'macro avg': {'precision': 0.9012796344374073, 'recall': 0.8739894823658932, 'f1-score': 0.8854731057746967, 'support': 7605.0}, 'weighted avg': {'precision': 0.9014668815509368, 'recall': 0.9015121630506245, 'f1-score': 0.899782018783685, 'support': 7605.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2454.07 examples/s]
Map: 100%|██████████| 7605/7605 [00:01<00:00, 5445.33 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7618168703131731, 'f1_macro': 0.8775738285612447, 'f1_micro': 0.8958579881656805, 'accuracy_balanced': 0.8629750751639271, 'accuracy': 0.8958579881656805, 'precision_macro': 0.8997278212778473, 'recall_macro': 0.8629750751639271, 'precision_micro': 0.8958579881656805, 'recall_micro': 0.8958579881656805}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9085365853658537, 'recall': 0.7644041041831097, 'f1-score': 0.8302614659237034, 'support': 2534.0}, 'not_entailment': {'precision': 0.890919057189841, 'recall': 0.9615460461447446, 'f1-score': 0.924886191198786, 'support': 5071.0}, 'accuracy': 0.8958579881656805, 'macro avg': {'precision': 0.8997278212778473, 'recall': 0.8629750751639271, 'f1-score': 0.8775738285612447, 'support': 7605.0}, 'weighted avg': {'precision': 0.8967892500100929, 'recall': 0.8958579881656805, 'f1-score': 0.8933570585430254, 'support': 7605.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2797.92 examples/s]
Map: 100%|██████████| 7605/7605 [00:01<00:00, 5768.42 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7763505026405679, 'f1_macro': 0.8881100733802709, 'f1_micro': 0.9011176857330704, 'accuracy_balanced': 0.8858974358974359, 'accuracy': 0.9011176857330704, 'precision_macro': 0.890466511878077, 'recall_macro': 0.8858974358974359, 'precision_micro': 0.9011176857330704, 'recall_micro': 0.9011176857330704}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8599111828825192, 'recall': 0.8402366863905325, 'f1-score': 0.8499600957701516, 'support': 2535.0}, 'not_entailment': {'precision': 0.9210218408736349, 'recall': 0.9315581854043392, 'f1-score': 0.9262600509903903, 'support': 5070.0}, 'accuracy': 0.9011176857330704, 'macro avg': {'precision': 0.890466511878077, 'recall': 0.8858974358974359, 'f1-score': 0.8881100733802709, 'support': 7605.0}, 'weighted avg': {'precision': 0.9006516215432631, 'recall': 0.9011176857330704, 'f1-score': 0.900826732583644, 'support': 7605.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2260.79 examples/s]
Map: 100%|██████████| 7605/7605 [00:01<00:00, 5164.03 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7306821961325816, 'f1_macro': 0.8605239836047414, 'f1_micro': 0.8824457593688363, 'accuracy_balanced': 0.8439847556868683, 'accuracy': 0.8824457593688363, 'precision_macro': 0.8880233519934979, 'recall_macro': 0.8439847556868683, 'precision_micro': 0.8824457593688363, 'recall_micro': 0.8824457593688363}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9001461276181199, 'recall': 0.7284193929838392, 'f1-score': 0.8052287581699347, 'support': 2537.0}, 'not_entailment': {'precision': 0.8759005763688761, 'recall': 0.9595501183898973, 'f1-score': 0.915819209039548, 'support': 5068.0}, 'accuracy': 0.8824457593688363, 'macro avg': {'precision': 0.8880233519934979, 'recall': 0.8439847556868683, 'f1-score': 0.8605239836047414, 'support': 7605.0}, 'weighted avg': {'precision': 0.883988802998637, 'recall': 0.8824457593688363, 'f1-score': 0.8789266418000727, 'support': 7605.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2339.47 examples/s]
Map: 100%|██████████| 7605/7605 [00:01<00:00, 5923.08 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7504910019793941, 'f1_macro': 0.8719918580540038, 'f1_micro': 0.8909927679158448, 'accuracy_balanced': 0.8577794108908751, 'accuracy': 0.8909927679158448, 'precision_macro': 0.8935642513983467, 'recall_macro': 0.8577794108908751, 'precision_micro': 0.8909927679158448, 'recall_micro': 0.8909927679158448}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8994387277829747, 'recall': 0.757981868348443, 'f1-score': 0.8226737967914438, 'support': 2537.0}, 'not_entailment': {'precision': 0.8876897750137187, 'recall': 0.9575769534333071, 'f1-score': 0.9213099193165638, 'support': 5068.0}, 'accuracy': 0.8909927679158448, 'macro avg': {'precision': 0.8935642513983467, 'recall': 0.8577794108908751, 'f1-score': 0.8719918580540038, 'support': 7605.0}, 'weighted avg': {'precision': 0.8916091824003858, 'recall': 0.8909927679158448, 'f1-score': 0.8884052719995054, 'support': 7605.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2362.08 examples/s]
Map: 100%|██████████| 7605/7605 [00:01<00:00, 5606.34 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.771285969686669, 'f1_macro': 0.8825756652732895, 'f1_micro': 0.8999342537804076, 'accuracy_balanced': 0.8683598892100612, 'accuracy': 0.8999342537804076, 'precision_macro': 0.903736986884766, 'recall_macro': 0.8683598892100612, 'precision_micro': 0.8999342537804076, 'recall_micro': 0.8999342537804076}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9124767225325885, 'recall': 0.7737860244769048, 'f1-score': 0.8374279000213629, 'support': 2533.0}, 'not_entailment': {'precision': 0.8949972512369434, 'recall': 0.9629337539432177, 'f1-score': 0.9277234305252161, 'support': 5072.0}, 'accuracy': 0.8999342537804076, 'macro avg': {'precision': 0.903736986884766, 'recall': 0.8683598892100612, 'f1-score': 0.8825756652732895, 'support': 7605.0}, 'weighted avg': {'precision': 0.9008191448321925, 'recall': 0.8999342537804076, 'f1-score': 0.897648666716372, 'support': 7605.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2252.29 examples/s]
Map: 100%|██████████| 7605/7605 [00:01<00:00, 5540.65 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7392674209555993, 'f1_macro': 0.8601622274162789, 'f1_micro': 0.8848126232741618, 'accuracy_balanced': 0.8378866104463724, 'accuracy': 0.8848126232741618, 'precision_macro': 0.9043636998254799, 'recall_macro': 0.8378866104463724, 'precision_micro': 0.8848126232741618, 'recall_micro': 0.8848126232741618}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9429333333333333, 'recall': 0.6968860859282617, 'f1-score': 0.8014505893019039, 'support': 2537.0}, 'not_entailment': {'precision': 0.8657940663176266, 'recall': 0.978887134964483, 'f1-score': 0.9188738655306539, 'support': 5068.0}, 'accuracy': 0.8848126232741618, 'macro avg': {'precision': 0.9043636998254799, 'recall': 0.8378866104463724, 'f1-score': 0.8601622274162789, 'support': 7605.0}, 'weighted avg': {'precision': 0.8915274417836159, 'recall': 0.8848126232741618, 'f1-score': 0.8797018929083871, 'support': 7605.0}} 

25


Map: 100%|██████████| 50/50 [00:00<00:00, 3656.95 examples/s]
Map: 100%|██████████| 7580/7580 [00:01<00:00, 5900.36 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7520462649965907, 'f1_macro': 0.8749264972580462, 'f1_micro': 0.891424802110818, 'accuracy_balanced': 0.866381175268618, 'accuracy': 0.891424802110818, 'precision_macro': 0.885918834585772, 'recall_macro': 0.866381175268618, 'precision_micro': 0.891424802110818, 'recall_micro': 0.891424802110818}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8719512195121951, 'recall': 0.7909917028842355, 'f1-score': 0.8295007250880464, 'support': 2531.0}, 'not_entailment': {'precision': 0.8998864496593489, 'recall': 0.9417706476530006, 'f1-score': 0.920352269428046, 'support': 5049.0}, 'accuracy': 0.891424802110818, 'macro avg': {'precision': 0.885918834585772, 'recall': 0.866381175268618, 'f1-score': 0.8749264972580462, 'support': 7580.0}, 'weighted avg': {'precision': 0.8905587362685249, 'recall': 0.891424802110818, 'f1-score': 0.8900164833166293, 'support': 7580.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 3564.58 examples/s]
Map: 100%|██████████| 7580/7580 [00:01<00:00, 5348.40 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7426166264895775, 'f1_macro': 0.8712888472974809, 'f1_micro': 0.8860158311345646, 'accuracy_balanced': 0.8701127404762383, 'accuracy': 0.8860158311345646, 'precision_macro': 0.8725077480642458, 'recall_macro': 0.8701127404762383, 'precision_micro': 0.8860158311345646, 'recall_micro': 0.8860158311345646}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8330658105939005, 'recall': 0.8225039619651348, 'f1-score': 0.8277511961722488, 'support': 2524.0}, 'not_entailment': {'precision': 0.9119496855345912, 'recall': 0.9177215189873418, 'f1-score': 0.9148264984227129, 'support': 5056.0}, 'accuracy': 0.8860158311345646, 'macro avg': {'precision': 0.8725077480642458, 'recall': 0.8701127404762383, 'f1-score': 0.8712888472974809, 'support': 7580.0}, 'weighted avg': {'precision': 0.8856828121374536, 'recall': 0.8860158311345646, 'f1-score': 0.8858320310242734, 'support': 7580.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 3194.93 examples/s]
Map: 100%|██████████| 7580/7580 [00:01<00:00, 5355.24 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7875000668288757, 'f1_macro': 0.893496233430687, 'f1_micro': 0.9064643799472295, 'accuracy_balanced': 0.8891116797729142, 'accuracy': 0.9064643799472295, 'precision_macro': 0.8984436779290508, 'recall_macro': 0.8891116797729142, 'precision_micro': 0.9064643799472295, 'recall_micro': 0.9064643799472295}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.876399834093737, 'recall': 0.8371632329635499, 'f1-score': 0.8563323201621074, 'support': 2524.0}, 'not_entailment': {'precision': 0.9204875217643644, 'recall': 0.9410601265822784, 'f1-score': 0.9306601466992666, 'support': 5056.0}, 'accuracy': 0.9064643799472295, 'macro avg': {'precision': 0.8984436779290508, 'recall': 0.8891116797729142, 'f1-score': 0.893496233430687, 'support': 7580.0}, 'weighted avg': {'precision': 0.9058071360545144, 'recall': 0.9064643799472295, 'f1-score': 0.9059103532718537, 'support': 7580.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 3331.30 examples/s]
Map: 100%|██████████| 7580/7580 [00:01<00:00, 5616.46 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.8275077376250566, 'f1_macro': 0.9137525894429435, 'f1_micro': 0.9234828496042217, 'accuracy_balanced': 0.9134233498113769, 'accuracy': 0.9234828496042217, 'precision_macro': 0.9140846520532544, 'recall_macro': 0.9134233498113769, 'precision_micro': 0.9234828496042217, 'recall_micro': 0.9234828496042217}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.886191802626343, 'recall': 0.8833796112653709, 'f1-score': 0.8847834723877632, 'support': 2521.0}, 'not_entailment': {'precision': 0.9419775014801658, 'recall': 0.9434670883573829, 'f1-score': 0.9427217064981236, 'support': 5059.0}, 'accuracy': 0.9234828496042217, 'macro avg': {'precision': 0.9140846520532544, 'recall': 0.9134233498113769, 'f1-score': 0.9137525894429435, 'support': 7580.0}, 'weighted avg': {'precision': 0.923423972877199, 'recall': 0.9234828496042217, 'f1-score': 0.9234522753382004, 'support': 7580.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 3444.79 examples/s]
Map: 100%|██████████| 7580/7580 [00:01<00:00, 5772.67 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.8301150068751187, 'f1_macro': 0.9149340137179137, 'f1_micro': 0.925065963060686, 'accuracy_balanced': 0.9117238481985557, 'accuracy': 0.925065963060686, 'precision_macro': 0.9184181506939095, 'recall_macro': 0.9117238481985557, 'precision_micro': 0.925065963060686, 'recall_micro': 0.925065963060686}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8997134670487106, 'recall': 0.8718762395874653, 'f1-score': 0.8855761482675262, 'support': 2521.0}, 'not_entailment': {'precision': 0.9371228343391085, 'recall': 0.9515714568096462, 'f1-score': 0.9442918791683013, 'support': 5059.0}, 'accuracy': 0.925065963060686, 'macro avg': {'precision': 0.9184181506939095, 'recall': 0.9117238481985557, 'f1-score': 0.9149340137179137, 'support': 7580.0}, 'weighted avg': {'precision': 0.924681011787777, 'recall': 0.925065963060686, 'f1-score': 0.9247638636536768, 'support': 7580.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 3102.39 examples/s]
Map: 100%|██████████| 7580/7580 [00:01<00:00, 5303.11 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7840915714236891, 'f1_macro': 0.8883350944565872, 'f1_micro': 0.9051451187335092, 'accuracy_balanced': 0.8726563978224795, 'accuracy': 0.9051451187335092, 'precision_macro': 0.9124440073819282, 'recall_macro': 0.8726563978224795, 'precision_micro': 0.9051451187335092, 'recall_micro': 0.9051451187335092}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9289099526066351, 'recall': 0.77500988533017, 'f1-score': 0.8450097003664583, 'support': 2529.0}, 'not_entailment': {'precision': 0.8959780621572212, 'recall': 0.9703029103147891, 'f1-score': 0.9316604885467161, 'support': 5051.0}, 'accuracy': 0.9051451187335092, 'macro avg': {'precision': 0.9124440073819282, 'recall': 0.8726563978224795, 'f1-score': 0.8883350944565872, 'support': 7580.0}, 'weighted avg': {'precision': 0.9069654963190376, 'recall': 0.9051451187335092, 'f1-score': 0.9027502189810338, 'support': 7580.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 3734.05 examples/s]
Map: 100%|██████████| 7580/7580 [00:01<00:00, 5677.29 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7743300575046395, 'f1_macro': 0.8845566458274666, 'f1_micro': 0.9011873350923483, 'accuracy_balanced': 0.8713648660296061, 'accuracy': 0.9011873350923483, 'precision_macro': 0.9036374283097488, 'recall_macro': 0.8713648660296061, 'precision_micro': 0.9011873350923483, 'recall_micro': 0.9011873350923483}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9093836246550138, 'recall': 0.7817319098457889, 'f1-score': 0.8407399532213481, 'support': 2529.0}, 'not_entailment': {'precision': 0.897891231964484, 'recall': 0.9609978222134231, 'f1-score': 0.9283733384335852, 'support': 5051.0}, 'accuracy': 0.9011873350923483, 'macro avg': {'precision': 0.9036374283097488, 'recall': 0.8713648660296061, 'f1-score': 0.8845566458274666, 'support': 7580.0}, 'weighted avg': {'precision': 0.9017255672038441, 'recall': 0.9011873350923483, 'f1-score': 0.8991352340534073, 'support': 7580.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 3218.32 examples/s]
Map: 100%|██████████| 7580/7580 [00:01<00:00, 5448.55 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7562274057220876, 'f1_macro': 0.875243457606437, 'f1_micro': 0.8934036939313984, 'accuracy_balanced': 0.8617583243283384, 'accuracy': 0.8934036939313984, 'precision_macro': 0.8952085209282634, 'recall_macro': 0.8617583243283384, 'precision_micro': 0.8934036939313984, 'recall_micro': 0.8934036939313984}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8993973110802039, 'recall': 0.7664954563413671, 'f1-score': 0.8276450511945392, 'support': 2531.0}, 'not_entailment': {'precision': 0.891019730776323, 'recall': 0.9570211923153099, 'f1-score': 0.9228418640183346, 'support': 5049.0}, 'accuracy': 0.8934036939313984, 'macro avg': {'precision': 0.8952085209282634, 'recall': 0.8617583243283384, 'f1-score': 0.875243457606437, 'support': 7580.0}, 'weighted avg': {'precision': 0.8938170468382125, 'recall': 0.8934036939313984, 'f1-score': 0.8910551709765105, 'support': 7580.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 3818.42 examples/s]
Map: 100%|██████████| 7580/7580 [00:01<00:00, 5466.33 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7921424708209758, 'f1_macro': 0.8960599416927949, 'f1_micro': 0.9079155672823219, 'accuracy_balanced': 0.895116254932082, 'accuracy': 0.9079155672823219, 'precision_macro': 0.8970285240392235, 'recall_macro': 0.895116254932082, 'precision_micro': 0.9079155672823219, 'recall_micro': 0.9079155672823219}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8650920736589272, 'recall': 0.8568596352101506, 'f1-score': 0.8609561752988047, 'support': 2522.0}, 'not_entailment': {'precision': 0.9289649744195199, 'recall': 0.9333728746540134, 'f1-score': 0.9311637080867851, 'support': 5058.0}, 'accuracy': 0.9079155672823219, 'macro avg': {'precision': 0.8970285240392235, 'recall': 0.895116254932082, 'f1-score': 0.8960599416927949, 'support': 7580.0}, 'weighted avg': {'precision': 0.9077133311849268, 'recall': 0.9079155672823219, 'f1-score': 0.9078044207924201, 'support': 7580.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 3345.91 examples/s]
Map: 100%|██████████| 7580/7580 [00:01<00:00, 5231.31 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7823085555690015, 'f1_macro': 0.8898306112025547, 'f1_micro': 0.9046174142480211, 'accuracy_balanced': 0.8801577409826649, 'accuracy': 0.9046174142480211, 'precision_macro': 0.9024689031285338, 'recall_macro': 0.8801577409826649, 'precision_micro': 0.9046174142480211, 'recall_micro': 0.9046174142480211}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8970976253298153, 'recall': 0.8066429418742586, 'f1-score': 0.84946908182386, 'support': 2529.0}, 'not_entailment': {'precision': 0.9078401809272522, 'recall': 0.953672540091071, 'f1-score': 0.9301921405812494, 'support': 5051.0}, 'accuracy': 0.9046174142480211, 'macro avg': {'precision': 0.9024689031285338, 'recall': 0.8801577409826649, 'f1-score': 0.8898306112025547, 'support': 7580.0}, 'weighted avg': {'precision': 0.9042560222061549, 'recall': 0.9046174142480211, 'f1-score': 0.9032596055420096, 'support': 7580.0}} 

50


Map: 100%|██████████| 100/100 [00:00<00:00, 3843.23 examples/s]
Map: 100%|██████████| 7530/7530 [00:01<00:00, 5758.94 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.8011084876431211, 'f1_macro': 0.8990771562731799, 'f1_micro': 0.9126162018592298, 'accuracy_balanced': 0.8887399685756837, 'accuracy': 0.9126162018592298, 'precision_macro': 0.9127275691031124, 'recall_macro': 0.8887399685756837, 'precision_micro': 0.9126162018592298, 'recall_micro': 0.9126162018592298}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9130048823790502, 'recall': 0.8165938864628821, 'f1-score': 0.8621123218776194, 'support': 2519.0}, 'not_entailment': {'precision': 0.9124502558271745, 'recall': 0.9608860506884853, 'f1-score': 0.9360419906687403, 'support': 5011.0}, 'accuracy': 0.9126162018592298, 'macro avg': {'precision': 0.9127275691031124, 'recall': 0.8887399685756837, 'f1-score': 0.8990771562731799, 'support': 7530.0}, 'weighted avg': {'precision': 0.9126357942447275, 'recall': 0.9126162018592298, 'f1-score': 0.9113104055844332, 'support': 7530.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 4157.43 examples/s]
Map: 100%|██████████| 7530/7530 [00:01<00:00, 5806.75 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.8147908888583739, 'f1_macro': 0.9073889589928641, 'f1_micro': 0.9176626826029216, 'accuracy_balanced': 0.9081257666897509, 'accuracy': 0.9176626826029216, 'precision_macro': 0.9066664290466686, 'recall_macro': 0.9081257666897509, 'precision_micro': 0.9176626826029216, 'recall_micro': 0.9176626826029216}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8734126984126984, 'recall': 0.8796962430055956, 'f1-score': 0.8765432098765432, 'support': 2502.0}, 'not_entailment': {'precision': 0.9399201596806387, 'recall': 0.9365552903739062, 'f1-score': 0.9382347081091851, 'support': 5028.0}, 'accuracy': 0.9176626826029216, 'macro avg': {'precision': 0.9066664290466686, 'recall': 0.9081257666897509, 'f1-score': 0.9073889589928641, 'support': 7530.0}, 'weighted avg': {'precision': 0.9178216645820483, 'recall': 0.9176626826029216, 'f1-score': 0.9177364174613671, 'support': 7530.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 4185.97 examples/s]
Map: 100%|██████████| 7530/7530 [00:01<00:00, 5254.49 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.8188967193481759, 'f1_macro': 0.9091517816747199, 'f1_micro': 0.9203187250996016, 'accuracy_balanced': 0.9042631155599249, 'accuracy': 0.9203187250996016, 'precision_macro': 0.9147001118506701, 'recall_macro': 0.9042631155599249, 'precision_micro': 0.9203187250996016, 'recall_micro': 0.9203187250996016}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.89937106918239, 'recall': 0.8562874251497006, 'f1-score': 0.8773006134969326, 'support': 2505.0}, 'not_entailment': {'precision': 0.9300291545189504, 'recall': 0.9522388059701492, 'f1-score': 0.9410029498525073, 'support': 5025.0}, 'accuracy': 0.9203187250996016, 'macro avg': {'precision': 0.9147001118506701, 'recall': 0.9042631155599249, 'f1-score': 0.9091517816747199, 'support': 7530.0}, 'weighted avg': {'precision': 0.9198301500344771, 'recall': 0.9203187250996016, 'f1-score': 0.919811136762107, 'support': 7530.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 4432.69 examples/s]
Map: 100%|██████████| 7530/7530 [00:01<00:00, 5546.78 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.8215335858200243, 'f1_macro': 0.9106027135953774, 'f1_micro': 0.9199203187250996, 'accuracy_balanced': 0.9143612957032377, 'accuracy': 0.9199203187250996, 'precision_macro': 0.9072034717219812, 'recall_macro': 0.9143612957032377, 'precision_micro': 0.9199203187250996, 'recall_micro': 0.9199203187250996}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8662813102119461, 'recall': 0.8977635782747604, 'f1-score': 0.881741517944695, 'support': 2504.0}, 'not_entailment': {'precision': 0.9481256332320163, 'recall': 0.930959013131715, 'f1-score': 0.9394639092460596, 'support': 5026.0}, 'accuracy': 0.9199203187250996, 'macro avg': {'precision': 0.9072034717219812, 'recall': 0.9143612957032377, 'f1-score': 0.9106027135953774, 'support': 7530.0}, 'weighted avg': {'precision': 0.9209094068253422, 'recall': 0.9199203187250996, 'f1-score': 0.9202691060828967, 'support': 7530.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 4419.90 examples/s]
Map: 100%|██████████| 7530/7530 [00:01<00:00, 5958.25 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.815830201819338, 'f1_macro': 0.9076718089743628, 'f1_micro': 0.9188579017264277, 'accuracy_balanced': 0.9032632493150132, 'accuracy': 0.9188579017264277, 'precision_macro': 0.9126206140350877, 'recall_macro': 0.9032632493150132, 'precision_micro': 0.9188579017264277, 'recall_micro': 0.9188579017264277}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8954166666666666, 'recall': 0.8565165404543643, 'f1-score': 0.8755347321246689, 'support': 2509.0}, 'not_entailment': {'precision': 0.9298245614035088, 'recall': 0.9500099581756623, 'f1-score': 0.9398088858240568, 'support': 5021.0}, 'accuracy': 0.9188579017264277, 'macro avg': {'precision': 0.9126206140350877, 'recall': 0.9032632493150132, 'f1-score': 0.9076718089743628, 'support': 7530.0}, 'weighted avg': {'precision': 0.9183598325994268, 'recall': 0.9188579017264277, 'f1-score': 0.9183927036684439, 'support': 7530.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 4007.71 examples/s]
Map: 100%|██████████| 7530/7530 [00:01<00:00, 5933.56 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7781398097617404, 'f1_macro': 0.8874733951858516, 'f1_micro': 0.902788844621514, 'accuracy_balanced': 0.8769702819969515, 'accuracy': 0.902788844621514, 'precision_macro': 0.9015578896090104, 'recall_macro': 0.8769702819969515, 'precision_micro': 0.902788844621514, 'recall_micro': 0.902788844621514}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8985248100134108, 'recall': 0.7992047713717694, 'f1-score': 0.8459595959595959, 'support': 2515.0}, 'not_entailment': {'precision': 0.9045909692046099, 'recall': 0.9547357926221336, 'f1-score': 0.9289871944121071, 'support': 5015.0}, 'accuracy': 0.902788844621514, 'macro avg': {'precision': 0.9015578896090104, 'recall': 0.8769702819969515, 'f1-score': 0.8874733951858516, 'support': 7530.0}, 'weighted avg': {'precision': 0.9025648881467261, 'recall': 0.902788844621514, 'f1-score': 0.9012561970537983, 'support': 7530.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 4624.83 examples/s]
Map: 100%|██████████| 7530/7530 [00:01<00:00, 5822.11 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.809996327820992, 'f1_macro': 0.9021149719379333, 'f1_micro': 0.9163346613545816, 'accuracy_balanced': 0.8877599686731645, 'accuracy': 0.9163346613545816, 'precision_macro': 0.9230026976021478, 'recall_macro': 0.8877599686731645, 'precision_micro': 0.9163346613545816, 'recall_micro': 0.9163346613545816}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9385188635305077, 'recall': 0.8018304814962196, 'f1-score': 0.8648068669527897, 'support': 2513.0}, 'not_entailment': {'precision': 0.9074865316737879, 'recall': 0.9736894558501096, 'f1-score': 0.9394230769230769, 'support': 5017.0}, 'accuracy': 0.9163346613545816, 'macro avg': {'precision': 0.9230026976021478, 'recall': 0.8877599686731645, 'f1-score': 0.9021149719379333, 'support': 7530.0}, 'weighted avg': {'precision': 0.9178430057715219, 'recall': 0.9163346613545816, 'f1-score': 0.9145212793592878, 'support': 7530.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 3713.32 examples/s]
Map: 100%|██████████| 7530/7530 [00:01<00:00, 5261.27 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.8090865589879329, 'f1_macro': 0.9044606953198133, 'f1_micro': 0.9155378486055777, 'accuracy_balanced': 0.9019030383997114, 'accuracy': 0.9155378486055777, 'precision_macro': 0.9072008652519085, 'recall_macro': 0.9019030383997114, 'precision_micro': 0.9155378486055777, 'recall_micro': 0.9155378486055777}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8833129334965321, 'recall': 0.8608349900596421, 'f1-score': 0.8719291180024165, 'support': 2515.0}, 'not_entailment': {'precision': 0.9310887970072849, 'recall': 0.9429710867397807, 'f1-score': 0.9369922726372102, 'support': 5015.0}, 'accuracy': 0.9155378486055777, 'macro avg': {'precision': 0.9072008652519085, 'recall': 0.9019030383997114, 'f1-score': 0.9044606953198133, 'support': 7530.0}, 'weighted avg': {'precision': 0.9151317854894171, 'recall': 0.9155378486055777, 'f1-score': 0.9152613517996927, 'support': 7530.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 4970.79 examples/s]
Map: 100%|██████████| 7530/7530 [00:01<00:00, 6130.56 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.8100952069285499, 'f1_macro': 0.9050284918880274, 'f1_micro': 0.9154050464807437, 'accuracy_balanced': 0.9062871272380613, 'accuracy': 0.9154050464807437, 'precision_macro': 0.9038118612998058, 'recall_macro': 0.9062871272380613, 'precision_micro': 0.9154050464807437, 'recall_micro': 0.9154050464807437}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8682965299684543, 'recall': 0.8790419161676647, 'f1-score': 0.8736361832969649, 'support': 2505.0}, 'not_entailment': {'precision': 0.9393271926311574, 'recall': 0.9335323383084577, 'f1-score': 0.9364208004790897, 'support': 5025.0}, 'accuracy': 0.9154050464807437, 'macro avg': {'precision': 0.9038118612998058, 'recall': 0.9062871272380613, 'f1-score': 0.9050284918880274, 'support': 7530.0}, 'weighted avg': {'precision': 0.9156974701915728, 'recall': 0.9154050464807437, 'f1-score': 0.9155342844045582, 'support': 7530.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 3774.98 examples/s]
Map: 100%|██████████| 7530/7530 [00:01<00:00, 5355.38 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7888637921804589, 'f1_macro': 0.8899049767610823, 'f1_micro': 0.9069057104913678, 'accuracy_balanced': 0.8726837842055779, 'accuracy': 0.9069057104913678, 'precision_macro': 0.9174491277772236, 'recall_macro': 0.8726837842055779, 'precision_micro': 0.9069057104913678, 'recall_micro': 0.9069057104913678}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9406903257170637, 'recall': 0.7696897374701671, 'f1-score': 0.8466418726755633, 'support': 2514.0}, 'not_entailment': {'precision': 0.8942079298373835, 'recall': 0.9756778309409888, 'f1-score': 0.9331680808466012, 'support': 5016.0}, 'accuracy': 0.9069057104913678, 'macro avg': {'precision': 0.9174491277772236, 'recall': 0.8726837842055779, 'f1-score': 0.8899049767610823, 'support': 7530.0}, 'weighted avg': {'precision': 0.9097267536410377, 'recall': 0.9069057104913678, 'f1-score': 0.9042800479990595, 'support': 7530.0}} 

100
CPU times: total: 1h 42min 38s
Wall time: 4h 45min 9s


In [5]:
# Gather the per-run few-shot scores (one row per shot size / seed combination)
# into a single frame, then display the mean of each metric per shot size.
res_fr = pd.DataFrame({'n': shots_list, 'mcc': mcc_list, 'accuracy': acc_list})
res_fr.groupby('n').mean()

Unnamed: 0_level_0,mcc,accuracy
n,Unnamed: 1_level_1,Unnamed: 2_level_1
10,0.766817,0.894843
25,0.75696,0.893465
50,0.782889,0.904472
100,0.806834,0.914635


### Zero Shot

In [6]:
# Zero-shot baseline: score every lower-cased premise against one hypothesis with
# the un-fine-tuned checkpoint.
pipe = pipeline("zero-shot-classification", model = modname, device = 0, batch_size = 32)
res = pipe(list(fr['premise'].str.lower()), ['freedom and rights except voting.'], hypothesis_template = 'This text is about {}', multi_label = False)
# Round the score of the single candidate label to a hard 0/1 decision.
labels = [round(label['scores'][0], 0) for label in res]
fr['0_shot'] = labels
# Flip the decisions so they can be compared with fr['entailment'].
# NOTE(review): this presumably mirrors the 0 = entailment / 1 = not_entailment
# coding used by the few-shot cells - confirm against how `fr` was built.
# The result is assigned back instead of calling replace(..., inplace=True) on the
# fr['0_shot'] selection: the chained in-place form raises a pandas FutureWarning
# and no longer updates `fr` from pandas 3.0 on.
fr['0_shot'] = fr['0_shot'].replace({0:1, 1:0})

zs_fr = pd.DataFrame({'n':0, 'mcc':matthews_corrcoef(fr['entailment'], fr['0_shot']), 'accuracy':accuracy_score(fr['entailment'], fr['0_shot'])}, index = [0])
# Remove any zero-shot row left by an earlier execution of this cell before
# appending, so re-running the cell does not duplicate the n == 0 entry.
res_fr = pd.concat([res_fr[res_fr['n'] != 0], zs_fr], axis = 0)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  fr['0_shot'].replace({0:1, 1:0}, inplace = True)


In [7]:
# Write the collected scores in res_fr to CSV; the row index is left out of the file.
res_fr.to_csv('motn_fewshot_large2.csv', index = False)

# Covid

In [8]:
# Load the labelled COVID tweets and reshape them into premise / entailment /
# hypothesis columns, the layout the tokenizer and training loop read from.
df = (
    pd.read_csv('covid_tweets_labeled.csv')[['text', 'non_comp']]
    .rename(columns={'text': 'premise', 'non_comp': 'entailment'})
    .assign(hypothesis='The author of this tweet does not believe COVID is dangerous.')
)
# Swap the 0/1 coding of the label column.
# NOTE(review): presumably non_comp == 1 marks tweets that match the hypothesis,
# which the 0 = entailment convention needs flipped - confirm against the data.
# Assigning the result back replaces the chained replace(..., inplace=True) call,
# which raises a pandas FutureWarning and stops modifying `df` from pandas 3.0 on.
df['entailment'] = df['entailment'].replace({0: 1, 1: 0})

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['entailment'].replace({0:1, 1:0}, inplace = True)


### Few Shot

In [9]:
# Settings shared by every few-shot fine-tuning run in this section.
training_args = TrainingArguments(
    # Output and log locations
    output_dir=training_directory,
    logging_dir=f'{training_directory}/logs',
    # Optimisation schedule
    learning_rate=9e-6,
    lr_scheduler_type="linear",
    warmup_ratio=0.06,
    weight_decay=0.01,
    num_train_epochs=5,
    # Batching and data loading
    per_device_train_batch_size=2,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=1,
    group_by_length=False,
    dataloader_num_workers=12,
    # Half precision for both training and evaluation
    fp16=True,
    fp16_full_eval=True,
    # No evaluation or checkpointing during training
    eval_strategy="no",
    save_strategy="no",
    # Fixed trainer seed
    seed=1,
)

# Tokenizer belonging to the same checkpoint that is fine-tuned below.
tokenizer = AutoTokenizer.from_pretrained(modname)

In [10]:
%%time
# Define a function to initialize the model in the trainer. This will make results reproducible
def model_init():
    """Load the `modname` checkpoint as a two-label sequence classifier.

    Handed to `Trainer(model_init=...)` so that every run is built from a
    freshly loaded checkpoint instead of reusing an already fine-tuned model.

    Returns:
        The model returned by `AutoModelForSequenceClassification.from_pretrained`.
    """
    # ignore_mismatched_sizes=True lets loading proceed even when the saved
    # weights and the requested 2-label configuration differ in shape.
    return AutoModelForSequenceClassification.from_pretrained(modname, num_labels = 2, ignore_mismatched_sizes=True)

# Few-shot experiment: for every shot size and seed, fine-tune a fresh model on a
# random sample of `df` and score it on all remaining rows.

# Define the number of samples (shots) and random seeds to use
shots = [10, 25, 50, 100]
seeds = [1,2,3,4,5,6,7,8,9,10]

# Label mapping, defined once because it does not change between runs. Its values
# are the label names given to the metrics function, so the cell no longer depends
# on the separate module-level `model` object, which is never trained here.
id2label = {0: "entailment", 1: "not_entailment"}

# Initialize lists to store results (one entry per shot size / seed combination)
mcc_list = []
acc_list = []
shots_list = []

# Iterate through different shot sizes
for shot in shots:
    # Iterate through different random seeds
    for seed in seeds:
        # Sample training data based on current shot size and seed
        train = df.sample(shot, random_state = seed)
        # Create validation set with remaining instances
        val = df[~df.index.isin(train.index)]

        # Create a DatasetDict with train and validation splits
        ds = DatasetDict({'train': Dataset.from_pandas(train, preserve_index=False), 'validation':Dataset.from_pandas(val, preserve_index=False)})
        # Tokenize the dataset
        dstok = ds.map(tokenize_function, batched = True)
        # Rename 'entailment' column to 'label'
        dstok = dstok.rename_columns({'entailment':'label'})

        # Initialize the Trainer; model_init gives each run a freshly loaded model
        # NOTE(review): recent transformers releases rename `tokenizer=` to
        # `processing_class=` - confirm against the installed version.
        trainer = Trainer(
            model_init = model_init,
            tokenizer=tokenizer,
            args=training_args,
            train_dataset=dstok['train'],
            eval_dataset=dstok['validation'],
            compute_metrics=lambda x: compute_metrics_standard(x, label_text_alphabetical=list(id2label.values()))
        )

        # Train the model
        trainer.train()
        # Make predictions on validation set
        res = trainer.predict(dstok['validation'])
        # Predicted class = index of the largest value along the last axis
        preds = np.argmax(res.predictions, axis=-1)

        # Calculate Matthews Correlation Coefficient
        mcc_res = matthews_corrcoef(val['entailment'], preds)
        mcc_list.append(mcc_res)
        # Calculate Accuracy
        acc_res = accuracy_score(val['entailment'], preds)
        acc_list.append(acc_res)
        # Store the current shot size
        shots_list.append(shot)

    # Print progress
    print(shot)

Map: 100%|██████████| 10/10 [00:00<00:00, 1082.37 examples/s]
Map: 100%|██████████| 1977/1977 [00:00<00:00, 3333.95 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.708103598451791, 'f1_macro': 0.8511007158453332, 'f1_micro': 0.8897319170460294, 'accuracy_balanced': 0.8329427722482741, 'accuracy': 0.8897319170460294, 'precision_macro': 0.8764991673752834, 'recall_macro': 0.8329427722482741, 'precision_micro': 0.8897319170460294, 'recall_micro': 0.8897319170460294}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8526077097505669, 'recall': 0.7107750472589792, 'f1-score': 0.7752577319587629, 'support': 529.0}, 'not_entailment': {'precision': 0.900390625, 'recall': 0.955110497237569, 'f1-score': 0.9269436997319035, 'support': 1448.0}, 'accuracy': 0.8897319170460294, 'macro avg': {'precision': 0.8764991673752834, 'recall': 0.8329427722482741, 'f1-score': 0.8511007158453332, 'support': 1977.0}, 'weighted avg': {'precision': 0.8876050093363934, 'recall': 0.8897319170460294, 'f1-score': 0.8863560027405067, 'support': 1977.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1424.45 examples/s]
Map: 100%|██████████| 1977/1977 [00:00<00:00, 2855.90 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7047478114122483, 'f1_macro': 0.8505993403921743, 'f1_micro': 0.8882144663631766, 'accuracy_balanced': 0.8362219459530197, 'accuracy': 0.8882144663631766, 'precision_macro': 0.8693017987586639, 'recall_macro': 0.8362219459530197, 'precision_micro': 0.8882144663631766, 'recall_micro': 0.8882144663631766}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.834061135371179, 'recall': 0.7248576850094877, 'f1-score': 0.7756345177664975, 'support': 527.0}, 'not_entailment': {'precision': 0.9045424621461488, 'recall': 0.9475862068965517, 'f1-score': 0.9255641630178512, 'support': 1450.0}, 'accuracy': 0.8882144663631766, 'macro avg': {'precision': 0.8693017987586639, 'recall': 0.8362219459530197, 'f1-score': 0.8505993403921743, 'support': 1977.0}, 'weighted avg': {'precision': 0.8857545718019864, 'recall': 0.8882144663631766, 'f1-score': 0.8855980916736613, 'support': 1977.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1170.91 examples/s]
Map: 100%|██████████| 1977/1977 [00:00<00:00, 2936.27 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7008248150086237, 'f1_macro': 0.8499544626593807, 'f1_micro': 0.8846737481031867, 'accuracy_balanced': 0.8425111939423775, 'accuracy': 0.8846737481031867, 'precision_macro': 0.8584958900748374, 'recall_macro': 0.8425111939423775, 'precision_micro': 0.8846737481031867, 'recall_micro': 0.8846737481031867}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.806060606060606, 'recall': 0.751412429378531, 'f1-score': 0.7777777777777778, 'support': 531.0}, 'not_entailment': {'precision': 0.9109311740890689, 'recall': 0.9336099585062241, 'f1-score': 0.9221311475409836, 'support': 1446.0}, 'accuracy': 0.8846737481031867, 'macro avg': {'precision': 0.8584958900748374, 'recall': 0.8425111939423775, 'f1-score': 0.8499544626593807, 'support': 1977.0}, 'weighted avg': {'precision': 0.8827641171223953, 'recall': 0.8846737481031867, 'f1-score': 0.883359453386071, 'support': 1977.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1448.66 examples/s]
Map: 100%|██████████| 1977/1977 [00:00<00:00, 3362.75 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6930735577235086, 'f1_macro': 0.8450925994079634, 'f1_micro': 0.8831562974203339, 'accuracy_balanced': 0.8322951845718533, 'accuracy': 0.8831562974203339, 'precision_macro': 0.8613887431399521, 'recall_macro': 0.8322951845718533, 'precision_micro': 0.8831562974203339, 'recall_micro': 0.8831562974203339}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8201284796573876, 'recall': 0.7226415094339622, 'f1-score': 0.7683049147442327, 'support': 530.0}, 'not_entailment': {'precision': 0.9026490066225166, 'recall': 0.9419488597097443, 'f1-score': 0.9218802840716943, 'support': 1447.0}, 'accuracy': 0.8831562974203339, 'macro avg': {'precision': 0.8613887431399521, 'recall': 0.8322951845718533, 'f1-score': 0.8450925994079634, 'support': 1977.0}, 'weighted avg': {'precision': 0.880526659990489, 'recall': 0.8831562974203339, 'f1-score': 0.8807093454052528, 'support': 1977.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1653.91 examples/s]
Map: 100%|██████████| 1977/1977 [00:00<00:00, 2986.65 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6142880784690236, 'f1_macro': 0.8045215300845905, 'f1_micro': 0.8386444107233182, 'accuracy_balanced': 0.8225108225108224, 'accuracy': 0.8386444107233182, 'precision_macro': 0.7925094423277086, 'recall_macro': 0.8225108225108224, 'precision_micro': 0.8386444107233182, 'recall_micro': 0.8386444107233182}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.6677367576243981, 'recall': 0.7878787878787878, 'f1-score': 0.7228496959165943, 'support': 528.0}, 'not_entailment': {'precision': 0.9172821270310192, 'recall': 0.8571428571428571, 'f1-score': 0.8861933642525865, 'support': 1449.0}, 'accuracy': 0.8386444107233182, 'macro avg': {'precision': 0.7925094423277086, 'recall': 0.8225108225108224, 'f1-score': 0.8045215300845905, 'support': 1977.0}, 'weighted avg': {'precision': 0.8506357157782646, 'recall': 0.8386444107233182, 'f1-score': 0.8425689551067069, 'support': 1977.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1380.66 examples/s]
Map: 100%|██████████| 1977/1977 [00:00<00:00, 2729.01 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7191376940045859, 'f1_macro': 0.8595601673850554, 'f1_micro': 0.8902377339403136, 'accuracy_balanced': 0.8584828562178195, 'accuracy': 0.8902377339403136, 'precision_macro': 0.8606581276957913, 'recall_macro': 0.8584828562178195, 'precision_micro': 0.8902377339403136, 'recall_micro': 0.8902377339403136}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7977099236641222, 'recall': 0.7901701323251418, 'f1-score': 0.7939221272554606, 'support': 529.0}, 'not_entailment': {'precision': 0.9236063317274604, 'recall': 0.9267955801104972, 'f1-score': 0.9251982075146501, 'support': 1448.0}, 'accuracy': 0.8902377339403136, 'macro avg': {'precision': 0.8606581276957913, 'recall': 0.8584828562178195, 'f1-score': 0.8595601673850554, 'support': 1977.0}, 'weighted avg': {'precision': 0.8899193312896728, 'recall': 0.8902377339403136, 'f1-score': 0.8900717297922873, 'support': 1977.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1488.45 examples/s]
Map: 100%|██████████| 1977/1977 [00:00<00:00, 2815.48 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7087107861480726, 'f1_macro': 0.8543499237198628, 'f1_micro': 0.8856853818917552, 'accuracy_balanced': 0.855203823953824, 'accuracy': 0.8856853818917552, 'precision_macro': 0.8535089887348128, 'recall_macro': 0.855203823953824, 'precision_micro': 0.8856853818917552, 'recall_micro': 0.8856853818917552}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7838345864661654, 'recall': 0.7897727272727273, 'f1-score': 0.7867924528301887, 'support': 528.0}, 'not_entailment': {'precision': 0.9231833910034603, 'recall': 0.9206349206349206, 'f1-score': 0.921907394609537, 'support': 1449.0}, 'accuracy': 0.8856853818917552, 'macro avg': {'precision': 0.8535089887348128, 'recall': 0.855203823953824, 'f1-score': 0.8543499237198628, 'support': 1977.0}, 'weighted avg': {'precision': 0.8859673218098885, 'recall': 0.8856853818917552, 'f1-score': 0.8858220687321997, 'support': 1977.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1526.20 examples/s]
Map: 100%|██████████| 1977/1977 [00:00<00:00, 3097.83 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6112276742661571, 'f1_macro': 0.8056024411780794, 'f1_micro': 0.8472432979261507, 'accuracy_balanced': 0.8067083882301274, 'accuracy': 0.8472432979261507, 'precision_macro': 0.8045231921636415, 'recall_macro': 0.8067083882301274, 'precision_micro': 0.8472432979261507, 'recall_micro': 0.8472432979261507}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7116104868913857, 'recall': 0.7196969696969697, 'f1-score': 0.7156308851224106, 'support': 528.0}, 'not_entailment': {'precision': 0.8974358974358975, 'recall': 0.893719806763285, 'f1-score': 0.8955739972337483, 'support': 1449.0}, 'accuracy': 0.8472432979261507, 'macro avg': {'precision': 0.8045231921636415, 'recall': 0.8067083882301274, 'f1-score': 0.8056024411780794, 'support': 1977.0}, 'weighted avg': {'precision': 0.8478072597183951, 'recall': 0.8472432979261507, 'f1-score': 0.8475163527244988, 'support': 1977.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1319.34 examples/s]
Map: 100%|██████████| 1977/1977 [00:00<00:00, 2688.78 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6242498647443661, 'f1_macro': 0.8013097534486392, 'f1_micro': 0.8275164390490642, 'accuracy_balanced': 0.8402600810452328, 'accuracy': 0.8275164390490642, 'precision_macro': 0.7863161999758324, 'recall_macro': 0.8402600810452328, 'precision_micro': 0.8275164390490642, 'recall_micro': 0.8275164390490642}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.6287671232876713, 'recall': 0.8676748582230623, 'f1-score': 0.7291501191421763, 'support': 529.0}, 'not_entailment': {'precision': 0.9438652766639936, 'recall': 0.8128453038674033, 'f1-score': 0.8734693877551021, 'support': 1448.0}, 'accuracy': 0.8275164390490642, 'macro avg': {'precision': 0.7863161999758324, 'recall': 0.8402600810452328, 'f1-score': 0.8013097534486392, 'support': 1977.0}, 'weighted avg': {'precision': 0.8595522148855037, 'recall': 0.8275164390490642, 'f1-score': 0.8348528510346985, 'support': 1977.0}} 



Map: 100%|██████████| 10/10 [00:00<00:00, 1589.11 examples/s]
Map: 100%|██████████| 1977/1977 [00:00<00:00, 3569.28 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6129001545266077, 'f1_macro': 0.8006865400581338, 'f1_micro': 0.8558421851289834, 'accuracy_balanced': 0.7793054676450133, 'accuracy': 0.8558421851289834, 'precision_macro': 0.8362327656759054, 'recall_macro': 0.7793054676450133, 'precision_micro': 0.8558421851289834, 'recall_micro': 0.8558421851289834}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8029556650246306, 'recall': 0.6139359698681732, 'f1-score': 0.695837780149413, 'support': 531.0}, 'not_entailment': {'precision': 0.8695098663271802, 'recall': 0.9446749654218534, 'f1-score': 0.9055352999668544, 'support': 1446.0}, 'accuracy': 0.8558421851289834, 'macro avg': {'precision': 0.8362327656759054, 'recall': 0.7793054676450133, 'f1-score': 0.8006865400581338, 'support': 1977.0}, 'weighted avg': {'precision': 0.8516341552034301, 'recall': 0.8558421851289834, 'f1-score': 0.84921290086566, 'support': 1977.0}} 

10


Map: 100%|██████████| 25/25 [00:00<00:00, 1990.16 examples/s]
Map: 100%|██████████| 1962/1962 [00:00<00:00, 3638.83 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6742134740690049, 'f1_macro': 0.8344952100128296, 'f1_micro': 0.8634046890927625, 'accuracy_balanced': 0.8535599960234616, 'accuracy': 0.8634046890927625, 'precision_macro': 0.8214191464876817, 'recall_macro': 0.8535599960234616, 'precision_micro': 0.8634046890927625, 'recall_micro': 0.8634046890927625}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7082658022690438, 'recall': 0.8323809523809523, 'f1-score': 0.7653239929947461, 'support': 525.0}, 'not_entailment': {'precision': 0.9345724907063196, 'recall': 0.8747390396659708, 'f1-score': 0.903666427030913, 'support': 1437.0}, 'accuracy': 0.8634046890927625, 'macro avg': {'precision': 0.8214191464876817, 'recall': 0.8535599960234616, 'f1-score': 0.8344952100128296, 'support': 1962.0}, 'weighted avg': {'precision': 0.8740164196412993, 'recall': 0.8634046890927625, 'f1-score': 0.8666481916236819, 'support': 1962.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 1985.45 examples/s]
Map: 100%|██████████| 1962/1962 [00:00<00:00, 3626.54 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7211036568611793, 'f1_macro': 0.8604598734576392, 'f1_micro': 0.891946992864424, 'accuracy_balanced': 0.8569619597208068, 'accuracy': 0.891946992864424, 'precision_macro': 0.8641777994672523, 'recall_macro': 0.8569619597208068, 'precision_micro': 0.891946992864424, 'recall_micro': 0.891946992864424}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8067061143984221, 'recall': 0.7820267686424475, 'f1-score': 0.7941747572815534, 'support': 523.0}, 'not_entailment': {'precision': 0.9216494845360824, 'recall': 0.9318971507991661, 'f1-score': 0.926744989633725, 'support': 1439.0}, 'accuracy': 0.891946992864424, 'macro avg': {'precision': 0.8641777994672523, 'recall': 0.8569619597208068, 'f1-score': 0.8604598734576392, 'support': 1962.0}, 'weighted avg': {'precision': 0.891009636125279, 'recall': 0.891946992864424, 'f1-score': 0.891406441458299, 'support': 1962.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2161.66 examples/s]
Map: 100%|██████████| 1962/1962 [00:00<00:00, 3697.76 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7137199620608342, 'f1_macro': 0.8568314106043476, 'f1_micro': 0.8883792048929664, 'accuracy_balanced': 0.8548941246644796, 'accuracy': 0.8883792048929664, 'precision_macro': 0.8588367268165589, 'recall_macro': 0.8548941246644796, 'precision_micro': 0.8883792048929664, 'recall_micro': 0.8883792048929664}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7965116279069767, 'recall': 0.7828571428571428, 'f1-score': 0.7896253602305475, 'support': 525.0}, 'not_entailment': {'precision': 0.921161825726141, 'recall': 0.9269311064718163, 'f1-score': 0.9240374609781478, 'support': 1437.0}, 'accuracy': 0.8883792048929664, 'macro avg': {'precision': 0.8588367268165589, 'recall': 0.8548941246644796, 'f1-score': 0.8568314106043476, 'support': 1962.0}, 'weighted avg': {'precision': 0.8878074149947132, 'recall': 0.8883792048929664, 'f1-score': 0.8880709202582241, 'support': 1962.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2187.40 examples/s]
Map: 100%|██████████| 1962/1962 [00:00<00:00, 3661.11 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6841076311156565, 'f1_macro': 0.8305683782937348, 'f1_micro': 0.8812436289500509, 'accuracy_balanced': 0.7993487560248332, 'accuracy': 0.8812436289500509, 'precision_macro': 0.8908511740331492, 'recall_macro': 0.7993487560248332, 'precision_micro': 0.8812436289500509, 'recall_micro': 0.8812436289500509}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9060773480662984, 'recall': 0.6223908918406073, 'f1-score': 0.7379077615298087, 'support': 527.0}, 'not_entailment': {'precision': 0.875625, 'recall': 0.9763066202090592, 'f1-score': 0.9232289950576607, 'support': 1435.0}, 'accuracy': 0.8812436289500509, 'macro avg': {'precision': 0.8908511740331492, 'recall': 0.7993487560248332, 'f1-score': 0.8305683782937348, 'support': 1962.0}, 'weighted avg': {'precision': 0.8838046062339139, 'recall': 0.8812436289500509, 'f1-score': 0.8734510694362652, 'support': 1962.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 1997.21 examples/s]
Map: 100%|██████████| 1962/1962 [00:00<00:00, 3318.71 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6829091020641956, 'f1_macro': 0.8413783704497628, 'f1_micro': 0.8751274209989807, 'accuracy_balanced': 0.8445277525071228, 'accuracy': 0.8751274209989807, 'precision_macro': 0.8384087626384208, 'recall_macro': 0.8445277525071228, 'precision_micro': 0.8751274209989807, 'recall_micro': 0.8751274209989807}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7574626865671642, 'recall': 0.7792706333973128, 'f1-score': 0.7682119205298014, 'support': 521.0}, 'not_entailment': {'precision': 0.9193548387096774, 'recall': 0.9097848716169327, 'f1-score': 0.9145448203697244, 'support': 1441.0}, 'accuracy': 0.8751274209989807, 'macro avg': {'precision': 0.8384087626384208, 'recall': 0.8445277525071228, 'f1-score': 0.8413783704497628, 'support': 1962.0}, 'weighted avg': {'precision': 0.876365128584168, 'recall': 0.8751274209989807, 'f1-score': 0.8756867975274205, 'support': 1962.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2160.05 examples/s]
Map: 100%|██████████| 1962/1962 [00:00<00:00, 3664.99 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7108988541766847, 'f1_macro': 0.8554490806281893, 'f1_micro': 0.8863404689092762, 'accuracy_balanced': 0.8556625167769705, 'accuracy': 0.8863404689092762, 'precision_macro': 0.8552364650691011, 'recall_macro': 0.8556625167769705, 'precision_micro': 0.8863404689092762, 'recall_micro': 0.8863404689092762}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7878787878787878, 'recall': 0.7893738140417458, 'f1-score': 0.7886255924170616, 'support': 527.0}, 'not_entailment': {'precision': 0.9225941422594143, 'recall': 0.9219512195121952, 'f1-score': 0.9222725688393169, 'support': 1435.0}, 'accuracy': 0.8863404689092762, 'macro avg': {'precision': 0.8552364650691011, 'recall': 0.8556625167769705, 'f1-score': 0.8554490806281893, 'support': 1962.0}, 'weighted avg': {'precision': 0.8864091311694091, 'recall': 0.8863404689092762, 'f1-score': 0.8863745277717691, 'support': 1962.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2253.02 examples/s]
Map: 100%|██████████| 1962/1962 [00:00<00:00, 2778.34 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6901105574124043, 'f1_macro': 0.844834635997274, 'f1_micro': 0.8802242609582059, 'accuracy_balanced': 0.8396560294263844, 'accuracy': 0.8802242609582059, 'precision_macro': 0.8505403556771546, 'recall_macro': 0.8396560294263844, 'precision_micro': 0.8802242609582059, 'recall_micro': 0.8802242609582059}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.79, 'recall': 0.7523809523809524, 'f1-score': 0.7707317073170732, 'support': 525.0}, 'not_entailment': {'precision': 0.9110807113543091, 'recall': 0.9269311064718163, 'f1-score': 0.918937564677475, 'support': 1437.0}, 'accuracy': 0.8802242609582059, 'macro avg': {'precision': 0.8505403556771546, 'recall': 0.8396560294263844, 'f1-score': 0.844834635997274, 'support': 1962.0}, 'weighted avg': {'precision': 0.8786814384384006, 'recall': 0.8802242609582059, 'f1-score': 0.8792800340382237, 'support': 1962.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2273.78 examples/s]
Map: 100%|██████████| 1962/1962 [00:00<00:00, 3679.73 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6818280121882159, 'f1_macro': 0.8385974060161795, 'f1_micro': 0.8674821610601428, 'accuracy_balanced': 0.856714815498866, 'accuracy': 0.8674821610601428, 'precision_macro': 0.8258130991520394, 'recall_macro': 0.856714815498866, 'precision_micro': 0.8674821610601428, 'recall_micro': 0.8674821610601428}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.715927750410509, 'recall': 0.8336520076481836, 'f1-score': 0.7703180212014135, 'support': 523.0}, 'not_entailment': {'precision': 0.9356984478935698, 'recall': 0.8797776233495483, 'f1-score': 0.9068767908309455, 'support': 1439.0}, 'accuracy': 0.8674821610601428, 'macro avg': {'precision': 0.8258130991520394, 'recall': 0.856714815498866, 'f1-score': 0.8385974060161795, 'support': 1962.0}, 'weighted avg': {'precision': 0.8771153312862096, 'recall': 0.8674821610601428, 'f1-score': 0.8704750392936136, 'support': 1962.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 1720.10 examples/s]
Map: 100%|██████████| 1962/1962 [00:00<00:00, 3138.08 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.64938723421324, 'f1_macro': 0.8234993579222778, 'f1_micro': 0.8664627930682977, 'accuracy_balanced': 0.8123934249128864, 'accuracy': 0.8664627930682977, 'precision_macro': 0.8374781175985995, 'recall_macro': 0.8123934249128864, 'precision_micro': 0.8664627930682977, 'recall_micro': 0.8664627930682977}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.782051282051282, 'recall': 0.6958174904942965, 'f1-score': 0.7364185110663984, 'support': 526.0}, 'not_entailment': {'precision': 0.892904953145917, 'recall': 0.9289693593314763, 'f1-score': 0.910580204778157, 'support': 1436.0}, 'accuracy': 0.8664627930682977, 'macro avg': {'precision': 0.8374781175985995, 'recall': 0.8123934249128864, 'f1-score': 0.8234993579222778, 'support': 1962.0}, 'weighted avg': {'precision': 0.8631857732296183, 'recall': 0.8664627930682977, 'f1-score': 0.863888537656656, 'support': 1962.0}} 



Map: 100%|██████████| 25/25 [00:00<00:00, 2171.77 examples/s]
Map: 100%|██████████| 1962/1962 [00:00<00:00, 3346.58 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6649028799652756, 'f1_macro': 0.8250949082227701, 'f1_micro': 0.8746177370030581, 'accuracy_balanced': 0.799621815681426, 'accuracy': 0.8746177370030581, 'precision_macro': 0.8688782130071745, 'recall_macro': 0.799621815681426, 'precision_micro': 0.8746177370030581, 'recall_micro': 0.8746177370030581}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8593350383631714, 'recall': 0.6375711574952562, 'f1-score': 0.7320261437908496, 'support': 527.0}, 'not_entailment': {'precision': 0.8784213876511776, 'recall': 0.9616724738675958, 'f1-score': 0.9181636726546906, 'support': 1435.0}, 'accuracy': 0.8746177370030581, 'macro avg': {'precision': 0.8688782130071745, 'recall': 0.799621815681426, 'f1-score': 0.8250949082227701, 'support': 1962.0}, 'weighted avg': {'precision': 0.8732947280819731, 'recall': 0.8746177370030581, 'f1-score': 0.8681664872768904, 'support': 1962.0}} 

25


Map: 100%|██████████| 50/50 [00:00<00:00, 1840.40 examples/s]
Map: 100%|██████████| 1937/1937 [00:00<00:00, 3306.22 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7022475342222833, 'f1_macro': 0.8494137338113704, 'f1_micro': 0.8771295818275684, 'accuracy_balanced': 0.8651517421745172, 'accuracy': 0.8771295818275684, 'precision_macro': 0.8376347024832109, 'recall_macro': 0.8651517421745172, 'precision_micro': 0.8771295818275684, 'recall_micro': 0.8771295818275684}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7368421052631579, 'recall': 0.839458413926499, 'f1-score': 0.7848101265822784, 'support': 517.0}, 'not_entailment': {'precision': 0.9384272997032641, 'recall': 0.8908450704225352, 'f1-score': 0.9140173410404624, 'support': 1420.0}, 'accuracy': 0.8771295818275684, 'macro avg': {'precision': 0.8376347024832109, 'recall': 0.8651517421745172, 'f1-score': 0.8494137338113704, 'support': 1937.0}, 'weighted avg': {'precision': 0.8846226814660235, 'recall': 0.8771295818275684, 'f1-score': 0.8795309549408852, 'support': 1937.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 2109.98 examples/s]
Map: 100%|██████████| 1937/1937 [00:00<00:00, 2665.57 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6915691866835016, 'f1_macro': 0.8410578171385367, 'f1_micro': 0.8843572534847702, 'accuracy_balanced': 0.8190358787152314, 'accuracy': 0.8843572534847702, 'precision_macro': 0.8747759827954786, 'recall_macro': 0.8190358787152314, 'precision_micro': 0.8843572534847702, 'recall_micro': 0.8843572534847702}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8581907090464548, 'recall': 0.6789168278529981, 'f1-score': 0.7580993520518359, 'support': 517.0}, 'not_entailment': {'precision': 0.8913612565445026, 'recall': 0.9591549295774648, 'f1-score': 0.9240162822252375, 'support': 1420.0}, 'accuracy': 0.8843572534847702, 'macro avg': {'precision': 0.8747759827954786, 'recall': 0.8190358787152314, 'f1-score': 0.8410578171385367, 'support': 1937.0}, 'weighted avg': {'precision': 0.8825077856841564, 'recall': 0.8843572534847702, 'f1-score': 0.8797317944092081, 'support': 1937.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 2021.76 examples/s]
Map: 100%|██████████| 1937/1937 [00:00<00:00, 2667.02 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6859234438348072, 'f1_macro': 0.8344056926694673, 'f1_micro': 0.8817759421786268, 'accuracy_balanced': 0.8066447341700175, 'accuracy': 0.8817759421786268, 'precision_macro': 0.8835798551014256, 'recall_macro': 0.8066447341700175, 'precision_micro': 0.8817759421786268, 'recall_micro': 0.8817759421786268}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8865435356200527, 'recall': 0.6436781609195402, 'f1-score': 0.7458379578246392, 'support': 522.0}, 'not_entailment': {'precision': 0.8806161745827985, 'recall': 0.9696113074204947, 'f1-score': 0.9229734275142953, 'support': 1415.0}, 'accuracy': 0.8817759421786268, 'macro avg': {'precision': 0.8835798551014256, 'recall': 0.8066447341700175, 'f1-score': 0.8344056926694673, 'support': 1937.0}, 'weighted avg': {'precision': 0.8822135325907732, 'recall': 0.8817759421786268, 'f1-score': 0.8752373845726327, 'support': 1937.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 2726.69 examples/s]
Map: 100%|██████████| 1937/1937 [00:00<00:00, 3658.00 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6838440032794001, 'f1_macro': 0.8269444752779755, 'f1_micro': 0.8807434176561694, 'accuracy_balanced': 0.7924929428369796, 'accuracy': 0.8807434176561694, 'precision_macro': 0.8997041913946587, 'recall_macro': 0.7924929428369796, 'precision_micro': 0.8807434176561694, 'recall_micro': 0.8807434176561694}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9287833827893175, 'recall': 0.6019230769230769, 'f1-score': 0.7304550758459744, 'support': 520.0}, 'not_entailment': {'precision': 0.870625, 'recall': 0.9830628087508821, 'f1-score': 0.9234338747099768, 'support': 1417.0}, 'accuracy': 0.8807434176561694, 'macro avg': {'precision': 0.8997041913946587, 'recall': 0.7924929428369796, 'f1-score': 0.8269444752779755, 'support': 1937.0}, 'weighted avg': {'precision': 0.8862379886682732, 'recall': 0.8807434176561694, 'f1-score': 0.8716274857531978, 'support': 1937.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 2555.48 examples/s]
Map: 100%|██████████| 1937/1937 [00:00<00:00, 2695.26 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7113239933256137, 'f1_macro': 0.8556329795651664, 'f1_micro': 0.8869385647909138, 'accuracy_balanced': 0.8576327288258099, 'accuracy': 0.8869385647909138, 'precision_macro': 0.8537021242029162, 'recall_macro': 0.8576327288258099, 'precision_micro': 0.8869385647909138, 'recall_micro': 0.8869385647909138}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7816091954022989, 'recall': 0.7953216374269005, 'f1-score': 0.7884057971014493, 'support': 513.0}, 'not_entailment': {'precision': 0.9257950530035336, 'recall': 0.9199438202247191, 'f1-score': 0.9228601620288834, 'support': 1424.0}, 'accuracy': 0.8869385647909138, 'macro avg': {'precision': 0.8537021242029162, 'recall': 0.8576327288258099, 'f1-score': 0.8556329795651664, 'support': 1937.0}, 'weighted avg': {'precision': 0.8876085042428555, 'recall': 0.8869385647909138, 'f1-score': 0.8872509265060265, 'support': 1937.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 1998.45 examples/s]
Map: 100%|██████████| 1937/1937 [00:00<00:00, 2688.16 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6967319498350035, 'f1_macro': 0.8421294092092069, 'f1_micro': 0.8859060402684564, 'accuracy_balanced': 0.8173253352152434, 'accuracy': 0.8859060402684564, 'precision_macro': 0.8824429978082371, 'recall_macro': 0.8173253352152434, 'precision_micro': 0.8859060402684564, 'recall_micro': 0.8859060402684564}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8765743073047859, 'recall': 0.6692307692307692, 'f1-score': 0.7589967284623773, 'support': 520.0}, 'not_entailment': {'precision': 0.8883116883116883, 'recall': 0.9654199011997178, 'f1-score': 0.9252620899560365, 'support': 1417.0}, 'accuracy': 0.8859060402684564, 'macro avg': {'precision': 0.8824429978082371, 'recall': 0.8173253352152434, 'f1-score': 0.8421294092092069, 'support': 1937.0}, 'weighted avg': {'precision': 0.8851607135447346, 'recall': 0.8859060402684564, 'f1-score': 0.8806270935818998, 'support': 1937.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 2706.74 examples/s]
Map: 100%|██████████| 1937/1937 [00:00<00:00, 3670.29 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6957882393670257, 'f1_macro': 0.8477005572087124, 'f1_micro': 0.8828084667010841, 'accuracy_balanced': 0.8427757977960755, 'accuracy': 0.8828084667010841, 'precision_macro': 0.8530888682589248, 'recall_macro': 0.8427757977960755, 'precision_micro': 0.8828084667010841, 'recall_micro': 0.8828084667010841}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7926829268292683, 'recall': 0.7572815533980582, 'f1-score': 0.7745779543197616, 'support': 515.0}, 'not_entailment': {'precision': 0.9134948096885813, 'recall': 0.9282700421940928, 'f1-score': 0.9208231600976631, 'support': 1422.0}, 'accuracy': 0.8828084667010841, 'macro avg': {'precision': 0.8530888682589248, 'recall': 0.8427757977960755, 'f1-score': 0.8477005572087124, 'support': 1937.0}, 'weighted avg': {'precision': 0.8813739425370345, 'recall': 0.8828084667010841, 'f1-score': 0.8819402065738534, 'support': 1937.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 2692.63 examples/s]
Map: 100%|██████████| 1937/1937 [00:00<00:00, 2674.28 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7003143821208688, 'f1_macro': 0.8498017819284788, 'f1_micro': 0.8848735157459989, 'accuracy_balanced': 0.8431589556432035, 'accuracy': 0.8848735157459989, 'precision_macro': 0.8572981454659055, 'recall_macro': 0.8431589556432035, 'precision_micro': 0.8848735157459989, 'recall_micro': 0.8848735157459989}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8020618556701031, 'recall': 0.7538759689922481, 'f1-score': 0.7772227772227772, 'support': 516.0}, 'not_entailment': {'precision': 0.912534435261708, 'recall': 0.932441942294159, 'f1-score': 0.9223807866341803, 'support': 1421.0}, 'accuracy': 0.8848735157459989, 'macro avg': {'precision': 0.8572981454659055, 'recall': 0.8431589556432035, 'f1-score': 0.8498017819284788, 'support': 1937.0}, 'weighted avg': {'precision': 0.8831054982099432, 'recall': 0.8848735157459989, 'f1-score': 0.8837119519122991, 'support': 1937.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 1916.87 examples/s]
Map: 100%|██████████| 1937/1937 [00:00<00:00, 2637.62 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6469221141903266, 'f1_macro': 0.8060970078050459, 'f1_micro': 0.8678368611254518, 'accuracy_balanced': 0.7719095990977802, 'accuracy': 0.8678368611254518, 'precision_macro': 0.8847861782161505, 'recall_macro': 0.7719095990977802, 'precision_micro': 0.8678368611254518, 'recall_micro': 0.8678368611254518}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.9102167182662538, 'recall': 0.564299424184261, 'f1-score': 0.6966824644549763, 'support': 521.0}, 'not_entailment': {'precision': 0.8593556381660471, 'recall': 0.9795197740112994, 'f1-score': 0.9155115511551155, 'support': 1416.0}, 'accuracy': 0.8678368611254518, 'macro avg': {'precision': 0.8847861782161505, 'recall': 0.7719095990977802, 'f1-score': 0.8060970078050459, 'support': 1937.0}, 'weighted avg': {'precision': 0.8730358770572231, 'recall': 0.8678368611254518, 'f1-score': 0.8566525144123315, 'support': 1937.0}} 



Map: 100%|██████████| 50/50 [00:00<00:00, 2276.47 examples/s]
Map: 100%|██████████| 1937/1937 [00:00<00:00, 2677.63 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.5521309465430239, 'f1_macro': 0.7497088553763617, 'f1_micro': 0.8368611254517295, 'accuracy_balanced': 0.7175565465756812, 'accuracy': 0.8368611254517295, 'precision_macro': 0.850309594136309, 'recall_macro': 0.7175565465756812, 'precision_micro': 0.8368611254517295, 'recall_micro': 0.8368611254517295}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8690909090909091, 'recall': 0.4605009633911368, 'f1-score': 0.6020151133501259, 'support': 519.0}, 'not_entailment': {'precision': 0.8315282791817088, 'recall': 0.9746121297602257, 'f1-score': 0.8974025974025974, 'support': 1418.0}, 'accuracy': 0.8368611254517295, 'macro avg': {'precision': 0.850309594136309, 'recall': 0.7175565465756812, 'f1-score': 0.7497088553763617, 'support': 1937.0}, 'weighted avg': {'precision': 0.8415928145058569, 'recall': 0.8368611254517295, 'f1-score': 0.8182564413761478, 'support': 1937.0}} 

50


Map: 100%|██████████| 100/100 [00:00<00:00, 2285.97 examples/s]
Map: 100%|██████████| 1887/1887 [00:00<00:00, 2626.33 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7353417187899457, 'f1_macro': 0.86715165737753, 'f1_micro': 0.8982511923688394, 'accuracy_balanced': 0.8589087418148472, 'accuracy': 0.8982511923688394, 'precision_macro': 0.8766468884671945, 'recall_macro': 0.8589087418148472, 'precision_micro': 0.8982511923688394, 'recall_micro': 0.8982511923688394}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8336886993603412, 'recall': 0.7742574257425743, 'f1-score': 0.8028747433264887, 'support': 505.0}, 'not_entailment': {'precision': 0.919605077574048, 'recall': 0.9435600578871202, 'f1-score': 0.9314285714285714, 'support': 1382.0}, 'accuracy': 0.8982511923688394, 'macro avg': {'precision': 0.8766468884671945, 'recall': 0.8589087418148472, 'f1-score': 0.86715165737753, 'support': 1887.0}, 'weighted avg': {'precision': 0.8966120881739834, 'recall': 0.8982511923688394, 'f1-score': 0.8970249237382949, 'support': 1887.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 2286.33 examples/s]
Map: 100%|██████████| 1887/1887 [00:00<00:00, 2684.61 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7130263255216285, 'f1_macro': 0.8498722205297076, 'f1_micro': 0.8913619501854796, 'accuracy_balanced': 0.8240231707351847, 'accuracy': 0.8913619501854796, 'precision_macro': 0.8922609452075128, 'recall_macro': 0.8240231707351847, 'precision_micro': 0.8913619501854796, 'recall_micro': 0.8913619501854796}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8937823834196891, 'recall': 0.6777996070726916, 'f1-score': 0.770949720670391, 'support': 509.0}, 'not_entailment': {'precision': 0.8907395069953364, 'recall': 0.9702467343976778, 'f1-score': 0.928794720389024, 'support': 1378.0}, 'accuracy': 0.8913619501854796, 'macro avg': {'precision': 0.8922609452075128, 'recall': 0.8240231707351847, 'f1-score': 0.8498722205297076, 'support': 1887.0}, 'weighted avg': {'precision': 0.8915602934818205, 'recall': 0.8913619501854796, 'f1-score': 0.886217558302758, 'support': 1887.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 2367.79 examples/s]
Map: 100%|██████████| 1887/1887 [00:00<00:00, 3343.73 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.721724815567229, 'f1_macro': 0.8566672608439583, 'f1_micro': 0.8945416004239534, 'accuracy_balanced': 0.8352173776501288, 'accuracy': 0.8945416004239534, 'precision_macro': 0.8884693516315909, 'recall_macro': 0.8352173776501288, 'precision_micro': 0.8945416004239534, 'recall_micro': 0.8945416004239534}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8777506112469438, 'recall': 0.7066929133858267, 'f1-score': 0.7829880043620502, 'support': 508.0}, 'not_entailment': {'precision': 0.8991880920162382, 'recall': 0.9637418419144308, 'f1-score': 0.9303465173258663, 'support': 1379.0}, 'accuracy': 0.8945416004239534, 'macro avg': {'precision': 0.8884693516315909, 'recall': 0.8352173776501288, 'f1-score': 0.8566672608439583, 'support': 1887.0}, 'weighted avg': {'precision': 0.8934168995250873, 'recall': 0.8945416004239534, 'f1-score': 0.8906760750441395, 'support': 1887.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 2893.68 examples/s]
Map: 100%|██████████| 1887/1887 [00:00<00:00, 3244.31 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6832156839468405, 'f1_macro': 0.8402234303458694, 'f1_micro': 0.8797032326444091, 'accuracy_balanced': 0.8277661570774368, 'accuracy': 0.8797032326444091, 'precision_macro': 0.8560340662936932, 'recall_macro': 0.8277661570774368, 'precision_micro': 0.8797032326444091, 'recall_micro': 0.8797032326444091}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8112359550561797, 'recall': 0.7162698412698413, 'f1-score': 0.7608008429926238, 'support': 504.0}, 'not_entailment': {'precision': 0.9008321775312067, 'recall': 0.9392624728850325, 'f1-score': 0.919646017699115, 'support': 1383.0}, 'accuracy': 0.8797032326444091, 'macro avg': {'precision': 0.8560340662936932, 'recall': 0.8277661570774368, 'f1-score': 0.8402234303458694, 'support': 1887.0}, 'weighted avg': {'precision': 0.876901866917845, 'recall': 0.8797032326444091, 'f1-score': 0.8772199614976992, 'support': 1887.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 2931.71 examples/s]
Map: 100%|██████████| 1887/1887 [00:00<00:00, 3127.41 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7107994133598456, 'f1_macro': 0.8543429750326301, 'f1_micro': 0.8834128245892952, 'accuracy_balanced': 0.8669518460466736, 'accuracy': 0.8834128245892952, 'precision_macro': 0.8442112442516765, 'recall_macro': 0.8669518460466736, 'precision_micro': 0.8834128245892952, 'recall_micro': 0.8834128245892952}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.7504553734061931, 'recall': 0.8323232323232324, 'f1-score': 0.789272030651341, 'support': 495.0}, 'not_entailment': {'precision': 0.9379671150971599, 'recall': 0.9015804597701149, 'f1-score': 0.9194139194139194, 'support': 1392.0}, 'accuracy': 0.8834128245892952, 'macro avg': {'precision': 0.8442112442516765, 'recall': 0.8669518460466736, 'f1-score': 0.8543429750326301, 'support': 1887.0}, 'weighted avg': {'precision': 0.8887788203769541, 'recall': 0.8834128245892952, 'f1-score': 0.8852749501836722, 'support': 1887.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 2228.20 examples/s]
Map: 100%|██████████| 1887/1887 [00:00<00:00, 3139.01 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7606639413266535, 'f1_macro': 0.8802644570085132, 'f1_micro': 0.9072602013778485, 'accuracy_balanced': 0.8771326247357142, 'accuracy': 0.9072602013778485, 'precision_macro': 0.883558457744192, 'recall_macro': 0.8771326247357142, 'precision_micro': 0.9072602013778485, 'recall_micro': 0.9072602013778485}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8343558282208589, 'recall': 0.8127490039840638, 'f1-score': 0.8234106962663976, 'support': 502.0}, 'not_entailment': {'precision': 0.932761087267525, 'recall': 0.9415162454873646, 'f1-score': 0.9371182177506289, 'support': 1385.0}, 'accuracy': 0.9072602013778485, 'macro avg': {'precision': 0.883558457744192, 'recall': 0.8771326247357142, 'f1-score': 0.8802644570085132, 'support': 1887.0}, 'weighted avg': {'precision': 0.9065822637161597, 'recall': 0.9072602013778485, 'f1-score': 0.9068685220510612, 'support': 1887.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 2977.94 examples/s]
Map: 100%|██████████| 1887/1887 [00:00<00:00, 3584.15 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.7369720259999388, 'f1_macro': 0.8644897797036435, 'f1_micro': 0.9003709591944886, 'accuracy_balanced': 0.8430291963490968, 'accuracy': 0.9003709591944886, 'precision_macro': 0.8958320260250673, 'recall_macro': 0.8430291963490968, 'precision_micro': 0.9003709591944886, 'recall_micro': 0.9003709591944886}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8878048780487805, 'recall': 0.7193675889328063, 'f1-score': 0.7947598253275109, 'support': 506.0}, 'not_entailment': {'precision': 0.9038591740013541, 'recall': 0.9666908037653874, 'f1-score': 0.9342197340797761, 'support': 1381.0}, 'accuracy': 0.9003709591944886, 'macro avg': {'precision': 0.8958320260250673, 'recall': 0.8430291963490968, 'f1-score': 0.8644897797036435, 'support': 1887.0}, 'weighted avg': {'precision': 0.8995542064592226, 'recall': 0.9003709591944886, 'f1-score': 0.8968234893375152, 'support': 1887.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 2342.58 examples/s]
Map: 100%|██████████| 1887/1887 [00:00<00:00, 2929.16 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6429052784111263, 'f1_macro': 0.8135737267043041, 'f1_micro': 0.8680445151033387, 'accuracy_balanced': 0.7878204629701635, 'accuracy': 0.8680445151033387, 'precision_macro': 0.8590147767323051, 'recall_macro': 0.7878204629701635, 'precision_micro': 0.8680445151033387, 'recall_micro': 0.8680445151033387}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8442622950819673, 'recall': 0.6167664670658682, 'f1-score': 0.71280276816609, 'support': 501.0}, 'not_entailment': {'precision': 0.873767258382643, 'recall': 0.9588744588744589, 'f1-score': 0.9143446852425181, 'support': 1386.0}, 'accuracy': 0.8680445151033387, 'macro avg': {'precision': 0.8590147767323051, 'recall': 0.7878204629701635, 'f1-score': 0.8135737267043041, 'support': 1887.0}, 'weighted avg': {'precision': 0.8659336671724477, 'recall': 0.8680445151033387, 'f1-score': 0.8608351460505254, 'support': 1887.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 3020.88 examples/s]
Map: 100%|██████████| 1887/1887 [00:00<00:00, 3696.70 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6910920417012987, 'f1_macro': 0.8424430253060922, 'f1_micro': 0.8828828828828829, 'accuracy_balanced': 0.8244123911822322, 'accuracy': 0.8828828828828829, 'precision_macro': 0.8680563867816184, 'recall_macro': 0.8244123911822322, 'precision_micro': 0.8828828828828829, 'recall_micro': 0.8828828828828829}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8412322274881516, 'recall': 0.6974459724950884, 'f1-score': 0.7626208378088077, 'support': 509.0}, 'not_entailment': {'precision': 0.8948805460750853, 'recall': 0.951378809869376, 'f1-score': 0.9222652128033767, 'support': 1378.0}, 'accuracy': 0.8828828828828829, 'macro avg': {'precision': 0.8680563867816184, 'recall': 0.8244123911822322, 'f1-score': 0.8424430253060922, 'support': 1887.0}, 'weighted avg': {'precision': 0.8804094309925473, 'recall': 0.8828828828828829, 'f1-score': 0.8792026866389698, 'support': 1887.0}} 



Map: 100%|██████████| 100/100 [00:00<00:00, 2566.56 examples/s]
Map: 100%|██████████| 1887/1887 [00:00<00:00, 3423.18 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Step,Training Loss


Aggregate metrics:  {'MCC': 0.6633062883250996, 'f1_macro': 0.8239313706118685, 'f1_micro': 0.8738738738738738, 'accuracy_balanced': 0.7980748916537717, 'accuracy': 0.8738738738738738, 'precision_macro': 0.8690139998798294, 'recall_macro': 0.7980748916537717, 'precision_micro': 0.8738738738738738, 'recall_micro': 0.8738738738738738}


  labels, preds_max, labels=np.sort(pd.factorize(label_text_alphabetical, sort=True)[0]),


Detailed metrics:  {'entailment': {'precision': 0.8609625668449198, 'recall': 0.6338582677165354, 'f1-score': 0.7301587301587301, 'support': 508.0}, 'not_entailment': {'precision': 0.8770654329147389, 'recall': 0.962291515591008, 'f1-score': 0.9177040110650069, 'support': 1379.0}, 'accuracy': 0.8738738738738738, 'macro avg': {'precision': 0.8690139998798294, 'recall': 0.7980748916537717, 'f1-score': 0.8239313706118685, 'support': 1887.0}, 'weighted avg': {'precision': 0.8727303741105692, 'recall': 0.8738738738738738, 'f1-score': 0.8672148734389399, 'support': 1887.0}} 

100
CPU times: total: 41min 8s
Wall time: 3h 44min 56s


In [11]:
# Gather the per-run few-shot scores (shot count, MCC, accuracy) into one frame.
res_df = pd.DataFrame({'n': shots_list, 'mcc': mcc_list}).assign(accuracy=acc_list)
# Display the mean of each metric over the runs that share a shot count;
# res_df itself keeps one row per run.
res_df.groupby('n').mean()

Unnamed: 0_level_0,mcc,accuracy
n,Unnamed: 1_level_1,Unnamed: 2_level_1
10,0.669726,0.869095
25,0.687318,0.877523
50,0.67668,0.876923
100,0.705905,0.88797


# Covid Zero Shot

In [13]:
# Zero-shot baseline: score every lower-cased premise against a single hypothesis
# using the zero-shot-classification pipeline built from `modname`.
pipe = pipeline("zero-shot-classification", model = modname, device = 0, batch_size = 32)
res = pipe(list(df['premise'].str.lower()), ['The author of this tweet does not believe COVID is dangerous'], hypothesis_template = '{}.', multi_label = False)
# Round the score of the only candidate label to a hard 0/1 decision.
labels = [round(label['scores'][0], 0) for label in res]
df['0_shot'] = labels
# Swap 0 and 1 -- presumably so that 0 means "entailed", matching the coding of
# df['entailment'] (TODO confirm against the data-loading cell).
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: that chained in-place form acts on a temporary copy under
# pandas Copy-on-Write (and pandas >= 3.0), so the swap would silently be lost.
df['0_shot'] = df['0_shot'].replace({0:1, 1:0})

zs_df = pd.DataFrame({'n':0, 'mcc':matthews_corrcoef(df['entailment'], df['0_shot']), 'accuracy':accuracy_score(df['entailment'], df['0_shot'])}, index = [0])
# Drop any n == 0 row left by an earlier execution of this cell so that
# re-running it does not append a duplicate zero-shot result.
res_df = pd.concat([res_df[res_df['n'] != 0], zs_df], axis = 0)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['0_shot'].replace({0:1, 1:0}, inplace = True)


In [14]:
# Write the collected results to CSV, leaving out the index column.
res_df.to_csv(path_or_buf='covid_fewshot_large2.csv', index=False)