In [1]:
import sys,os
import json
project_root = os.path.dirname(os.getcwd())
sys.path.insert(0,project_root)

from src.data_handling.load_data import *
from src.data_handling.preprocessing import *
from src.models.model_setup import *
from src.trainer.training import *
from src.trainer.file_utils import *
from transformers import set_seed

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
seed_value = 42
set_seed(seed_value)
# supports tasks:
# cb
# rte
# sick
# mrpc
# boolq
# csqa ?????????? or mc?
# argument
# scitail
# imdb
# sst2
# qqp
# mnli

#tasks = ["cb","rte","sick","mrpc","boolq","argument","scitail","imdb","sst2","qqp","mnli"]
tasks = ["mrpc","boolq","argument","scitail","imdb","sst2","qqp","mnli"]
# task = "scitail"
model_name = "bert-base-uncased"
output_path = "C:/Users/Hector Auvinen/Desktop/eval_results/test_root_for_evals"
adapter_conf_path = "../src/configs/adapter_configs.json"

"""output_adapter = config["output_adapter"]
mh_adapter = config["mh_adapter"]
reduction_factor = config["reduction_factor"]
non_linearity = config["non_linearity"]"""

'output_adapter = config["output_adapter"]\nmh_adapter = config["mh_adapter"]\nreduction_factor = config["reduction_factor"]\nnon_linearity = config["non_linearity"]'

In [7]:
# MOVE THIS TO MODULE
with open(adapter_conf_path, "r") as json_file:
    adapter_configs = json.load(json_file)
adapter_configs

{'output_adapter_redf_2': {'output_adapter': True,
  'mh_adapter': False,
  'reduction_factor': 2,
  'non_linearity': 'relu'},
 'output_adapter_redf_16': {'output_adapter': True,
  'mh_adapter': False,
  'reduction_factor': 16,
  'non_linearity': 'relu'},
 'output_adapter_redf_64': {'output_adapter': True,
  'mh_adapter': False,
  'reduction_factor': 64,
  'non_linearity': 'relu'}}

In [10]:
for name,config in adapter_configs.items():
    print(name,config)
    output_dir = os.path.join(output_path,name)
    
    if not os.path.exists(output_dir):
        # If the folder doesn't exist, create it
        os.makedirs(output_dir)
        print(f"Folder '{output_dir}' created.")
    else:
        print(f"Folder '{output_dir}' already exists.")
    
    print(f"**********************************RUNNING CONFIG {name}*****************************")
    print("CONFIG",config)
    for task in tasks:
        print(f"**********************************RUNNING TASK {task}*****************************")
        # load dataset
        data = load_hf_dataset(task,debug=False)
        # get tokenizer (bert)
        tokenizer = get_tokenizer(model_name)
        # get encoding method for particular task
        encode = get_encoding(task)
        # apply encoding
        dataset = preprocess_dataset(data,encode,tokenizer)
        # get label count
        num_labels = get_label_count(dataset)
        # set up model (head with num labels)
        model = setup_model(model_name,num_labels,dataset)
        
        # set up adapter config
        adapter_config = adapters.BnConfig(
                                output_adapter=config["output_adapter"],
                                mh_adapter=config["mh_adapter"],
                                reduction_factor=config["reduction_factor"],
                                non_linearity=config["non_linearity"])

        # add adapter
        model = add_clf_adapter(task_name=task,model=model,num_labels=num_labels,adapter_config=adapter_config)
        
        # set up training args
        final_output = os.path.join(output_dir,task)
        default_args = TrainingParameters(output_dir=final_output,per_device_train_batch_size=1)
        default_args.lr_scheduler_type = "linear"
        train_args = get_training_arguments(default_args)
        
        # set up trainer
        trainer = get_trainer(train_args,dataset,model,early_stopping=3)
        # train
        trainer.train()
        
        # evaluate and write results to file
        eval_results = trainer.evaluate()
        write_eval_results(eval_results,output_dir,task,trainer,adapter_config)

    

output_adapter_redf_2 {'output_adapter': True, 'mh_adapter': False, 'reduction_factor': 2, 'non_linearity': 'relu'}
Folder 'C:/Users/Hector Auvinen/Desktop/eval_results/test_root_for_evals\output_adapter_redf_2' already exists.
**********************************RUNNING CONFIG output_adapter_redf_2*****************************
CONFIG {'output_adapter': True, 'mh_adapter': False, 'reduction_factor': 2, 'non_linearity': 'relu'}
**********************************RUNNING TASK mrpc*****************************
using bert tokenizer
getting encoding:
<function encode_mrpc at 0x000002E00F811550>
mapped
{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/13770 [00:54<?, ?it/s]
                                                     
  0%|          | 51/110040 [00:15<63:21:44,  2.07s/it]

{'eval_loss': 1.194008231163025, 'eval_accuracy': 0.3161764705882353, 'eval_runtime': 11.0719, 'eval_samples_per_second': 36.85, 'eval_steps_per_second': 4.606, 'epoch': 0.01}


                                                      
  0%|          | 100/110040 [00:32<2:41:09, 11.37it/s]

{'eval_loss': 1.3777238130569458, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.1982, 'eval_samples_per_second': 36.434, 'eval_steps_per_second': 4.554, 'epoch': 0.03}


                                                       
  0%|          | 150/110040 [00:48<2:40:57, 11.38it/s]

{'eval_loss': 1.0817980766296387, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.2598, 'eval_samples_per_second': 36.235, 'eval_steps_per_second': 4.529, 'epoch': 0.04}


  0%|          | 200/110040 [00:53<2:42:59, 11.23it/s] 

{'loss': 1.3456, 'learning_rate': 9.981824790985097e-05, 'epoch': 0.05}


                                                      
  0%|          | 200/110040 [01:04<2:42:59, 11.23it/s]

{'eval_loss': 1.1881022453308105, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.2053, 'eval_samples_per_second': 36.411, 'eval_steps_per_second': 4.551, 'epoch': 0.05}


                                                       
  0%|          | 250/110040 [01:20<2:42:15, 11.28it/s]

{'eval_loss': 1.534320592880249, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.2911, 'eval_samples_per_second': 36.135, 'eval_steps_per_second': 4.517, 'epoch': 0.07}


                                                       
  0%|          | 300/110040 [01:36<2:39:55, 11.44it/s]

{'eval_loss': 0.6966262459754944, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.2599, 'eval_samples_per_second': 36.235, 'eval_steps_per_second': 4.529, 'epoch': 0.08}


                                                       
  0%|          | 350/110040 [01:52<2:38:48, 11.51it/s]

{'eval_loss': 1.4446659088134766, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.33, 'eval_samples_per_second': 36.01, 'eval_steps_per_second': 4.501, 'epoch': 0.1}


  0%|          | 400/110040 [01:57<2:42:05, 11.27it/s] 

{'loss': 1.2046, 'learning_rate': 9.963649581970194e-05, 'epoch': 0.11}


                                                      
  0%|          | 400/110040 [02:09<2:42:05, 11.27it/s]

{'eval_loss': 0.8982949256896973, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.5439, 'eval_samples_per_second': 35.343, 'eval_steps_per_second': 4.418, 'epoch': 0.11}


                                                       
  0%|          | 450/110040 [02:24<2:39:13, 11.47it/s]

{'eval_loss': 1.4132839441299438, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.3382, 'eval_samples_per_second': 35.985, 'eval_steps_per_second': 4.498, 'epoch': 0.12}


                                                       
  0%|          | 500/110040 [02:42<3:13:39,  9.43it/s]

{'eval_loss': 1.0250717401504517, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.6238, 'eval_samples_per_second': 35.1, 'eval_steps_per_second': 4.388, 'epoch': 0.14}


                                                        
  0%|          | 550/110040 [02:59<2:41:08, 11.32it/s]

{'eval_loss': 1.3762117624282837, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.3718, 'eval_samples_per_second': 35.878, 'eval_steps_per_second': 4.485, 'epoch': 0.15}


  1%|          | 600/110040 [03:04<2:42:30, 11.22it/s] 

{'loss': 1.0753, 'learning_rate': 9.945474372955289e-05, 'epoch': 0.16}


                                                      
  1%|          | 600/110040 [03:15<2:42:30, 11.22it/s]

{'eval_loss': 1.3952772617340088, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.2386, 'eval_samples_per_second': 36.303, 'eval_steps_per_second': 4.538, 'epoch': 0.16}


                                                       
  1%|          | 650/110040 [03:31<2:40:52, 11.33it/s]

{'eval_loss': 0.9769465327262878, 'eval_accuracy': 0.6936274509803921, 'eval_runtime': 11.3725, 'eval_samples_per_second': 35.876, 'eval_steps_per_second': 4.485, 'epoch': 0.18}


                                                       
  1%|          | 700/110040 [03:47<2:38:26, 11.50it/s]

{'eval_loss': 1.156307339668274, 'eval_accuracy': 0.7034313725490197, 'eval_runtime': 11.2817, 'eval_samples_per_second': 36.165, 'eval_steps_per_second': 4.521, 'epoch': 0.19}


                                                       
  1%|          | 750/110040 [04:04<2:40:10, 11.37it/s]

{'eval_loss': 1.8081706762313843, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.5607, 'eval_samples_per_second': 35.292, 'eval_steps_per_second': 4.411, 'epoch': 0.2}


  1%|          | 800/110040 [04:09<2:40:14, 11.36it/s] 

{'loss': 1.2189, 'learning_rate': 9.927299163940386e-05, 'epoch': 0.22}


                                                      
  1%|          | 801/110040 [04:21<56:55:29,  1.88s/it]

{'eval_loss': 0.7481361627578735, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.442, 'eval_samples_per_second': 35.658, 'eval_steps_per_second': 4.457, 'epoch': 0.22}


                                                       
  1%|          | 850/110040 [04:37<2:38:40, 11.47it/s]

{'eval_loss': 0.8483681082725525, 'eval_accuracy': 0.6887254901960784, 'eval_runtime': 11.5716, 'eval_samples_per_second': 35.259, 'eval_steps_per_second': 4.407, 'epoch': 0.23}


                                                       
  1%|          | 900/110040 [04:53<2:38:35, 11.47it/s]

{'eval_loss': 1.4063222408294678, 'eval_accuracy': 0.6862745098039216, 'eval_runtime': 11.3587, 'eval_samples_per_second': 35.92, 'eval_steps_per_second': 4.49, 'epoch': 0.25}


                                                       
  1%|          | 950/110040 [05:09<2:39:59, 11.36it/s]

{'eval_loss': 1.4180783033370972, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.416, 'eval_samples_per_second': 35.739, 'eval_steps_per_second': 4.467, 'epoch': 0.26}


  1%|          | 1000/110040 [05:14<2:37:20, 11.55it/s]

{'loss': 1.2526, 'learning_rate': 9.909123954925483e-05, 'epoch': 0.27}


                                                       
  1%|          | 1000/110040 [05:25<2:37:20, 11.55it/s]

{'eval_loss': 1.5799509286880493, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.3857, 'eval_samples_per_second': 35.834, 'eval_steps_per_second': 4.479, 'epoch': 0.27}


                                                        
  1%|          | 1050/110040 [05:42<2:36:14, 11.63it/s]

{'eval_loss': 1.2308653593063354, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.7046, 'eval_samples_per_second': 34.858, 'eval_steps_per_second': 4.357, 'epoch': 0.29}


  1%|          | 1050/110040 [05:43<9:53:41,  3.06it/s]


{'train_runtime': 343.1602, 'train_samples_per_second': 320.667, 'train_steps_per_second': 320.667, 'train_loss': 1.2219644891648065, 'epoch': 0.29}


100%|██████████| 51/51 [00:11<00:00,  4.61it/s]
Using the latest cached version of the dataset since super_glue couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'boolq' at C:\Users\Hector Auvinen\.cache\huggingface\datasets\super_glue\boolq\1.0.3\b051de3f07b5fd5ab80398a4836458db56234e24 (last modified on Fri Feb  2 15:44:42 2024).


Writing eval results
{'eval_loss': 1.0250717401504517, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 11.3074, 'eval_samples_per_second': 36.083, 'eval_steps_per_second': 4.51, 'epoch': 0.29}
**********************************RUNNING TASK boolq*****************************
using bert tokenizer
getting encoding:
<function encode_boolq at 0x000002E00F811700>
mapped
{'train': ['question', 'passage', 'idx', 'label', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['question', 'passage', 'idx', 'label', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['question', 'passage', 'idx', 'label', 'input_ids', 'token_type_ids', 'attention_mask']}


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                     
  0%|          | 50/282810 [00:52<6:14:10, 12.59it/s]

{'eval_loss': 1.5164058208465576, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 48.6709, 'eval_samples_per_second': 67.186, 'eval_steps_per_second': 8.403, 'epoch': 0.01}


                                                       
  0%|          | 100/282810 [01:45<6:54:26, 11.37it/s]

{'eval_loss': 1.0875650644302368, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 48.3684, 'eval_samples_per_second': 67.606, 'eval_steps_per_second': 8.456, 'epoch': 0.01}


                                                        
  0%|          | 150/282810 [02:38<6:29:15, 12.10it/s]

{'eval_loss': 1.762028455734253, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 48.2818, 'eval_samples_per_second': 67.727, 'eval_steps_per_second': 8.471, 'epoch': 0.02}


  0%|          | 200/282810 [02:43<6:21:06, 12.36it/s]  

{'loss': 1.168, 'learning_rate': 9.992928114281673e-05, 'epoch': 0.02}


                                                      
  0%|          | 200/282810 [03:32<6:21:06, 12.36it/s]

{'eval_loss': 1.5358911752700806, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 49.3704, 'eval_samples_per_second': 66.234, 'eval_steps_per_second': 8.284, 'epoch': 0.02}


                                                        
  0%|          | 250/282810 [04:26<6:59:09, 11.24it/s]

{'eval_loss': 1.4753131866455078, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 48.9428, 'eval_samples_per_second': 66.813, 'eval_steps_per_second': 8.357, 'epoch': 0.03}


                                                        
  0%|          | 301/282810 [05:20<586:59:18,  7.48s/it]

{'eval_loss': 1.3973417282104492, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 49.2041, 'eval_samples_per_second': 66.458, 'eval_steps_per_second': 8.312, 'epoch': 0.03}


                                                        
  0%|          | 350/282810 [06:12<6:18:19, 12.44it/s]

{'eval_loss': 0.6718029975891113, 'eval_accuracy': 0.6214067278287462, 'eval_runtime': 48.3256, 'eval_samples_per_second': 67.666, 'eval_steps_per_second': 8.463, 'epoch': 0.04}


  0%|          | 400/282810 [06:17<6:18:41, 12.43it/s]  

{'loss': 1.2662, 'learning_rate': 9.985856228563348e-05, 'epoch': 0.04}


                                                      
  0%|          | 401/282810 [07:07<700:42:16,  8.93s/it]

{'eval_loss': 0.8280194997787476, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 50.0293, 'eval_samples_per_second': 65.362, 'eval_steps_per_second': 8.175, 'epoch': 0.04}


                                                        
  0%|          | 450/282810 [08:01<6:24:36, 12.24it/s]

{'eval_loss': 0.750065267086029, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 49.6817, 'eval_samples_per_second': 65.819, 'eval_steps_per_second': 8.232, 'epoch': 0.05}


                                                        
  0%|          | 500/282810 [08:56<6:26:06, 12.19it/s]

{'eval_loss': 0.7082879543304443, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 49.8662, 'eval_samples_per_second': 65.575, 'eval_steps_per_second': 8.202, 'epoch': 0.05}


                                                        
  0%|          | 550/282810 [09:50<6:33:04, 11.97it/s]

{'eval_loss': 1.537850260734558, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 49.5375, 'eval_samples_per_second': 66.011, 'eval_steps_per_second': 8.256, 'epoch': 0.06}


  0%|          | 600/282810 [09:55<6:16:33, 12.49it/s]  

{'loss': 1.0913, 'learning_rate': 9.97878434284502e-05, 'epoch': 0.06}


                                                      
  0%|          | 600/282810 [10:43<6:16:33, 12.49it/s]

{'eval_loss': 1.5515137910842896, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 48.1035, 'eval_samples_per_second': 67.978, 'eval_steps_per_second': 8.502, 'epoch': 0.06}


                                                        
  0%|          | 650/282810 [11:37<6:22:39, 12.29it/s]

{'eval_loss': 1.3113577365875244, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 50.1236, 'eval_samples_per_second': 65.239, 'eval_steps_per_second': 8.16, 'epoch': 0.07}


  0%|          | 650/282810 [11:38<84:12:07,  1.07s/it]


{'train_runtime': 698.2914, 'train_samples_per_second': 405.003, 'train_steps_per_second': 405.003, 'train_loss': 1.2020009084848258, 'epoch': 0.07}


100%|██████████| 409/409 [00:48<00:00,  8.43it/s]


Writing eval results
{'eval_loss': 0.7082879543304443, 'eval_accuracy': 0.6217125382262997, 'eval_runtime': 48.6612, 'eval_samples_per_second': 67.199, 'eval_steps_per_second': 8.405, 'epoch': 0.07}
**********************************RUNNING TASK argument*****************************
using bert tokenizer
getting encoding:
<function encode_argument at 0x000002E00F811670>
mapped
{'train': ['topic', 'sentence', 'label', 'set', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['topic', 'sentence', 'label', 'set', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['topic', 'sentence', 'label', 'set', 'input_ids', 'token_type_ids', 'attention_mask']}


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                      
  0%|          | 51/550230 [00:33<686:21:32,  4.49s/it]

{'eval_loss': 1.7608586549758911, 'eval_accuracy': 0.5631733594515181, 'eval_runtime': 29.2901, 'eval_samples_per_second': 69.716, 'eval_steps_per_second': 8.74, 'epoch': 0.0}


                                                       
  0%|          | 101/550230 [01:08<853:40:05,  5.59s/it]

{'eval_loss': 1.4985735416412354, 'eval_accuracy': 0.5631733594515181, 'eval_runtime': 31.0809, 'eval_samples_per_second': 65.699, 'eval_steps_per_second': 8.237, 'epoch': 0.01}


                                                        
  0%|          | 151/550230 [01:43<702:56:10,  4.60s/it]

{'eval_loss': 2.2671444416046143, 'eval_accuracy': 0.5631733594515181, 'eval_runtime': 30.0108, 'eval_samples_per_second': 68.042, 'eval_steps_per_second': 8.53, 'epoch': 0.01}


  0%|          | 200/550230 [01:47<12:22:28, 12.35it/s] 

{'loss': 1.5499, 'learning_rate': 9.996365156389147e-05, 'epoch': 0.01}


                                                       
  0%|          | 201/550230 [02:18<840:34:07,  5.50s/it]

{'eval_loss': 1.4128878116607666, 'eval_accuracy': 0.5631733594515181, 'eval_runtime': 30.612, 'eval_samples_per_second': 66.706, 'eval_steps_per_second': 8.363, 'epoch': 0.01}


                                                        
  0%|          | 251/550230 [02:52<705:49:38,  4.62s/it]

{'eval_loss': 0.9320893883705139, 'eval_accuracy': 0.5592556317335945, 'eval_runtime': 30.1425, 'eval_samples_per_second': 67.745, 'eval_steps_per_second': 8.493, 'epoch': 0.01}


                                                        
  0%|          | 301/550230 [03:26<814:51:42,  5.33s/it]

{'eval_loss': 1.6113479137420654, 'eval_accuracy': 0.5631733594515181, 'eval_runtime': 29.6427, 'eval_samples_per_second': 68.887, 'eval_steps_per_second': 8.636, 'epoch': 0.02}


                                                        
  0%|          | 350/550230 [04:01<12:17:04, 12.43it/s]

{'eval_loss': 1.6139072179794312, 'eval_accuracy': 0.5646425073457395, 'eval_runtime': 30.6086, 'eval_samples_per_second': 66.713, 'eval_steps_per_second': 8.364, 'epoch': 0.02}


  0%|          | 400/550230 [04:05<12:36:21, 12.12it/s] 

{'loss': 1.2868, 'learning_rate': 9.992730312778293e-05, 'epoch': 0.02}


                                                       
  0%|          | 401/550230 [04:36<833:53:28,  5.46s/it]

{'eval_loss': 1.7018665075302124, 'eval_accuracy': 0.5803134182174339, 'eval_runtime': 30.3755, 'eval_samples_per_second': 67.225, 'eval_steps_per_second': 8.428, 'epoch': 0.02}


                                                        
  0%|          | 451/550230 [05:10<688:44:33,  4.51s/it]

{'eval_loss': 0.9977561235427856, 'eval_accuracy': 0.5421155729676788, 'eval_runtime': 29.4086, 'eval_samples_per_second': 69.435, 'eval_steps_per_second': 8.705, 'epoch': 0.02}


                                                        
  0%|          | 500/550230 [05:44<12:14:31, 12.47it/s]

{'eval_loss': 1.2300513982772827, 'eval_accuracy': 0.5631733594515181, 'eval_runtime': 29.6595, 'eval_samples_per_second': 68.848, 'eval_steps_per_second': 8.631, 'epoch': 0.03}


                                                        
  0%|          | 551/550230 [06:18<687:02:34,  4.50s/it]

{'eval_loss': 1.2472760677337646, 'eval_accuracy': 0.5788442703232125, 'eval_runtime': 29.3544, 'eval_samples_per_second': 69.564, 'eval_steps_per_second': 8.721, 'epoch': 0.03}


  0%|          | 600/550230 [06:22<12:51:01, 11.88it/s] 

{'loss': 1.4417, 'learning_rate': 9.98909546916744e-05, 'epoch': 0.03}


                                                       
  0%|          | 600/550230 [06:52<12:51:01, 11.88it/s]

{'eval_loss': 1.584803819656372, 'eval_accuracy': 0.5626836434867777, 'eval_runtime': 29.7894, 'eval_samples_per_second': 68.548, 'eval_steps_per_second': 8.594, 'epoch': 0.03}


                                                        
  0%|          | 650/550230 [07:27<12:06:46, 12.60it/s]

{'eval_loss': 1.033402681350708, 'eval_accuracy': 0.5509304603330069, 'eval_runtime': 30.6917, 'eval_samples_per_second': 66.533, 'eval_steps_per_second': 8.341, 'epoch': 0.04}


                                                        
  0%|          | 701/550230 [08:01<817:34:27,  5.36s/it]

{'eval_loss': 2.059927463531494, 'eval_accuracy': 0.5729676787463271, 'eval_runtime': 29.7964, 'eval_samples_per_second': 68.532, 'eval_steps_per_second': 8.592, 'epoch': 0.04}


                                                        
  0%|          | 751/550230 [08:35<807:28:10,  5.29s/it]

{'eval_loss': 1.2870780229568481, 'eval_accuracy': 0.5886385896180215, 'eval_runtime': 29.4483, 'eval_samples_per_second': 69.342, 'eval_steps_per_second': 8.693, 'epoch': 0.04}


  0%|          | 800/550230 [08:39<12:23:33, 12.32it/s] 

{'loss': 1.5432, 'learning_rate': 9.985460625556586e-05, 'epoch': 0.04}


                                                       
  0%|          | 801/550230 [09:10<717:33:20,  4.70s/it]

{'eval_loss': 1.2078216075897217, 'eval_accuracy': 0.6033300685602351, 'eval_runtime': 30.6836, 'eval_samples_per_second': 66.55, 'eval_steps_per_second': 8.343, 'epoch': 0.04}


                                                        
  0%|          | 851/550230 [09:44<824:25:52,  5.40s/it]

{'eval_loss': 1.0222920179367065, 'eval_accuracy': 0.5901077375122429, 'eval_runtime': 30.0729, 'eval_samples_per_second': 67.902, 'eval_steps_per_second': 8.513, 'epoch': 0.05}


                                                        
  0%|          | 901/550230 [10:20<847:08:06,  5.55s/it]

{'eval_loss': 0.9366157650947571, 'eval_accuracy': 0.5994123408423114, 'eval_runtime': 30.876, 'eval_samples_per_second': 66.135, 'eval_steps_per_second': 8.291, 'epoch': 0.05}


                                                        
  0%|          | 951/550230 [10:53<685:04:37,  4.49s/it]

{'eval_loss': 1.2816725969314575, 'eval_accuracy': 0.6199804113614104, 'eval_runtime': 29.3774, 'eval_samples_per_second': 69.509, 'eval_steps_per_second': 8.714, 'epoch': 0.05}


  0%|          | 1000/550230 [10:57<12:26:31, 12.26it/s]

{'loss': 1.2367, 'learning_rate': 9.981825781945732e-05, 'epoch': 0.05}


                                                        
  0%|          | 1000/550230 [11:27<12:26:31, 12.26it/s]

{'eval_loss': 1.2419785261154175, 'eval_accuracy': 0.5607247796278159, 'eval_runtime': 30.2373, 'eval_samples_per_second': 67.533, 'eval_steps_per_second': 8.466, 'epoch': 0.05}


                                                         
  0%|          | 1050/550230 [12:04<14:41:31, 10.38it/s]

{'eval_loss': 0.9773395657539368, 'eval_accuracy': 0.6052889324191969, 'eval_runtime': 31.0904, 'eval_samples_per_second': 65.679, 'eval_steps_per_second': 8.234, 'epoch': 0.06}


                                                         
  0%|          | 1101/550230 [12:41<728:43:28,  4.78s/it]

{'eval_loss': 1.0648025274276733, 'eval_accuracy': 0.6111655239960823, 'eval_runtime': 31.1846, 'eval_samples_per_second': 65.481, 'eval_steps_per_second': 8.209, 'epoch': 0.06}


                                                         
  0%|          | 1150/550230 [13:15<12:23:45, 12.30it/s]

{'eval_loss': 1.0281845331192017, 'eval_accuracy': 0.5837414299706171, 'eval_runtime': 30.5464, 'eval_samples_per_second': 66.849, 'eval_steps_per_second': 8.381, 'epoch': 0.06}


  0%|          | 1200/550230 [13:19<12:14:25, 12.46it/s] 

{'loss': 1.3103, 'learning_rate': 9.978190938334879e-05, 'epoch': 0.07}


                                                        
  0%|          | 1200/550230 [13:50<12:14:25, 12.46it/s]

{'eval_loss': 1.305862545967102, 'eval_accuracy': 0.6116552399608227, 'eval_runtime': 30.6624, 'eval_samples_per_second': 66.596, 'eval_steps_per_second': 8.349, 'epoch': 0.07}


                                                         
  0%|          | 1250/550230 [14:25<12:17:14, 12.41it/s]

{'eval_loss': 0.9749594330787659, 'eval_accuracy': 0.6092066601371204, 'eval_runtime': 30.0317, 'eval_samples_per_second': 67.995, 'eval_steps_per_second': 8.524, 'epoch': 0.07}


                                                         
  0%|          | 1300/550230 [15:00<12:08:45, 12.55it/s]

{'eval_loss': 1.2094569206237793, 'eval_accuracy': 0.6292850146914789, 'eval_runtime': 30.3088, 'eval_samples_per_second': 67.373, 'eval_steps_per_second': 8.446, 'epoch': 0.07}


                                                         
  0%|          | 1350/550230 [15:35<12:12:05, 12.50it/s]

{'eval_loss': 1.0527162551879883, 'eval_accuracy': 0.6258570029382958, 'eval_runtime': 30.4562, 'eval_samples_per_second': 67.047, 'eval_steps_per_second': 8.406, 'epoch': 0.07}


  0%|          | 1400/550230 [15:39<12:18:43, 12.38it/s] 

{'loss': 1.0927, 'learning_rate': 9.974556094724025e-05, 'epoch': 0.08}


                                                        
  0%|          | 1400/550230 [16:10<12:18:43, 12.38it/s]

{'eval_loss': 0.8772376775741577, 'eval_accuracy': 0.6474045053868757, 'eval_runtime': 30.6486, 'eval_samples_per_second': 66.626, 'eval_steps_per_second': 8.353, 'epoch': 0.08}


                                                         
  0%|          | 1451/550230 [16:44<804:05:46,  5.27s/it]

{'eval_loss': 1.0531339645385742, 'eval_accuracy': 0.6077375122428991, 'eval_runtime': 29.3871, 'eval_samples_per_second': 69.486, 'eval_steps_per_second': 8.711, 'epoch': 0.08}


                                                         
  0%|          | 1500/550230 [17:18<12:23:36, 12.30it/s]

{'eval_loss': 1.1153688430786133, 'eval_accuracy': 0.5754162585700294, 'eval_runtime': 29.6273, 'eval_samples_per_second': 68.923, 'eval_steps_per_second': 8.641, 'epoch': 0.08}


                                                         
  0%|          | 1551/550230 [17:53<834:13:10,  5.47s/it]

{'eval_loss': 1.1250855922698975, 'eval_accuracy': 0.6762977473065622, 'eval_runtime': 30.4495, 'eval_samples_per_second': 67.062, 'eval_steps_per_second': 8.407, 'epoch': 0.08}


  0%|          | 1600/550230 [17:57<12:23:55, 12.29it/s] 

{'loss': 1.0297, 'learning_rate': 9.970921251113172e-05, 'epoch': 0.09}


                                                        
  0%|          | 1601/550230 [18:28<817:52:55,  5.37s/it]

{'eval_loss': 1.0495933294296265, 'eval_accuracy': 0.67384916748286, 'eval_runtime': 29.8604, 'eval_samples_per_second': 68.385, 'eval_steps_per_second': 8.573, 'epoch': 0.09}


                                                         
  0%|          | 1650/550230 [19:04<14:35:26, 10.44it/s]

{'eval_loss': 0.9821365475654602, 'eval_accuracy': 0.6743388834476004, 'eval_runtime': 31.399, 'eval_samples_per_second': 65.034, 'eval_steps_per_second': 8.153, 'epoch': 0.09}


                                                         
  0%|          | 1700/550230 [19:40<14:44:23, 10.34it/s]

{'eval_loss': 1.2166441679000854, 'eval_accuracy': 0.6493633692458374, 'eval_runtime': 31.0855, 'eval_samples_per_second': 65.69, 'eval_steps_per_second': 8.235, 'epoch': 0.09}


                                                         
  0%|          | 1750/550230 [20:16<14:41:51, 10.37it/s]

{'eval_loss': 0.9147472977638245, 'eval_accuracy': 0.6420176297747306, 'eval_runtime': 31.0342, 'eval_samples_per_second': 65.798, 'eval_steps_per_second': 8.249, 'epoch': 0.1}


  0%|          | 1800/550230 [20:21<14:35:38, 10.44it/s] 

{'loss': 1.1708, 'learning_rate': 9.967286407502318e-05, 'epoch': 0.1}


                                                        
  0%|          | 1800/550230 [20:52<14:35:38, 10.44it/s]

{'eval_loss': 1.1432228088378906, 'eval_accuracy': 0.633692458374143, 'eval_runtime': 30.9092, 'eval_samples_per_second': 66.065, 'eval_steps_per_second': 8.282, 'epoch': 0.1}


                                                         
  0%|          | 1850/550230 [21:28<12:21:38, 12.32it/s]

{'eval_loss': 0.9404911994934082, 'eval_accuracy': 0.6567091087169442, 'eval_runtime': 31.1925, 'eval_samples_per_second': 65.464, 'eval_steps_per_second': 8.207, 'epoch': 0.1}


                                                         
  0%|          | 1900/550230 [22:02<12:23:13, 12.30it/s]

{'eval_loss': 1.2202110290527344, 'eval_accuracy': 0.6415279138099902, 'eval_runtime': 29.9992, 'eval_samples_per_second': 68.069, 'eval_steps_per_second': 8.534, 'epoch': 0.1}


                                                         
  0%|          | 1950/550230 [22:37<12:14:37, 12.44it/s]

{'eval_loss': 1.0223841667175293, 'eval_accuracy': 0.6650342801175319, 'eval_runtime': 30.4165, 'eval_samples_per_second': 67.135, 'eval_steps_per_second': 8.416, 'epoch': 0.11}


  0%|          | 2000/550230 [22:41<12:24:26, 12.27it/s] 

{'loss': 1.2472, 'learning_rate': 9.963651563891464e-05, 'epoch': 0.11}


                                                        
  0%|          | 2000/550230 [23:12<12:24:26, 12.27it/s]

{'eval_loss': 0.9061742424964905, 'eval_accuracy': 0.6596474045053868, 'eval_runtime': 30.4167, 'eval_samples_per_second': 67.134, 'eval_steps_per_second': 8.416, 'epoch': 0.11}


                                                         
  0%|          | 2050/550230 [23:47<12:11:00, 12.50it/s]

{'eval_loss': 1.7337301969528198, 'eval_accuracy': 0.6214495592556317, 'eval_runtime': 30.4458, 'eval_samples_per_second': 67.07, 'eval_steps_per_second': 8.408, 'epoch': 0.11}


                                                         
  0%|          | 2101/550230 [24:21<801:31:22,  5.26s/it]

{'eval_loss': 0.8802947402000427, 'eval_accuracy': 0.6513222331047992, 'eval_runtime': 29.3276, 'eval_samples_per_second': 69.627, 'eval_steps_per_second': 8.729, 'epoch': 0.11}


                                                         
  0%|          | 2151/550230 [24:56<827:39:36,  5.44s/it]

{'eval_loss': 0.9047412872314453, 'eval_accuracy': 0.6733594515181195, 'eval_runtime': 30.2869, 'eval_samples_per_second': 67.422, 'eval_steps_per_second': 8.453, 'epoch': 0.12}


  0%|          | 2200/550230 [25:00<14:28:58, 10.51it/s] 

{'loss': 1.1497, 'learning_rate': 9.960016720280611e-05, 'epoch': 0.12}


                                                        
  0%|          | 2200/550230 [25:31<14:28:58, 10.51it/s]

{'eval_loss': 0.9228912591934204, 'eval_accuracy': 0.6777668952007836, 'eval_runtime': 31.1489, 'eval_samples_per_second': 65.556, 'eval_steps_per_second': 8.219, 'epoch': 0.12}


                                                         
  0%|          | 2250/550230 [26:08<14:44:03, 10.33it/s]

{'eval_loss': 1.0207178592681885, 'eval_accuracy': 0.6263467189030363, 'eval_runtime': 31.1025, 'eval_samples_per_second': 65.654, 'eval_steps_per_second': 8.231, 'epoch': 0.12}


                                                         
  0%|          | 2301/550230 [26:44<839:42:34,  5.52s/it]

{'eval_loss': 0.9881203174591064, 'eval_accuracy': 0.6758080313418218, 'eval_runtime': 30.6215, 'eval_samples_per_second': 66.685, 'eval_steps_per_second': 8.36, 'epoch': 0.13}


                                                         
  0%|          | 2350/550230 [27:19<12:09:48, 12.51it/s]

{'eval_loss': 0.8389394283294678, 'eval_accuracy': 0.6856023506366308, 'eval_runtime': 30.846, 'eval_samples_per_second': 66.2, 'eval_steps_per_second': 8.299, 'epoch': 0.13}


  0%|          | 2400/550230 [27:23<12:20:01, 12.34it/s] 

{'loss': 1.2448, 'learning_rate': 9.956381876669757e-05, 'epoch': 0.13}


                                                        
  0%|          | 2400/550230 [27:54<12:20:01, 12.34it/s]

{'eval_loss': 0.8459873199462891, 'eval_accuracy': 0.6743388834476004, 'eval_runtime': 30.9955, 'eval_samples_per_second': 65.881, 'eval_steps_per_second': 8.259, 'epoch': 0.13}


                                                         
  0%|          | 2450/550230 [28:28<12:12:13, 12.47it/s]

{'eval_loss': 1.031544804573059, 'eval_accuracy': 0.643486777668952, 'eval_runtime': 29.5065, 'eval_samples_per_second': 69.205, 'eval_steps_per_second': 8.676, 'epoch': 0.13}


                                                         
  0%|          | 2500/550230 [29:03<12:17:50, 12.37it/s]

{'eval_loss': 0.8847281336784363, 'eval_accuracy': 0.6851126346718903, 'eval_runtime': 30.1926, 'eval_samples_per_second': 67.633, 'eval_steps_per_second': 8.479, 'epoch': 0.14}


                                                         
  0%|          | 2550/550230 [29:39<12:28:15, 12.20it/s]

{'eval_loss': 1.051991581916809, 'eval_accuracy': 0.6640548481880509, 'eval_runtime': 31.0144, 'eval_samples_per_second': 65.84, 'eval_steps_per_second': 8.254, 'epoch': 0.14}


  0%|          | 2600/550230 [29:43<12:15:48, 12.40it/s] 

{'loss': 1.0278, 'learning_rate': 9.952747033058903e-05, 'epoch': 0.14}


                                                        
  0%|          | 2600/550230 [30:14<12:15:48, 12.40it/s]

{'eval_loss': 1.4107129573822021, 'eval_accuracy': 0.6106758080313418, 'eval_runtime': 30.6623, 'eval_samples_per_second': 66.597, 'eval_steps_per_second': 8.349, 'epoch': 0.14}


                                                         
  0%|          | 2650/550230 [30:49<12:15:46, 12.40it/s]

{'eval_loss': 1.1832376718521118, 'eval_accuracy': 0.6214495592556317, 'eval_runtime': 29.9534, 'eval_samples_per_second': 68.172, 'eval_steps_per_second': 8.547, 'epoch': 0.14}


  0%|          | 2650/550230 [30:49<106:08:49,  1.43it/s]


{'train_runtime': 1849.3008, 'train_samples_per_second': 297.534, 'train_steps_per_second': 297.534, 'train_loss': 1.2578691014703716, 'epoch': 0.14}


100%|██████████| 256/256 [00:30<00:00,  8.31it/s]
Using the latest cached version of the dataset since scitail couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'tsv_format' at C:\Users\Hector Auvinen\.cache\huggingface\datasets\scitail\tsv_format\0.0.0\0cc4353235b289165dfde1c7c5d1be983f99ce44 (last modified on Sun Feb  4 13:43:06 2024).


Writing eval results
{'eval_loss': 0.8847281336784363, 'eval_accuracy': 0.6851126346718903, 'eval_runtime': 30.9384, 'eval_samples_per_second': 66.002, 'eval_steps_per_second': 8.275, 'epoch': 0.14}
**********************************RUNNING TASK scitail*****************************
using bert tokenizer
getting encoding:
<function encode_scitail at 0x000002E00F811820>
mapped
{'train': ['premise', 'hypothesis', 'label', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['premise', 'hypothesis', 'label', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['premise', 'hypothesis', 'label', 'input_ids', 'token_type_ids', 'attention_mask']}


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                      
  0%|          | 50/692910 [00:32<16:09:03, 11.92it/s]

{'eval_loss': 1.8265246152877808, 'eval_accuracy': 0.49616564417177916, 'eval_runtime': 28.3429, 'eval_samples_per_second': 46.008, 'eval_steps_per_second': 5.751, 'epoch': 0.0}


                                                       
  0%|          | 100/692910 [01:05<16:17:13, 11.82it/s]

{'eval_loss': 1.9955953359603882, 'eval_accuracy': 0.49616564417177916, 'eval_runtime': 28.0906, 'eval_samples_per_second': 46.421, 'eval_steps_per_second': 5.803, 'epoch': 0.0}


                                                        
  0%|          | 150/692910 [01:38<16:18:37, 11.80it/s]

{'eval_loss': 1.863434910774231, 'eval_accuracy': 0.49616564417177916, 'eval_runtime': 28.0541, 'eval_samples_per_second': 46.482, 'eval_steps_per_second': 5.81, 'epoch': 0.01}


  0%|          | 200/692910 [01:43<16:10:23, 11.90it/s] 

{'loss': 1.2407, 'learning_rate': 9.997113622259746e-05, 'epoch': 0.01}


                                                       
  0%|          | 200/692910 [02:10<16:10:23, 11.90it/s]

{'eval_loss': 1.456008791923523, 'eval_accuracy': 0.49616564417177916, 'eval_runtime': 27.7466, 'eval_samples_per_second': 46.997, 'eval_steps_per_second': 5.875, 'epoch': 0.01}


                                                        
  0%|          | 250/692910 [02:43<16:29:34, 11.67it/s]

{'eval_loss': 1.9911577701568604, 'eval_accuracy': 0.49616564417177916, 'eval_runtime': 28.0834, 'eval_samples_per_second': 46.433, 'eval_steps_per_second': 5.804, 'epoch': 0.01}


                                                        
  0%|          | 300/692910 [03:16<16:14:50, 11.84it/s]

{'eval_loss': 1.9311197996139526, 'eval_accuracy': 0.49616564417177916, 'eval_runtime': 27.978, 'eval_samples_per_second': 46.608, 'eval_steps_per_second': 5.826, 'epoch': 0.01}


                                                        
  0%|          | 350/692910 [03:48<16:05:31, 11.95it/s]

{'eval_loss': 2.1166043281555176, 'eval_accuracy': 0.49616564417177916, 'eval_runtime': 27.6817, 'eval_samples_per_second': 47.107, 'eval_steps_per_second': 5.888, 'epoch': 0.02}


  0%|          | 400/692910 [03:53<16:07:17, 11.93it/s] 

{'loss': 1.2018, 'learning_rate': 9.99422724451949e-05, 'epoch': 0.02}


                                                       
  0%|          | 400/692910 [04:21<16:07:17, 11.93it/s]

{'eval_loss': 0.6236879825592041, 'eval_accuracy': 0.7024539877300614, 'eval_runtime': 28.0872, 'eval_samples_per_second': 46.427, 'eval_steps_per_second': 5.803, 'epoch': 0.02}


                                                        
  0%|          | 450/692910 [04:53<16:02:51, 11.99it/s]

{'eval_loss': 0.6652292609214783, 'eval_accuracy': 0.7024539877300614, 'eval_runtime': 27.9095, 'eval_samples_per_second': 46.722, 'eval_steps_per_second': 5.84, 'epoch': 0.02}


                                                        
  0%|          | 500/692910 [05:26<16:08:59, 11.91it/s]

{'eval_loss': 1.6733368635177612, 'eval_accuracy': 0.5483128834355828, 'eval_runtime': 27.9719, 'eval_samples_per_second': 46.618, 'eval_steps_per_second': 5.827, 'epoch': 0.02}


                                                        
  0%|          | 550/692910 [05:59<16:04:14, 11.97it/s]

{'eval_loss': 0.8911234736442566, 'eval_accuracy': 0.6641104294478528, 'eval_runtime': 27.687, 'eval_samples_per_second': 47.098, 'eval_steps_per_second': 5.887, 'epoch': 0.02}


  0%|          | 600/692910 [06:04<16:49:01, 11.44it/s] 

{'loss': 1.0743, 'learning_rate': 9.991340866779236e-05, 'epoch': 0.03}


                                                       
  0%|          | 600/692910 [06:31<16:49:01, 11.44it/s]

{'eval_loss': 0.6554744243621826, 'eval_accuracy': 0.7162576687116564, 'eval_runtime': 27.7128, 'eval_samples_per_second': 47.054, 'eval_steps_per_second': 5.882, 'epoch': 0.03}


                                                        
  0%|          | 650/692910 [07:04<16:12:59, 11.86it/s]

{'eval_loss': 0.6892754435539246, 'eval_accuracy': 0.7730061349693251, 'eval_runtime': 27.8996, 'eval_samples_per_second': 46.739, 'eval_steps_per_second': 5.842, 'epoch': 0.03}


                                                        
  0%|          | 700/692910 [07:37<17:02:06, 11.29it/s]

{'eval_loss': 0.842995285987854, 'eval_accuracy': 0.6993865030674846, 'eval_runtime': 28.0291, 'eval_samples_per_second': 46.523, 'eval_steps_per_second': 5.815, 'epoch': 0.03}


                                                        
  0%|          | 750/692910 [08:10<16:07:36, 11.92it/s]

{'eval_loss': 1.8162579536437988, 'eval_accuracy': 0.5483128834355828, 'eval_runtime': 27.995, 'eval_samples_per_second': 46.58, 'eval_steps_per_second': 5.822, 'epoch': 0.03}


  0%|          | 800/692910 [08:14<15:59:20, 12.02it/s] 

{'loss': 1.1274, 'learning_rate': 9.98845448903898e-05, 'epoch': 0.03}


                                                       
  0%|          | 800/692910 [08:43<15:59:20, 12.02it/s]

{'eval_loss': 0.5892223715782166, 'eval_accuracy': 0.7822085889570553, 'eval_runtime': 28.132, 'eval_samples_per_second': 46.353, 'eval_steps_per_second': 5.794, 'epoch': 0.03}


                                                        
  0%|          | 850/692910 [09:16<16:09:09, 11.90it/s]

{'eval_loss': 0.8358713984489441, 'eval_accuracy': 0.7354294478527608, 'eval_runtime': 28.0914, 'eval_samples_per_second': 46.42, 'eval_steps_per_second': 5.802, 'epoch': 0.04}


                                                        
  0%|          | 900/692910 [09:49<16:02:27, 11.98it/s]

{'eval_loss': 0.5733907222747803, 'eval_accuracy': 0.8205521472392638, 'eval_runtime': 28.0395, 'eval_samples_per_second': 46.506, 'eval_steps_per_second': 5.813, 'epoch': 0.04}


                                                        
  0%|          | 950/692910 [10:21<16:06:08, 11.94it/s]

{'eval_loss': 0.8471399545669556, 'eval_accuracy': 0.8052147239263804, 'eval_runtime': 27.9891, 'eval_samples_per_second': 46.59, 'eval_steps_per_second': 5.824, 'epoch': 0.04}


  0%|          | 1000/692910 [10:26<16:11:44, 11.87it/s]

{'loss': 0.926, 'learning_rate': 9.985568111298727e-05, 'epoch': 0.04}


                                                        
  0%|          | 1000/692910 [10:54<16:11:44, 11.87it/s]

{'eval_loss': 0.9365258812904358, 'eval_accuracy': 0.7476993865030674, 'eval_runtime': 27.8822, 'eval_samples_per_second': 46.768, 'eval_steps_per_second': 5.846, 'epoch': 0.04}


                                                         
  0%|          | 1050/692910 [11:27<16:08:09, 11.91it/s]

{'eval_loss': 0.8444045186042786, 'eval_accuracy': 0.8105828220858896, 'eval_runtime': 28.0559, 'eval_samples_per_second': 46.479, 'eval_steps_per_second': 5.81, 'epoch': 0.05}


                                                         
  0%|          | 1100/692910 [11:59<16:08:14, 11.91it/s]

{'eval_loss': 0.8886200189590454, 'eval_accuracy': 0.7837423312883436, 'eval_runtime': 27.6439, 'eval_samples_per_second': 47.171, 'eval_steps_per_second': 5.896, 'epoch': 0.05}


                                                         
  0%|          | 1150/692910 [12:32<16:09:31, 11.89it/s]

{'eval_loss': 0.6730592250823975, 'eval_accuracy': 0.7998466257668712, 'eval_runtime': 27.7607, 'eval_samples_per_second': 46.973, 'eval_steps_per_second': 5.872, 'epoch': 0.05}


  0%|          | 1200/692910 [12:37<15:55:49, 12.06it/s] 

{'loss': 0.8676, 'learning_rate': 9.982681733558471e-05, 'epoch': 0.05}


                                                        
  0%|          | 1200/692910 [13:05<15:55:49, 12.06it/s]

{'eval_loss': 0.640994131565094, 'eval_accuracy': 0.8320552147239264, 'eval_runtime': 28.0657, 'eval_samples_per_second': 46.462, 'eval_steps_per_second': 5.808, 'epoch': 0.05}


                                                         
  0%|          | 1250/692910 [13:37<16:28:47, 11.66it/s]

{'eval_loss': 0.593244194984436, 'eval_accuracy': 0.8289877300613497, 'eval_runtime': 27.7011, 'eval_samples_per_second': 47.074, 'eval_steps_per_second': 5.884, 'epoch': 0.05}


                                                         
  0%|          | 1301/692910 [14:10<972:30:14,  5.06s/it]

{'eval_loss': 0.6688481569290161, 'eval_accuracy': 0.8343558282208589, 'eval_runtime': 28.0905, 'eval_samples_per_second': 46.421, 'eval_steps_per_second': 5.803, 'epoch': 0.06}


                                                         
  0%|          | 1350/692910 [14:42<16:22:33, 11.73it/s]

{'eval_loss': 0.7252223491668701, 'eval_accuracy': 0.8335889570552147, 'eval_runtime': 27.8958, 'eval_samples_per_second': 46.745, 'eval_steps_per_second': 5.843, 'epoch': 0.06}


  0%|          | 1400/692910 [14:47<16:06:14, 11.93it/s] 

{'loss': 0.7605, 'learning_rate': 9.979795355818217e-05, 'epoch': 0.06}


                                                        
  0%|          | 1400/692910 [15:15<16:06:14, 11.93it/s]

{'eval_loss': 1.1409426927566528, 'eval_accuracy': 0.7369631901840491, 'eval_runtime': 28.0845, 'eval_samples_per_second': 46.431, 'eval_steps_per_second': 5.804, 'epoch': 0.06}


                                                         
  0%|          | 1450/692910 [15:47<16:13:37, 11.84it/s]

{'eval_loss': 0.924621045589447, 'eval_accuracy': 0.8136503067484663, 'eval_runtime': 27.6019, 'eval_samples_per_second': 47.243, 'eval_steps_per_second': 5.905, 'epoch': 0.06}


                                                         
  0%|          | 1500/692910 [16:19<16:10:03, 11.88it/s]

{'eval_loss': 1.7094287872314453, 'eval_accuracy': 0.7239263803680982, 'eval_runtime': 27.2881, 'eval_samples_per_second': 47.786, 'eval_steps_per_second': 5.973, 'epoch': 0.06}


                                                         
  0%|          | 1550/692910 [16:52<16:12:34, 11.85it/s]

{'eval_loss': 0.7482693195343018, 'eval_accuracy': 0.8412576687116564, 'eval_runtime': 27.886, 'eval_samples_per_second': 46.762, 'eval_steps_per_second': 5.845, 'epoch': 0.07}


  0%|          | 1600/692910 [16:57<16:03:44, 11.96it/s] 

{'loss': 0.7365, 'learning_rate': 9.976908978077961e-05, 'epoch': 0.07}


                                                        
  0%|          | 1600/692910 [17:24<16:03:44, 11.96it/s]

{'eval_loss': 0.9823506474494934, 'eval_accuracy': 0.8090490797546013, 'eval_runtime': 27.6738, 'eval_samples_per_second': 47.12, 'eval_steps_per_second': 5.89, 'epoch': 0.07}


                                                         
  0%|          | 1650/692910 [17:57<16:10:01, 11.88it/s]

{'eval_loss': 0.6205458641052246, 'eval_accuracy': 0.8397239263803681, 'eval_runtime': 27.8936, 'eval_samples_per_second': 46.749, 'eval_steps_per_second': 5.844, 'epoch': 0.07}


                                                         
  0%|          | 1701/692910 [18:30<968:31:52,  5.04s/it]

{'eval_loss': 0.7308771014213562, 'eval_accuracy': 0.843558282208589, 'eval_runtime': 27.9856, 'eval_samples_per_second': 46.595, 'eval_steps_per_second': 5.824, 'epoch': 0.07}


                                                         
  0%|          | 1750/692910 [19:02<16:00:12, 12.00it/s]

{'eval_loss': 0.7804198265075684, 'eval_accuracy': 0.8320552147239264, 'eval_runtime': 28.2128, 'eval_samples_per_second': 46.22, 'eval_steps_per_second': 5.778, 'epoch': 0.08}


  0%|          | 1800/692910 [19:07<16:07:55, 11.90it/s] 

{'loss': 0.8227, 'learning_rate': 9.974022600337707e-05, 'epoch': 0.08}


                                                        
  0%|          | 1800/692910 [19:35<16:07:55, 11.90it/s]

{'eval_loss': 0.7469393610954285, 'eval_accuracy': 0.8144171779141104, 'eval_runtime': 28.2725, 'eval_samples_per_second': 46.122, 'eval_steps_per_second': 5.765, 'epoch': 0.08}


                                                         
  0%|          | 1850/692910 [20:08<16:08:37, 11.89it/s]

{'eval_loss': 0.5803383588790894, 'eval_accuracy': 0.8289877300613497, 'eval_runtime': 27.8431, 'eval_samples_per_second': 46.834, 'eval_steps_per_second': 5.854, 'epoch': 0.08}


                                                         
  0%|          | 1900/692910 [20:40<16:07:25, 11.90it/s]

{'eval_loss': 0.5710938572883606, 'eval_accuracy': 0.8397239263803681, 'eval_runtime': 27.7412, 'eval_samples_per_second': 47.006, 'eval_steps_per_second': 5.876, 'epoch': 0.08}


                                                         
  0%|          | 1950/692910 [21:13<16:02:25, 11.97it/s]

{'eval_loss': 0.7504948377609253, 'eval_accuracy': 0.8312883435582822, 'eval_runtime': 27.8785, 'eval_samples_per_second': 46.774, 'eval_steps_per_second': 5.847, 'epoch': 0.08}


  0%|          | 2000/692910 [21:17<16:00:22, 11.99it/s] 

{'loss': 0.7949, 'learning_rate': 9.971136222597451e-05, 'epoch': 0.09}


                                                        
  0%|          | 2000/692910 [21:45<16:00:22, 11.99it/s]

{'eval_loss': 0.6714715361595154, 'eval_accuracy': 0.8351226993865031, 'eval_runtime': 27.7252, 'eval_samples_per_second': 47.033, 'eval_steps_per_second': 5.879, 'epoch': 0.09}


                                                         
  0%|          | 2050/692910 [22:18<16:11:00, 11.86it/s]

{'eval_loss': 0.697845995426178, 'eval_accuracy': 0.7967791411042945, 'eval_runtime': 28.0408, 'eval_samples_per_second': 46.504, 'eval_steps_per_second': 5.813, 'epoch': 0.09}


                                                         
  0%|          | 2100/692910 [22:51<16:33:43, 11.59it/s]

{'eval_loss': 1.2965662479400635, 'eval_accuracy': 0.678680981595092, 'eval_runtime': 27.8793, 'eval_samples_per_second': 46.773, 'eval_steps_per_second': 5.847, 'epoch': 0.09}


                                                         
  0%|          | 2151/692910 [23:24<816:41:44,  4.26s/it]

{'eval_loss': 0.69056636095047, 'eval_accuracy': 0.8489263803680982, 'eval_runtime': 27.7898, 'eval_samples_per_second': 46.924, 'eval_steps_per_second': 5.865, 'epoch': 0.09}


  0%|          | 2200/692910 [23:28<16:26:36, 11.67it/s] 

{'loss': 0.828, 'learning_rate': 9.968249844857197e-05, 'epoch': 0.1}


                                                        
  0%|          | 2200/692910 [23:56<16:26:36, 11.67it/s]

{'eval_loss': 0.6310089826583862, 'eval_accuracy': 0.8450920245398773, 'eval_runtime': 27.9104, 'eval_samples_per_second': 46.721, 'eval_steps_per_second': 5.84, 'epoch': 0.1}


  0%|          | 2250/692910 [24:29<16:06:26, 11.91it/s] 

{'eval_loss': 0.6503744125366211, 'eval_accuracy': 0.8412576687116564, 'eval_runtime': 27.909, 'eval_samples_per_second': 46.723, 'eval_steps_per_second': 5.84, 'epoch': 0.1}


                                                         
  0%|          | 2300/692910 [25:01<16:50:46, 11.39it/s]

{'eval_loss': 0.6201594471931458, 'eval_accuracy': 0.8542944785276073, 'eval_runtime': 27.9482, 'eval_samples_per_second': 46.658, 'eval_steps_per_second': 5.832, 'epoch': 0.1}


                                                         
  0%|          | 2350/692910 [25:33<16:11:55, 11.84it/s]

{'eval_loss': 0.5109291076660156, 'eval_accuracy': 0.8588957055214724, 'eval_runtime': 27.7242, 'eval_samples_per_second': 47.035, 'eval_steps_per_second': 5.879, 'epoch': 0.1}


  0%|          | 2400/692910 [25:38<16:07:46, 11.89it/s] 

{'loss': 0.915, 'learning_rate': 9.965363467116941e-05, 'epoch': 0.1}


                                                        
  0%|          | 2400/692910 [26:06<16:07:46, 11.89it/s]

{'eval_loss': 0.44085589051246643, 'eval_accuracy': 0.8604294478527608, 'eval_runtime': 27.9231, 'eval_samples_per_second': 46.7, 'eval_steps_per_second': 5.837, 'epoch': 0.1}


                                                         
  0%|          | 2450/692910 [26:39<16:06:14, 11.91it/s]

{'eval_loss': 0.6541866064071655, 'eval_accuracy': 0.838957055214724, 'eval_runtime': 27.9743, 'eval_samples_per_second': 46.614, 'eval_steps_per_second': 5.827, 'epoch': 0.11}


                                                         
  0%|          | 2500/692910 [27:12<16:22:16, 11.71it/s]

{'eval_loss': 1.0351263284683228, 'eval_accuracy': 0.7937116564417178, 'eval_runtime': 28.0783, 'eval_samples_per_second': 46.442, 'eval_steps_per_second': 5.805, 'epoch': 0.11}


                                                          
  0%|          | 2550/692910 [27:45<16:05:31, 11.92it/s]

{'eval_loss': 0.8821899890899658, 'eval_accuracy': 0.7883435582822086, 'eval_runtime': 27.9895, 'eval_samples_per_second': 46.589, 'eval_steps_per_second': 5.824, 'epoch': 0.11}


  0%|          | 2600/692910 [27:50<16:38:54, 11.52it/s] 

{'loss': 0.7827, 'learning_rate': 9.962477089376688e-05, 'epoch': 0.11}


                                                        
  0%|          | 2600/692910 [28:18<16:38:54, 11.52it/s]

{'eval_loss': 0.6355807185173035, 'eval_accuracy': 0.8174846625766872, 'eval_runtime': 28.056, 'eval_samples_per_second': 46.479, 'eval_steps_per_second': 5.81, 'epoch': 0.11}


  0%|          | 2600/692910 [28:18<125:17:22,  1.53it/s]


{'train_runtime': 1698.8063, 'train_samples_per_second': 407.881, 'train_steps_per_second': 407.881, 'train_loss': 0.9290934753417969, 'epoch': 0.11}


100%|██████████| 163/163 [00:28<00:00,  5.81it/s]


Writing eval results
{'eval_loss': 0.6714715361595154, 'eval_accuracy': 0.8351226993865031, 'eval_runtime': 28.2721, 'eval_samples_per_second': 46.123, 'eval_steps_per_second': 5.765, 'epoch': 0.11}
**********************************RUNNING TASK imdb*****************************


ConnectionError: Couldn't reach 'imdb' on the Hub (ConnectionError)

In [5]:
##### full run:
####### MOVE THIS TO A .PY SCRIPT. ONLY HERE FOR A TEST RUN
task = "mrpc"
data = load_hf_dataset(task,debug=False)

tokenizer = get_tokenizer(model_name)

encode = get_encoding(task)

dataset = preprocess_dataset(data,encode,tokenizer)


num_labels = get_label_count(dataset)
print("labels",num_labels)
model = setup_model(model_name,num_labels,dataset)

adapter_config = adapters.BnConfig(
                          output_adapter=True,
                          mh_adapter=False,
                          reduction_factor=2,
                          non_linearity="relu")

model = add_clf_adapter(task_name=task,model=model,num_labels=num_labels,adapter_config=adapter_config)

default_args = TrainingParameters()
default_args.lr_scheduler_type = "linear"

train_args = get_training_arguments(default_args)

trainer = get_trainer(train_args,dataset,model,early_stopping=3)

trainer.train()

eval_results = trainer.evaluate()

write_eval_results(eval_results,output_dir,task,trainer,adapter_config)
##### full run ends. MOVE THIS.

Downloading data: 100%|██████████| 649k/649k [00:00<00:00, 1.66MB/s]
Downloading data: 100%|██████████| 75.7k/75.7k [00:00<00:00, 359kB/s]
Downloading data: 100%|██████████| 308k/308k [00:00<00:00, 1.35MB/s]
Generating train split: 100%|██████████| 3668/3668 [00:00<00:00, 353070.80 examples/s]
Generating validation split: 100%|██████████| 408/408 [00:00<00:00, 101637.82 examples/s]
Generating test split: 100%|██████████| 1725/1725 [00:00<00:00, 216201.24 examples/s]


using bert tokenizer
getting encoding:
<function encode_mrpc at 0x000002E00F811550>


Map: 100%|██████████| 3668/3668 [00:06<00:00, 590.85 examples/s]
Map: 100%|██████████| 408/408 [00:00<00:00, 577.79 examples/s]
Map: 100%|██████████| 1725/1725 [00:02<00:00, 599.53 examples/s]


mapped
{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}
labels 2


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 29/13770 [00:12<1:41:40,  2.25it/s]

KeyboardInterrupt: 

In [4]:
data = load_hf_dataset(task,debug=False)

tokenizer = get_tokenizer(model_name)

encode = get_encoding(task)

using bert tokenizer
getting encoding:
<function encode_scitail at 0x000002C001F6E550>


In [5]:
dataset = preprocess_dataset(data,encode,tokenizer)

Map:   0%|          | 0/23097 [00:00<?, ? examples/s]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not return

mapped
{'train': ['premise', 'hypothesis', 'label', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['premise', 'hypothesis', 'label', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['premise', 'hypothesis', 'label', 'input_ids', 'token_type_ids', 'attention_mask']}





In [6]:
#id2label = {id: label for (id,label) in enumerate(dataset["train"].features["labels"].names)}
#num_labels = len(id2label)
num_labels = get_label_count(dataset)
print("labels",num_labels)
model = setup_model(model_name,num_labels,dataset)

adapter_config = adapters.BnConfig(
                          output_adapter=True,
                          mh_adapter=False,
                          reduction_factor=2,
                          non_linearity="relu")

model = add_clf_adapter(task_name=task,model=model,num_labels=num_labels,adapter_config=adapter_config)

default_args = TrainingParameters()
default_args.lr_scheduler_type = "linear"


labels 2


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
train_args = get_training_arguments(default_args)

trainer = get_trainer(train_args,dataset,model,early_stopping=3)

trainer.train()

                                                    
  0%|          | 50/86640 [00:45<8:09:11,  2.95it/s]

{'eval_loss': 0.5120983719825745, 'eval_accuracy': 0.7576687116564417, 'eval_runtime': 26.097, 'eval_samples_per_second': 49.967, 'eval_steps_per_second': 6.246, 'epoch': 0.02}


  0%|          | 100/86640 [01:01<7:49:31,  3.07it/s] 
  0%|          | 100/86640 [01:27<7:49:31,  3.07it/s]

{'eval_loss': 0.482826828956604, 'eval_accuracy': 0.7507668711656442, 'eval_runtime': 26.8769, 'eval_samples_per_second': 48.517, 'eval_steps_per_second': 6.065, 'epoch': 0.03}


                                                       
  0%|          | 150/86640 [02:12<7:49:15,  3.07it/s]

{'eval_loss': 0.407736599445343, 'eval_accuracy': 0.8205521472392638, 'eval_runtime': 27.6917, 'eval_samples_per_second': 47.09, 'eval_steps_per_second': 5.886, 'epoch': 0.05}


  0%|          | 200/86640 [02:29<7:43:01,  3.11it/s]  

{'loss': 0.5546, 'learning_rate': 9.976915974145891e-05, 'epoch': 0.07}


                                                     
  0%|          | 200/86640 [02:57<7:43:01,  3.11it/s]

{'eval_loss': 0.36100077629089355, 'eval_accuracy': 0.8496932515337423, 'eval_runtime': 28.7014, 'eval_samples_per_second': 45.433, 'eval_steps_per_second': 5.679, 'epoch': 0.07}


                                                       
  0%|          | 250/86640 [03:43<9:12:41,  2.61it/s]

{'eval_loss': 0.36655500531196594, 'eval_accuracy': 0.8504601226993865, 'eval_runtime': 28.0162, 'eval_samples_per_second': 46.545, 'eval_steps_per_second': 5.818, 'epoch': 0.09}


                                                       
  0%|          | 300/86640 [04:28<8:00:13,  3.00it/s]

{'eval_loss': 0.3831276595592499, 'eval_accuracy': 0.8328220858895705, 'eval_runtime': 27.7914, 'eval_samples_per_second': 46.921, 'eval_steps_per_second': 5.865, 'epoch': 0.1}


                                                       
  0%|          | 350/86640 [05:13<8:13:31,  2.91it/s]

{'eval_loss': 0.3124072253704071, 'eval_accuracy': 0.8711656441717791, 'eval_runtime': 28.0155, 'eval_samples_per_second': 46.546, 'eval_steps_per_second': 5.818, 'epoch': 0.12}


  0%|          | 400/86640 [05:30<8:39:21,  2.77it/s]  

{'loss': 0.4675, 'learning_rate': 9.953831948291783e-05, 'epoch': 0.14}


                                                     
  0%|          | 400/86640 [05:57<8:39:21,  2.77it/s]

{'eval_loss': 0.4579084813594818, 'eval_accuracy': 0.8029141104294478, 'eval_runtime': 26.6335, 'eval_samples_per_second': 48.961, 'eval_steps_per_second': 6.12, 'epoch': 0.14}


                                                       
  1%|          | 450/86640 [06:41<8:00:06,  2.99it/s]

{'eval_loss': 0.3183535635471344, 'eval_accuracy': 0.8573619631901841, 'eval_runtime': 27.2307, 'eval_samples_per_second': 47.887, 'eval_steps_per_second': 5.986, 'epoch': 0.16}


                                                       
  1%|          | 500/86640 [07:27<8:23:01,  2.85it/s]

{'eval_loss': 0.28294530510902405, 'eval_accuracy': 0.8757668711656442, 'eval_runtime': 28.7131, 'eval_samples_per_second': 45.415, 'eval_steps_per_second': 5.677, 'epoch': 0.17}


                                                       
  1%|          | 550/86640 [08:13<8:04:11,  2.96it/s]

{'eval_loss': 0.3734060525894165, 'eval_accuracy': 0.8404907975460123, 'eval_runtime': 28.8571, 'eval_samples_per_second': 45.188, 'eval_steps_per_second': 5.649, 'epoch': 0.19}


  1%|          | 600/86640 [08:30<8:23:48,  2.85it/s]  

{'loss': 0.3736, 'learning_rate': 9.930747922437674e-05, 'epoch': 0.21}


                                                     
  1%|          | 600/86640 [09:00<8:23:48,  2.85it/s]

{'eval_loss': 0.29867643117904663, 'eval_accuracy': 0.870398773006135, 'eval_runtime': 29.1901, 'eval_samples_per_second': 44.673, 'eval_steps_per_second': 5.584, 'epoch': 0.21}


                                                       
  1%|          | 650/86640 [09:45<8:40:19,  2.75it/s]

{'eval_loss': 0.3124051094055176, 'eval_accuracy': 0.8734662576687117, 'eval_runtime': 27.8346, 'eval_samples_per_second': 46.848, 'eval_steps_per_second': 5.856, 'epoch': 0.23}


  1%|          | 650/86640 [09:46<21:32:15,  1.11it/s]

{'train_runtime': 586.0818, 'train_samples_per_second': 1182.275, 'train_steps_per_second': 147.829, 'train_loss': 0.4591100810124324, 'epoch': 0.23}





TrainOutput(global_step=650, training_loss=0.4591100810124324, metrics={'train_runtime': 586.0818, 'train_samples_per_second': 1182.275, 'train_steps_per_second': 147.829, 'train_loss': 0.4591100810124324, 'epoch': 0.23})

In [8]:
eval_results = trainer.evaluate()

100%|██████████| 163/163 [00:27<00:00,  5.85it/s]


In [8]:
write_eval_results(eval_results,output_dir,task,trainer,adapter_config)

Writing eval results
{'eval_loss': 0.6164497137069702, 'eval_accuracy': 0.6672782874617736, 'eval_runtime': 46.1558, 'eval_samples_per_second': 70.847, 'eval_steps_per_second': 8.861, 'epoch': 0.64}


In [9]:
"""for attr_name, attr_value in vars(default_args).items():
    print(f"{attr_name}: {attr_value}")"""
"""
    train_args = {"label_names":["labels"],
    "evaluation_strategy":"steps",
    "learning_rate":1e-4,
    "num_train_epochs":1,
    "per_device_train_batch_size":8,
    "per_device_eval_batch_size":8,
    "eval_steps":50,
    "logging_steps":200,
    "output_dir":"/eval_results",
    "overwrite_output_dir":True,
    "remove_unused_columns":False,
    "lr_scheduler_type":'linear',
    "load_best_model_at_end":True,
    "metric_for_best_model" : "accuracy",
    "early_stopping_patience":3,
    "save_total_limit":5
}
"""

'\n    train_args = {"label_names":["labels"],\n    "evaluation_strategy":"steps",\n    "learning_rate":1e-4,\n    "num_train_epochs":1,\n    "per_device_train_batch_size":8,\n    "per_device_eval_batch_size":8,\n    "eval_steps":50,\n    "logging_steps":200,\n    "output_dir":"/eval_results",\n    "overwrite_output_dir":True,\n    "remove_unused_columns":False,\n    "lr_scheduler_type":\'linear\',\n    "load_best_model_at_end":True,\n    "metric_for_best_model" : "accuracy",\n    "early_stopping_patience":3,\n    "save_total_limit":5\n}\n'