In [1]:
import sys,os
import json
project_root = os.path.dirname(os.getcwd())
sys.path.insert(0,project_root)

from src.data_handling.load_data import *
from src.data_handling.preprocessing import *
from src.models.model_setup import *
from src.trainer.training import *
from src.trainer.file_utils import *
from transformers import set_seed

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Run configuration -------------------------------------------------------
# Seed the random number generators through transformers' `set_seed` before
# any model is created.
seed_value = 42
set_seed(seed_value)

# Task identifiers handled by this notebook:
#   cb, rte, sick, mrpc, boolq, argument, scitail, imdb, sst2, qqp, mnli
# TODO: "csqa" is still undecided (plain classification or multiple choice?),
#       so it is not part of `tasks`.
tasks = ["cb","rte","sick","mrpc","boolq","argument","scitail","imdb","sst2","qqp","mnli"]
# For a quicker subset run use e.g.: tasks = ["qqp","mnli"]

model_name = "bert-base-uncased"

# Root folder for evaluation results. The location is machine specific, so it
# can be overridden with the EVAL_OUTPUT_PATH environment variable; the
# original hard-coded location stays as the default for backward compatibility.
output_path = os.environ.get(
    "EVAL_OUTPUT_PATH",
    "C:/Users/Hector Auvinen/Desktop/eval_results/bigger_eval_steps",
)

# JSON file holding the named adapter configurations. Each entry provides the
# keys "output_adapter", "mh_adapter", "reduction_factor" and "non_linearity",
# which are read when the adapter config is built for a run.
adapter_conf_path = "../src/configs/adapter_configs.json"

'output_adapter = config["output_adapter"]\nmh_adapter = config["mh_adapter"]\nreduction_factor = config["reduction_factor"]\nnon_linearity = config["non_linearity"]'

In [3]:
# TODO: move this loading step into a module.
# Parse the adapter configurations from the JSON file. The encoding is passed
# explicitly: without it open() decodes with the locale's preferred encoding,
# which is not necessarily UTF-8.
with open(adapter_conf_path, "r", encoding="utf-8") as config_file:
    adapter_configs = json.load(config_file)
# Last expression of the cell: display the parsed mapping.
adapter_configs

{'output_adapter_redf_2': {'output_adapter': True,
  'mh_adapter': False,
  'reduction_factor': 2,
  'non_linearity': 'relu'},
 'output_adapter_redf_16': {'output_adapter': True,
  'mh_adapter': False,
  'reduction_factor': 16,
  'non_linearity': 'relu'},
 'output_adapter_redf_64': {'output_adapter': True,
  'mh_adapter': False,
  'reduction_factor': 64,
  'non_linearity': 'relu'}}

In [4]:
# Sweep: for every named adapter configuration, train and evaluate one adapter
# per task and write the evaluation results below `output_path/<config name>`.
for name,config in adapter_configs.items():
    print(name,config)
    output_dir = os.path.join(output_path,name)

    # Create the per-configuration folder. `exist_ok=True` replaces the former
    # exists()-then-makedirs() pair, which could fail if the folder appeared
    # between the check and the creation. The check below is only used to keep
    # the informational messages unchanged.
    folder_existed = os.path.exists(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    if folder_existed:
        print(f"Folder '{output_dir}' already exists.")
    else:
        print(f"Folder '{output_dir}' created.")

    print(f"**********************************RUNNING CONFIG {name}*****************************")
    print("CONFIG",config)

    # The adapter configuration depends only on `config`, so it is built once
    # per configuration instead of once per task.
    # NOTE(review): `adapters` is not imported by name in this notebook;
    # presumably one of the star imports provides it - confirm.
    adapter_config = adapters.BnConfig(
        output_adapter=config["output_adapter"],
        mh_adapter=config["mh_adapter"],
        reduction_factor=config["reduction_factor"],
        non_linearity=config["non_linearity"],
    )

    for task in tasks:
        print(f"**********************************RUNNING TASK {task}*****************************")
        # load dataset
        data = load_hf_dataset(task,debug=False)
        # get tokenizer (bert)
        tokenizer = get_tokenizer(model_name)
        # get encoding method for particular task
        encode = get_encoding(task)
        # apply encoding
        dataset = preprocess_dataset(data,encode,tokenizer)
        # get label count
        num_labels = get_label_count(dataset)
        # set up model (head with num labels)
        model = setup_model(model_name,num_labels,dataset)

        # add adapter
        model = add_clf_adapter(task_name=task,model=model,num_labels=num_labels,adapter_config=adapter_config)

        # set up training args; checkpoints and logs go to <output_dir>/<task>
        # NOTE(review): with evaluation_strategy="epoch" the eval_steps value
        # is presumably unused - confirm in TrainingParameters.
        final_output = os.path.join(output_dir,task)
        default_args = TrainingParameters(output_dir=final_output,
                                          per_device_train_batch_size=8,
                                          evaluation_strategy="epoch",
                                          eval_steps=1,
                                          save_strategy="epoch",
                                          logging_steps=200)
        default_args.lr_scheduler_type = "linear"
        train_args = get_training_arguments(default_args)

        # set up trainer
        trainer = get_trainer(train_args,dataset,model,early_stopping=3)
        # train
        trainer.train()

        # evaluate and write results to file
        eval_results = trainer.evaluate()
        write_eval_results(eval_results,output_dir,task,trainer,adapter_config)

    

output_adapter_redf_2 {'output_adapter': True, 'mh_adapter': False, 'reduction_factor': 2, 'non_linearity': 'relu'}
Folder 'C:/Users/Hector Auvinen/Desktop/eval_results/bigger_eval_steps\output_adapter_redf_2' already exists.
**********************************RUNNING CONFIG output_adapter_redf_2*****************************
CONFIG {'output_adapter': True, 'mh_adapter': False, 'reduction_factor': 2, 'non_linearity': 'relu'}
**********************************RUNNING TASK cb*****************************
using bert tokenizer
getting encoding:
<function encode_cb at 0x000001B0B12024C0>
mapped
{'train': ['premise', 'hypothesis', 'idx', 'label', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['premise', 'hypothesis', 'idx', 'label', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['premise', 'hypothesis', 'idx', 'label', 'input_ids', 'token_type_ids', 'attention_mask']}


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
                                                
  3%|▎         | 32/960 [00:13<04:53,  3.16it/s]

{'eval_loss': 0.817361056804657, 'eval_accuracy': 0.6607142857142857, 'eval_runtime': 1.0843, 'eval_samples_per_second': 51.646, 'eval_steps_per_second': 6.456, 'epoch': 1.0}


                                                
  7%|▋         | 64/960 [00:25<04:09,  3.60it/s]

{'eval_loss': 0.7494312524795532, 'eval_accuracy': 0.7321428571428571, 'eval_runtime': 1.1531, 'eval_samples_per_second': 48.566, 'eval_steps_per_second': 6.071, 'epoch': 2.0}


                                                
 10%|█         | 96/960 [00:36<03:51,  3.72it/s]

{'eval_loss': 0.6542282104492188, 'eval_accuracy': 0.7678571428571429, 'eval_runtime': 1.1029, 'eval_samples_per_second': 50.776, 'eval_steps_per_second': 6.347, 'epoch': 3.0}


                                                 
 13%|█▎        | 128/960 [00:48<04:36,  3.01it/s]

{'eval_loss': 0.7280062437057495, 'eval_accuracy': 0.7321428571428571, 'eval_runtime': 1.0357, 'eval_samples_per_second': 54.067, 'eval_steps_per_second': 6.758, 'epoch': 4.0}


                                                 
 17%|█▋        | 160/960 [00:59<03:18,  4.02it/s]

{'eval_loss': 0.5717395544052124, 'eval_accuracy': 0.8392857142857143, 'eval_runtime': 1.0494, 'eval_samples_per_second': 53.365, 'eval_steps_per_second': 6.671, 'epoch': 5.0}


                                                 
 20%|██        | 192/960 [01:10<03:22,  3.78it/s]

{'eval_loss': 0.4707796275615692, 'eval_accuracy': 0.8928571428571429, 'eval_runtime': 1.0867, 'eval_samples_per_second': 51.531, 'eval_steps_per_second': 6.441, 'epoch': 6.0}


 21%|██        | 200/960 [01:13<04:19,  2.93it/s]

{'loss': 0.3441, 'learning_rate': 7.916666666666666e-05, 'epoch': 6.25}


                                                 
 23%|██▎       | 224/960 [01:21<03:41,  3.32it/s]

{'eval_loss': 0.6213327050209045, 'eval_accuracy': 0.875, 'eval_runtime': 1.0375, 'eval_samples_per_second': 53.973, 'eval_steps_per_second': 6.747, 'epoch': 7.0}


                                                 
 27%|██▋       | 256/960 [01:33<03:15,  3.61it/s]

{'eval_loss': 0.5374923944473267, 'eval_accuracy': 0.8571428571428571, 'eval_runtime': 1.1302, 'eval_samples_per_second': 49.551, 'eval_steps_per_second': 6.194, 'epoch': 8.0}


                                                 
 30%|███       | 288/960 [01:45<02:52,  3.91it/s]

{'eval_loss': 0.6355876922607422, 'eval_accuracy': 0.875, 'eval_runtime': 1.0731, 'eval_samples_per_second': 52.186, 'eval_steps_per_second': 6.523, 'epoch': 9.0}


 30%|███       | 288/960 [01:45<04:07,  2.72it/s]


{'train_runtime': 105.9523, 'train_samples_per_second': 70.787, 'train_steps_per_second': 9.061, 'train_loss': 0.24277129645148912, 'epoch': 9.0}


100%|██████████| 7/7 [00:00<00:00,  8.02it/s]


Writing eval results
{'eval_loss': 0.4707796275615692, 'eval_accuracy': 0.8928571428571429, 'eval_runtime': 1.0306, 'eval_samples_per_second': 54.336, 'eval_steps_per_second': 6.792, 'epoch': 9.0}
**********************************RUNNING TASK rte*****************************
using bert tokenizer
getting encoding:
<function encode_rte at 0x000001B0B1202430>
mapped
{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  2%|▏         | 200/9360 [01:03<49:39,  3.07it/s]

{'loss': 0.7039, 'learning_rate': 9.786324786324787e-05, 'epoch': 0.64}


                                                    
  3%|▎         | 312/9360 [01:45<42:57,  3.51it/s]

{'eval_loss': 0.6773728728294373, 'eval_accuracy': 0.5992779783393501, 'eval_runtime': 5.5344, 'eval_samples_per_second': 50.05, 'eval_steps_per_second': 6.324, 'epoch': 1.0}


  4%|▍         | 400/9360 [02:13<47:53,  3.12it/s]  

{'loss': 0.6979, 'learning_rate': 9.572649572649574e-05, 'epoch': 1.28}


  6%|▋         | 600/9360 [03:18<46:26,  3.14it/s]  

{'loss': 0.6826, 'learning_rate': 9.35897435897436e-05, 'epoch': 1.92}


                                                  
  7%|▋         | 624/9360 [03:31<45:04,  3.23it/s]

{'eval_loss': 0.6542558670043945, 'eval_accuracy': 0.6137184115523465, 'eval_runtime': 5.3867, 'eval_samples_per_second': 51.423, 'eval_steps_per_second': 6.497, 'epoch': 2.0}


  9%|▊         | 800/9360 [04:28<51:40,  2.76it/s]  

{'loss': 0.6071, 'learning_rate': 9.145299145299146e-05, 'epoch': 2.56}


                                                  
 10%|█         | 936/9360 [05:18<39:53,  3.52it/s]

{'eval_loss': 0.7188912630081177, 'eval_accuracy': 0.5992779783393501, 'eval_runtime': 5.7398, 'eval_samples_per_second': 48.259, 'eval_steps_per_second': 6.098, 'epoch': 3.0}


 11%|█         | 1000/9360 [05:39<44:07,  3.16it/s] 

{'loss': 0.5805, 'learning_rate': 8.931623931623932e-05, 'epoch': 3.21}


 13%|█▎        | 1200/9360 [06:47<45:38,  2.98it/s]  

{'loss': 0.5246, 'learning_rate': 8.717948717948718e-05, 'epoch': 3.85}


                                                   
 13%|█▎        | 1248/9360 [07:07<34:16,  3.94it/s]

{'eval_loss': 0.7417796850204468, 'eval_accuracy': 0.628158844765343, 'eval_runtime': 5.2789, 'eval_samples_per_second': 52.473, 'eval_steps_per_second': 6.63, 'epoch': 4.0}


 15%|█▍        | 1400/9360 [07:56<40:54,  3.24it/s]  

{'loss': 0.4342, 'learning_rate': 8.504273504273504e-05, 'epoch': 4.49}


                                                   
 17%|█▋        | 1560/9360 [08:54<38:59,  3.33it/s]

{'eval_loss': 1.0329641103744507, 'eval_accuracy': 0.6570397111913358, 'eval_runtime': 5.5873, 'eval_samples_per_second': 49.576, 'eval_steps_per_second': 6.264, 'epoch': 5.0}


 17%|█▋        | 1600/9360 [09:07<39:42,  3.26it/s]  

{'loss': 0.4121, 'learning_rate': 8.290598290598292e-05, 'epoch': 5.13}


 19%|█▉        | 1800/9360 [10:09<38:41,  3.26it/s]

{'loss': 0.3141, 'learning_rate': 8.076923076923078e-05, 'epoch': 5.77}


                                                   
 20%|██        | 1872/9360 [10:37<33:12,  3.76it/s]

{'eval_loss': 1.6521663665771484, 'eval_accuracy': 0.6389891696750902, 'eval_runtime': 5.4174, 'eval_samples_per_second': 51.131, 'eval_steps_per_second': 6.461, 'epoch': 6.0}


 21%|██▏       | 2000/9360 [11:19<38:01,  3.23it/s]  

{'loss': 0.2666, 'learning_rate': 7.863247863247864e-05, 'epoch': 6.41}


                                                   
 23%|██▎       | 2184/9360 [12:22<30:18,  3.95it/s]

{'eval_loss': 1.8927199840545654, 'eval_accuracy': 0.6173285198555957, 'eval_runtime': 5.2878, 'eval_samples_per_second': 52.384, 'eval_steps_per_second': 6.619, 'epoch': 7.0}


 24%|██▎       | 2200/9360 [12:28<40:07,  2.97it/s]  

{'loss': 0.2583, 'learning_rate': 7.64957264957265e-05, 'epoch': 7.05}


 26%|██▌       | 2400/9360 [13:32<41:10,  2.82it/s]

{'loss': 0.201, 'learning_rate': 7.435897435897436e-05, 'epoch': 7.69}


                                                   
 27%|██▋       | 2496/9360 [14:09<43:23,  2.64it/s]

{'eval_loss': 2.390883684158325, 'eval_accuracy': 0.6245487364620939, 'eval_runtime': 5.2159, 'eval_samples_per_second': 53.107, 'eval_steps_per_second': 6.71, 'epoch': 8.0}


 27%|██▋       | 2496/9360 [14:09<38:57,  2.94it/s]


{'train_runtime': 849.9428, 'train_samples_per_second': 87.888, 'train_steps_per_second': 11.013, 'train_loss': 0.46425740076945377, 'epoch': 8.0}


100%|██████████| 35/35 [00:05<00:00,  6.92it/s]


Writing eval results
{'eval_loss': 1.0329641103744507, 'eval_accuracy': 0.6570397111913358, 'eval_runtime': 5.2112, 'eval_samples_per_second': 53.155, 'eval_steps_per_second': 6.716, 'epoch': 8.0}
**********************************RUNNING TASK sick*****************************
using bert tokenizer
getting encoding:
<function encode_sick at 0x000001B0B12023A0>
mapped
{'train': ['id', 'sentence_A', 'sentence_B', 'label', 'relatedness_score', 'entailment_AB', 'entailment_BA', 'sentence_A_original', 'sentence_B_original', 'sentence_A_dataset', 'sentence_B_dataset', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['id', 'sentence_A', 'sentence_B', 'label', 'relatedness_score', 'entailment_AB', 'entailment_BA', 'sentence_A_original', 'sentence_B_original', 'sentence_A_dataset', 'sentence_B_dataset', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['id', 'sentence_A', 'sentence_B', 'label', 'relatedness_score', 'entailment_AB', 'entailment_BA', 'sentence_A_original',

Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  1%|          | 200/16650 [01:27<1:57:34,  2.33it/s]

{'loss': 0.7267, 'learning_rate': 9.87987987987988e-05, 'epoch': 0.36}


  2%|▏         | 400/16650 [02:54<1:59:19,  2.27it/s]

{'loss': 0.5681, 'learning_rate': 9.75975975975976e-05, 'epoch': 0.72}


                                                     
  3%|▎         | 555/16650 [04:14<1:56:26,  2.30it/s]

{'eval_loss': 0.48676493763923645, 'eval_accuracy': 0.7919191919191919, 'eval_runtime': 13.5551, 'eval_samples_per_second': 36.518, 'eval_steps_per_second': 4.574, 'epoch': 1.0}


  4%|▎         | 600/16650 [04:34<1:53:52,  2.35it/s] 

{'loss': 0.5159, 'learning_rate': 9.639639639639641e-05, 'epoch': 1.08}


  5%|▍         | 800/16650 [06:04<2:22:56,  1.85it/s]

{'loss': 0.4411, 'learning_rate': 9.51951951951952e-05, 'epoch': 1.44}


  6%|▌         | 1000/16650 [07:32<1:56:39,  2.24it/s]

{'loss': 0.4642, 'learning_rate': 9.3993993993994e-05, 'epoch': 1.8}


                                                      
  7%|▋         | 1110/16650 [08:35<1:55:14,  2.25it/s]

{'eval_loss': 0.3867165744304657, 'eval_accuracy': 0.8383838383838383, 'eval_runtime': 13.0588, 'eval_samples_per_second': 37.905, 'eval_steps_per_second': 4.748, 'epoch': 2.0}


  7%|▋         | 1200/16650 [09:14<1:52:54,  2.28it/s] 

{'loss': 0.4147, 'learning_rate': 9.279279279279279e-05, 'epoch': 2.16}


  8%|▊         | 1400/16650 [10:43<1:52:24,  2.26it/s]

{'loss': 0.3658, 'learning_rate': 9.15915915915916e-05, 'epoch': 2.52}


 10%|▉         | 1600/16650 [12:10<1:50:22,  2.27it/s]

{'loss': 0.3817, 'learning_rate': 9.039039039039039e-05, 'epoch': 2.88}


                                                      
 10%|█         | 1665/16650 [12:51<1:45:52,  2.36it/s]

{'eval_loss': 0.5659084916114807, 'eval_accuracy': 0.793939393939394, 'eval_runtime': 13.0213, 'eval_samples_per_second': 38.015, 'eval_steps_per_second': 4.761, 'epoch': 3.0}


 11%|█         | 1800/16650 [13:50<1:45:59,  2.34it/s] 

{'loss': 0.3551, 'learning_rate': 8.918918918918919e-05, 'epoch': 3.24}


 12%|█▏        | 2000/16650 [15:16<1:41:03,  2.42it/s]

{'loss': 0.3233, 'learning_rate': 8.7987987987988e-05, 'epoch': 3.6}


 13%|█▎        | 2200/16650 [16:45<1:44:01,  2.32it/s]

{'loss': 0.3309, 'learning_rate': 8.678678678678678e-05, 'epoch': 3.96}


                                                      
 13%|█▎        | 2220/16650 [17:08<1:46:19,  2.26it/s]

{'eval_loss': 0.40396973490715027, 'eval_accuracy': 0.8666666666666667, 'eval_runtime': 13.3697, 'eval_samples_per_second': 37.024, 'eval_steps_per_second': 4.637, 'epoch': 4.0}


 14%|█▍        | 2400/16650 [18:28<1:41:57,  2.33it/s] 

{'loss': 0.2745, 'learning_rate': 8.55855855855856e-05, 'epoch': 4.32}


 16%|█▌        | 2600/16650 [19:56<1:46:03,  2.21it/s]

{'loss': 0.2898, 'learning_rate': 8.438438438438439e-05, 'epoch': 4.68}


                                                      
 17%|█▋        | 2775/16650 [21:26<1:41:05,  2.29it/s]

{'eval_loss': 0.5564153790473938, 'eval_accuracy': 0.8343434343434344, 'eval_runtime': 13.2138, 'eval_samples_per_second': 37.461, 'eval_steps_per_second': 4.692, 'epoch': 5.0}


 17%|█▋        | 2800/16650 [21:38<1:40:00,  2.31it/s] 

{'loss': 0.2745, 'learning_rate': 8.318318318318319e-05, 'epoch': 5.05}


 18%|█▊        | 3000/16650 [23:04<1:37:13,  2.34it/s]

{'loss': 0.2557, 'learning_rate': 8.198198198198198e-05, 'epoch': 5.41}


 19%|█▉        | 3200/16650 [24:31<1:36:29,  2.32it/s]

{'loss': 0.273, 'learning_rate': 8.078078078078079e-05, 'epoch': 5.77}


                                                      
 20%|██        | 3330/16650 [25:40<1:33:10,  2.38it/s]

{'eval_loss': 0.48182153701782227, 'eval_accuracy': 0.8484848484848485, 'eval_runtime': 12.9928, 'eval_samples_per_second': 38.098, 'eval_steps_per_second': 4.772, 'epoch': 6.0}


 20%|██        | 3400/16650 [26:11<1:35:02,  2.32it/s] 

{'loss': 0.2177, 'learning_rate': 7.957957957957959e-05, 'epoch': 6.13}


 22%|██▏       | 3600/16650 [27:37<1:33:17,  2.33it/s]

{'loss': 0.2025, 'learning_rate': 7.837837837837838e-05, 'epoch': 6.49}


 23%|██▎       | 3800/16650 [29:03<1:32:01,  2.33it/s]

{'loss': 0.2406, 'learning_rate': 7.717717717717718e-05, 'epoch': 6.85}


                                                      
 23%|██▎       | 3885/16650 [29:53<1:27:46,  2.42it/s]

{'eval_loss': 0.5587941408157349, 'eval_accuracy': 0.8464646464646465, 'eval_runtime': 13.135, 'eval_samples_per_second': 37.685, 'eval_steps_per_second': 4.72, 'epoch': 7.0}


 23%|██▎       | 3885/16650 [29:54<1:38:15,  2.17it/s]


{'train_runtime': 1794.1736, 'train_samples_per_second': 74.224, 'train_steps_per_second': 9.28, 'train_loss': 0.36113526480538505, 'epoch': 7.0}


100%|██████████| 62/62 [00:12<00:00,  5.02it/s]


Writing eval results
{'eval_loss': 0.40396973490715027, 'eval_accuracy': 0.8666666666666667, 'eval_runtime': 12.5569, 'eval_samples_per_second': 39.42, 'eval_steps_per_second': 4.938, 'epoch': 7.0}
**********************************RUNNING TASK mrpc*****************************
using bert tokenizer
getting encoding:
<function encode_mrpc at 0x000001B0B1202550>
mapped
{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  1%|▏         | 200/13770 [01:26<1:41:43,  2.22it/s]

{'loss': 0.6093, 'learning_rate': 9.854756717501816e-05, 'epoch': 0.44}


  3%|▎         | 400/13770 [02:53<1:35:58,  2.32it/s]

{'loss': 0.5362, 'learning_rate': 9.709513435003632e-05, 'epoch': 0.87}


                                                     
  3%|▎         | 459/13770 [03:29<1:26:27,  2.57it/s]

{'eval_loss': 0.39042845368385315, 'eval_accuracy': 0.8137254901960784, 'eval_runtime': 10.7252, 'eval_samples_per_second': 38.041, 'eval_steps_per_second': 4.755, 'epoch': 1.0}


  4%|▍         | 600/13770 [04:30<1:34:07,  2.33it/s] 

{'loss': 0.442, 'learning_rate': 9.564270152505447e-05, 'epoch': 1.31}


  6%|▌         | 800/13770 [05:57<1:34:44,  2.28it/s]

{'loss': 0.4318, 'learning_rate': 9.419026870007263e-05, 'epoch': 1.74}


                                                     
  7%|▋         | 918/13770 [06:59<1:20:58,  2.65it/s]

{'eval_loss': 0.34755048155784607, 'eval_accuracy': 0.8455882352941176, 'eval_runtime': 10.8199, 'eval_samples_per_second': 37.708, 'eval_steps_per_second': 4.714, 'epoch': 2.0}


  7%|▋         | 1000/13770 [07:36<1:37:23,  2.19it/s]

{'loss': 0.3884, 'learning_rate': 9.273783587509079e-05, 'epoch': 2.18}


  9%|▊         | 1200/13770 [09:04<1:30:53,  2.30it/s]

{'loss': 0.3457, 'learning_rate': 9.128540305010894e-05, 'epoch': 2.61}


                                                      
 10%|█         | 1377/13770 [10:32<1:26:14,  2.40it/s]

{'eval_loss': 0.38459905982017517, 'eval_accuracy': 0.8284313725490197, 'eval_runtime': 10.8439, 'eval_samples_per_second': 37.625, 'eval_steps_per_second': 4.703, 'epoch': 3.0}


 10%|█         | 1400/13770 [10:43<1:29:02,  2.32it/s] 

{'loss': 0.3253, 'learning_rate': 8.983297022512709e-05, 'epoch': 3.05}


 12%|█▏        | 1600/13770 [12:11<1:28:10,  2.30it/s]

{'loss': 0.2447, 'learning_rate': 8.838053740014525e-05, 'epoch': 3.49}


 13%|█▎        | 1800/13770 [13:38<1:27:45,  2.27it/s]

{'loss': 0.2732, 'learning_rate': 8.692810457516341e-05, 'epoch': 3.92}


                                                      
 13%|█▎        | 1836/13770 [14:04<1:14:25,  2.67it/s]

{'eval_loss': 0.42980140447616577, 'eval_accuracy': 0.8676470588235294, 'eval_runtime': 10.7367, 'eval_samples_per_second': 38.001, 'eval_steps_per_second': 4.75, 'epoch': 4.0}


 15%|█▍        | 2000/13770 [15:17<1:27:04,  2.25it/s] 

{'loss': 0.2114, 'learning_rate': 8.547567175018157e-05, 'epoch': 4.36}


 16%|█▌        | 2200/13770 [16:46<1:25:05,  2.27it/s]

{'loss': 0.1939, 'learning_rate': 8.402323892519971e-05, 'epoch': 4.79}


 17%|█▋        | 2295/13770 [17:27<1:15:10,  2.54it/s]

In [5]:
##### full run:
####### MOVE THIS TO A .PY SCRIPT. ONLY HERE FOR A TEST RUN
# Single-task run of the same steps the sweep performs, with the adapter
# settings written out literally.
task = "mrpc"

# Results folder for this standalone run. `output_dir` used to be taken from
# whatever the sweep loop assigned last, so this cell raised NameError on a
# fresh kernel and otherwise wrote into an unrelated configuration's folder.
output_dir = os.path.join(output_path, "single_run")
os.makedirs(output_dir, exist_ok=True)

data = load_hf_dataset(task,debug=False)

tokenizer = get_tokenizer(model_name)

encode = get_encoding(task)

dataset = preprocess_dataset(data,encode,tokenizer)


num_labels = get_label_count(dataset)
print("labels",num_labels)
model = setup_model(model_name,num_labels,dataset)

adapter_config = adapters.BnConfig(
                          output_adapter=True,
                          mh_adapter=False,
                          reduction_factor=2,
                          non_linearity="relu")

model = add_clf_adapter(task_name=task,model=model,num_labels=num_labels,adapter_config=adapter_config)

# Training parameters are the TrainingParameters defaults except for the
# learning-rate scheduler type.
default_args = TrainingParameters()
default_args.lr_scheduler_type = "linear"

train_args = get_training_arguments(default_args)

trainer = get_trainer(train_args,dataset,model,early_stopping=3)

trainer.train()

eval_results = trainer.evaluate()

write_eval_results(eval_results,output_dir,task,trainer,adapter_config)
##### full run ends. MOVE THIS.

Downloading data: 100%|██████████| 649k/649k [00:00<00:00, 1.66MB/s]
Downloading data: 100%|██████████| 75.7k/75.7k [00:00<00:00, 359kB/s]
Downloading data: 100%|██████████| 308k/308k [00:00<00:00, 1.35MB/s]
Generating train split: 100%|██████████| 3668/3668 [00:00<00:00, 353070.80 examples/s]
Generating validation split: 100%|██████████| 408/408 [00:00<00:00, 101637.82 examples/s]
Generating test split: 100%|██████████| 1725/1725 [00:00<00:00, 216201.24 examples/s]


using bert tokenizer
getting encoding:
<function encode_mrpc at 0x000002E00F811550>


Map: 100%|██████████| 3668/3668 [00:06<00:00, 590.85 examples/s]
Map: 100%|██████████| 408/408 [00:00<00:00, 577.79 examples/s]
Map: 100%|██████████| 1725/1725 [00:02<00:00, 599.53 examples/s]


mapped
{'train': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['sentence1', 'sentence2', 'label', 'idx', 'input_ids', 'token_type_ids', 'attention_mask']}
labels 2


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 29/13770 [00:12<1:41:40,  2.25it/s]

KeyboardInterrupt: 

In [4]:
# Step-by-step run for one task. The task is set here explicitly; it used to
# be inherited from whichever cell assigned `task` last, so the result of this
# cell depended on execution order.
task = "scitail"

# Load the dataset for the task.
data = load_hf_dataset(task,debug=False)

# Tokenizer for the configured base model.
tokenizer = get_tokenizer(model_name)

# Encoding function that belongs to this task.
encode = get_encoding(task)

using bert tokenizer
getting encoding:
<function encode_scitail at 0x000002C001F6E550>


In [5]:
# Apply the task's encoding function to the loaded data with the tokenizer.
# The resulting `dataset` is what the model-setup and trainer cells consume.
dataset = preprocess_dataset(data,encode,tokenizer)

Map:   0%|          | 0/23097 [00:00<?, ? examples/s]Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not returned for the setting you have chosen, i.e. sequence pairs with the 'longest_first' truncation strategy. So the returned list will always be empty even if some tokens have been removed.
Be aware, overflowing tokens are not return

mapped
{'train': ['premise', 'hypothesis', 'label', 'input_ids', 'token_type_ids', 'attention_mask'], 'test': ['premise', 'hypothesis', 'label', 'input_ids', 'token_type_ids', 'attention_mask'], 'validation': ['premise', 'hypothesis', 'label', 'input_ids', 'token_type_ids', 'attention_mask']}





In [6]:
# Adapter settings for this single run, written out literally.
adapter_config = adapters.BnConfig(
    output_adapter=True, mh_adapter=False, reduction_factor=2, non_linearity="relu"
)

# The label count comes from the helper (an earlier draft derived it from the
# label names of the training split's features).
num_labels = get_label_count(dataset)
print("labels",num_labels)

# Base model first, then the task adapter on top of it.
model = setup_model(model_name,num_labels,dataset)
model = add_clf_adapter(task_name=task,model=model,num_labels=num_labels,adapter_config=adapter_config)

# Default training parameters; only the scheduler type is set explicitly.
default_args = TrainingParameters()
setattr(default_args, "lr_scheduler_type", "linear")


labels 2


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
# Turn the parameter object into the training arguments the trainer expects.
train_args = get_training_arguments(default_args)

# Build the trainer for this dataset/model pair.
# NOTE(review): early_stopping=3 is presumably the early-stopping patience -
# confirm in get_trainer.
trainer = get_trainer(train_args,dataset,model,early_stopping=3)

# Last expression of the cell: runs training and displays the returned result.
trainer.train()

                                                    
  0%|          | 50/86640 [00:45<8:09:11,  2.95it/s]

{'eval_loss': 0.5120983719825745, 'eval_accuracy': 0.7576687116564417, 'eval_runtime': 26.097, 'eval_samples_per_second': 49.967, 'eval_steps_per_second': 6.246, 'epoch': 0.02}


  0%|          | 100/86640 [01:01<7:49:31,  3.07it/s] 
  0%|          | 100/86640 [01:27<7:49:31,  3.07it/s]

{'eval_loss': 0.482826828956604, 'eval_accuracy': 0.7507668711656442, 'eval_runtime': 26.8769, 'eval_samples_per_second': 48.517, 'eval_steps_per_second': 6.065, 'epoch': 0.03}


                                                       
  0%|          | 150/86640 [02:12<7:49:15,  3.07it/s]

{'eval_loss': 0.407736599445343, 'eval_accuracy': 0.8205521472392638, 'eval_runtime': 27.6917, 'eval_samples_per_second': 47.09, 'eval_steps_per_second': 5.886, 'epoch': 0.05}


  0%|          | 200/86640 [02:29<7:43:01,  3.11it/s]  

{'loss': 0.5546, 'learning_rate': 9.976915974145891e-05, 'epoch': 0.07}


                                                     
  0%|          | 200/86640 [02:57<7:43:01,  3.11it/s]

{'eval_loss': 0.36100077629089355, 'eval_accuracy': 0.8496932515337423, 'eval_runtime': 28.7014, 'eval_samples_per_second': 45.433, 'eval_steps_per_second': 5.679, 'epoch': 0.07}


                                                       
  0%|          | 250/86640 [03:43<9:12:41,  2.61it/s]

{'eval_loss': 0.36655500531196594, 'eval_accuracy': 0.8504601226993865, 'eval_runtime': 28.0162, 'eval_samples_per_second': 46.545, 'eval_steps_per_second': 5.818, 'epoch': 0.09}


                                                       
  0%|          | 300/86640 [04:28<8:00:13,  3.00it/s]

{'eval_loss': 0.3831276595592499, 'eval_accuracy': 0.8328220858895705, 'eval_runtime': 27.7914, 'eval_samples_per_second': 46.921, 'eval_steps_per_second': 5.865, 'epoch': 0.1}


                                                       
  0%|          | 350/86640 [05:13<8:13:31,  2.91it/s]

{'eval_loss': 0.3124072253704071, 'eval_accuracy': 0.8711656441717791, 'eval_runtime': 28.0155, 'eval_samples_per_second': 46.546, 'eval_steps_per_second': 5.818, 'epoch': 0.12}


  0%|          | 400/86640 [05:30<8:39:21,  2.77it/s]  

{'loss': 0.4675, 'learning_rate': 9.953831948291783e-05, 'epoch': 0.14}


                                                     
  0%|          | 400/86640 [05:57<8:39:21,  2.77it/s]

{'eval_loss': 0.4579084813594818, 'eval_accuracy': 0.8029141104294478, 'eval_runtime': 26.6335, 'eval_samples_per_second': 48.961, 'eval_steps_per_second': 6.12, 'epoch': 0.14}


                                                       
  1%|          | 450/86640 [06:41<8:00:06,  2.99it/s]

{'eval_loss': 0.3183535635471344, 'eval_accuracy': 0.8573619631901841, 'eval_runtime': 27.2307, 'eval_samples_per_second': 47.887, 'eval_steps_per_second': 5.986, 'epoch': 0.16}


                                                       
  1%|          | 500/86640 [07:27<8:23:01,  2.85it/s]

{'eval_loss': 0.28294530510902405, 'eval_accuracy': 0.8757668711656442, 'eval_runtime': 28.7131, 'eval_samples_per_second': 45.415, 'eval_steps_per_second': 5.677, 'epoch': 0.17}


                                                       
  1%|          | 550/86640 [08:13<8:04:11,  2.96it/s]

{'eval_loss': 0.3734060525894165, 'eval_accuracy': 0.8404907975460123, 'eval_runtime': 28.8571, 'eval_samples_per_second': 45.188, 'eval_steps_per_second': 5.649, 'epoch': 0.19}


  1%|          | 600/86640 [08:30<8:23:48,  2.85it/s]  

{'loss': 0.3736, 'learning_rate': 9.930747922437674e-05, 'epoch': 0.21}


                                                     
  1%|          | 600/86640 [09:00<8:23:48,  2.85it/s]

{'eval_loss': 0.29867643117904663, 'eval_accuracy': 0.870398773006135, 'eval_runtime': 29.1901, 'eval_samples_per_second': 44.673, 'eval_steps_per_second': 5.584, 'epoch': 0.21}


                                                       
  1%|          | 650/86640 [09:45<8:40:19,  2.75it/s]

{'eval_loss': 0.3124051094055176, 'eval_accuracy': 0.8734662576687117, 'eval_runtime': 27.8346, 'eval_samples_per_second': 46.848, 'eval_steps_per_second': 5.856, 'epoch': 0.23}


  1%|          | 650/86640 [09:46<21:32:15,  1.11it/s]

{'train_runtime': 586.0818, 'train_samples_per_second': 1182.275, 'train_steps_per_second': 147.829, 'train_loss': 0.4591100810124324, 'epoch': 0.23}





TrainOutput(global_step=650, training_loss=0.4591100810124324, metrics={'train_runtime': 586.0818, 'train_samples_per_second': 1182.275, 'train_steps_per_second': 147.829, 'train_loss': 0.4591100810124324, 'epoch': 0.23})

In [8]:
# Evaluate with the trainer and keep the returned metrics; they are handed to
# write_eval_results afterwards.
eval_results = trainer.evaluate()

100%|██████████| 163/163 [00:27<00:00,  5.85it/s]


In [8]:
# Results folder for the step-by-step run. `output_dir` was previously only
# assigned inside the configuration sweep loop, so on a fresh kernel this cell
# raised NameError and otherwise wrote into the sweep's last folder.
output_dir = os.path.join(output_path, "single_run")
os.makedirs(output_dir, exist_ok=True)

write_eval_results(eval_results,output_dir,task,trainer,adapter_config)

Writing eval results
{'eval_loss': 0.6164497137069702, 'eval_accuracy': 0.6672782874617736, 'eval_runtime': 46.1558, 'eval_samples_per_second': 70.847, 'eval_steps_per_second': 8.861, 'epoch': 0.64}


In [9]:
# This cell executes nothing: it consists of two bare string literals that
# hold disabled code. The first is a loop printing every attribute of
# `default_args`; the second is a reference dict of training arguments.
# Because the second string is the last expression of the cell, the notebook
# echoes it as the cell output.
"""for attr_name, attr_value in vars(default_args).items():
    print(f"{attr_name}: {attr_value}")"""
"""
    train_args = {"label_names":["labels"],
    "evaluation_strategy":"steps",
    "learning_rate":1e-4,
    "num_train_epochs":1,
    "per_device_train_batch_size":8,
    "per_device_eval_batch_size":8,
    "eval_steps":50,
    "logging_steps":200,
    "output_dir":"/eval_results",
    "overwrite_output_dir":True,
    "remove_unused_columns":False,
    "lr_scheduler_type":'linear',
    "load_best_model_at_end":True,
    "metric_for_best_model" : "accuracy",
    "early_stopping_patience":3,
    "save_total_limit":5
}
"""

'\n    train_args = {"label_names":["labels"],\n    "evaluation_strategy":"steps",\n    "learning_rate":1e-4,\n    "num_train_epochs":1,\n    "per_device_train_batch_size":8,\n    "per_device_eval_batch_size":8,\n    "eval_steps":50,\n    "logging_steps":200,\n    "output_dir":"/eval_results",\n    "overwrite_output_dir":True,\n    "remove_unused_columns":False,\n    "lr_scheduler_type":\'linear\',\n    "load_best_model_at_end":True,\n    "metric_for_best_model" : "accuracy",\n    "early_stopping_patience":3,\n    "save_total_limit":5\n}\n'