In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import transformers
import os

In [2]:
# MODEL
# Settings describing the corpus, the pretrained language model and the adapter setup.

_LANGUAGE_         = 'es'                                   # corpus language; selects the variety label mapping
_PRETRAINED_LM_    = 'pysentimiento/robertuito-base-cased'  # checkpoint used for both tokenizer and model
_PREPROCESS_TEXT_  = True                                   # forwarded to BasePAN17(preprocess_text=...)
_TWEET_BATCH_SIZE_ = 5                                      # forwarded to BasePAN17(tweet_batch_size=...)
_ADAPTER_CONFIG_   = transformers.ParallelConfig(
    mh_adapter       = True,
    reduction_factor = 32,
)
_MAX_SEQ_LEN_      = 128                                    # forwarded to BasePAN17(max_seq_len=...)
_OUTPUT_DIR_       = 'adapter_checkPoints'                  # root directory for trainer checkpoints
_LOGGING_STEPS_    = 2
_NUM_AUTHORS_      = [8, 16, 32, 64, 128, 256, 512]         # few-shot budgets (authors per label)
_K_FOLD_CV_        = 5                                      # number of cross-validation splits

# TRAIN
# Optimisation settings. The per-device batch size is derived so that the total batch
# across all GPUs is 32.

_NO_GPUS_          = 2
_BATCH_SIZE_       = 32 // _NO_GPUS_
_EPOCHS_           = {
    'gender':  20,
    'variety': 20,
}
_LEARNING_RATE_    = 5e-5

## Base

In [3]:
# LABEL DICTIONARIES ----------------------------------------------------------------------
# Each mapping assigns consecutive integer ids, starting at 0, to the class names of a task.

gender_dict = {label: idx for idx, label in enumerate(('female', 'male'))}

# Variety (country) labels of the English corpus.
varietyEN_dict = {label: idx for idx, label in enumerate((
    'australia',
    'canada',
    'great britain',
    'ireland',
    'new zealand',
    'united states',
))}

# Variety (country) labels of the Spanish corpus.
varietyES_dict = {label: idx for idx, label in enumerate((
    'argentina',
    'chile',
    'colombia',
    'mexico',
    'peru',
    'spain',
    'venezuela',
))}

In [4]:
# SET LANGUAGE DICTIONARY
# Select the variety label mapping that matches the configured corpus language.
# An unsupported language fails here, instead of leaving `variety_dict` undefined
# (or holding a stale value from an earlier kernel run) for the cells below.

if _LANGUAGE_ == 'en':
    variety_dict = varietyEN_dict

elif _LANGUAGE_ == 'es':
    variety_dict = varietyES_dict

else:
    raise ValueError(f"Unsupported _LANGUAGE_ {_LANGUAGE_!r}: expected 'en' or 'es'")

In [5]:
# SET LANGUAGE TOKENIZER
from transformers import AutoTokenizer

# Load the tokenizer that belongs to the pretrained checkpoint, then keep a copy of its
# token -> id vocabulary.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path = _PRETRAINED_LM_)
vocab     = tokenizer.get_vocab()

## Dataset

In [6]:
from DatasetPAN17 import BasePAN17, DatasetPAN17, DatasetCrossVal

In [7]:
# Both corpus splits are read with the same settings; only `split` differs.
shared_args = dict(
    Dir              = '../data',
    language         = _LANGUAGE_,
    tokenizer        = tokenizer,
    gender_dict      = gender_dict,
    variety_dict     = variety_dict,
    tweet_batch_size = _TWEET_BATCH_SIZE_,
    max_seq_len      = _MAX_SEQ_LEN_,
    preprocess_text  = _PREPROCESS_TEXT_,
)

# Train split first, then test split (each constructor reports its progress as it runs).
baseTrain = BasePAN17(split = 'train', **shared_args)
baseTest  = BasePAN17(split = 'test',  **shared_args)


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 84000


Reading data...
    Done
Preprocessing text...
    Done
Tokenizing...
    Done
Merging data...
    Done

Total Instances: 56000



In [8]:
# Test split (baseTest) wrapped as a dataset whose labels are selected with label='gender'.
Test = DatasetPAN17(Base_Dataset = baseTest, label = 'gender')

## Training

In [9]:
# Tasks iterated by the training cell; each name is used as a key into
# num_labels_dict and _EPOCHS_, and as the adapter / head name.
tasks = ['gender']

In [10]:
from transformers import AutoModelForSequenceClassification, AutoAdapterModel
from transformers import TrainingArguments, Trainer, AdapterTrainer, EarlyStoppingCallback
from TestingPAN17 import compute_accuracy, compute_test_metrics

# FEW-SHOT EXPERIMENT ---------------------------------------------------------------------
# For every author budget in _NUM_AUTHORS_ and every task in `tasks`, a fresh adapter and
# classification head are trained on each of the _K_FOLD_CV_ train/validation splits and
# then scored on the test split. The mean and standard deviation of the test accuracy and
# F1 over the folds are stored in FewShot_Results[num].

# Number of output classes of each task's classification head.
num_v           = len(baseTest.variety_dict)
num_labels_dict = {"gender": 2, "variety": num_v,}

# Test split wrapped once per task, so that each task is evaluated against its own labels
# instead of always against the gender labels.
test_sets = {name: DatasetPAN17(Base_Dataset = baseTest, label = name) for name in tasks}

# NOTE(review): results are keyed by author budget only, so with more than one entry in
# `tasks` a later task overwrites an earlier one. The key is left unchanged for any code
# that reads FewShot_Results[num].
FewShot_Results = {}

for num in _NUM_AUTHORS_:
    # SHOW CURRENT PORTION
    print(f"Working with {num} authors per label ... ")

    dataset_dict = {}
    models = {}

    for task_name in tasks:

        # INITIALIZE MODEL-----------------------------------------
        # The pretrained model is loaded once per task and author budget; only the
        # adapter and head are re-created for every fold.

        models[task_name] = AutoAdapterModel.from_pretrained(_PRETRAINED_LM_)

        # Per-fold test metrics for this task and author budget.
        acc = []
        f1s = []

        for val_idx in range(_K_FOLD_CV_):
            print(f"Train,Val split number {val_idx + 1} of {_K_FOLD_CV_}")

            # INITIALIZE ADAPTER-----------------------------------------
            # A new adapter and head are added for each fold; both are deleted again at
            # the end of the fold so the next fold starts from new ones.

            models[task_name].add_adapter(
                adapter_name = task_name,
                config       = _ADAPTER_CONFIG_
            )

            models[task_name].add_classification_head(
                head_name    = task_name,
                num_labels   = num_labels_dict[task_name],
            )

            models[task_name].set_active_adapters(task_name)
            models[task_name].train_adapter(task_name)

            # GENERATES DATASET WITH CURRENT PORTION ----------------------
            # Labels follow the task being trained (previously hard-coded to 'gender').

            data_train, data_val = baseTrain.cross_val(k = _K_FOLD_CV_, val_idx = val_idx, num_authors = num)
            Train = DatasetCrossVal(Base_Data = data_train, label = task_name)
            Val   = DatasetCrossVal(Base_Data = data_val  , label = task_name)

            # TRAIN ADAPTER--------------------------------------------
            # Evaluation, logging and checkpointing all happen once per epoch, which
            # load_best_model_at_end and early stopping rely on.
            # NOTE(review): output_dir is shared by all folds and author budgets, so
            # checkpoints left by earlier runs stay in the directory and take part in the
            # save_total_limit rotation.

            training_args = TrainingArguments(
                learning_rate               = _LEARNING_RATE_,
                num_train_epochs            = _EPOCHS_[task_name],
                per_device_train_batch_size = _BATCH_SIZE_,
                per_device_eval_batch_size  = _BATCH_SIZE_,
                output_dir                  = _OUTPUT_DIR_ + '/' + task_name,
                save_total_limit            = 5,
                overwrite_output_dir        = True,
                remove_unused_columns       = False,
                evaluation_strategy         = 'epoch',
                logging_strategy            = 'epoch',
                save_strategy               = 'epoch',
                metric_for_best_model       = 'eval_acc',  # presumably the 'acc' key returned by compute_accuracy -- confirm
                load_best_model_at_end      = True,
            )

            # Training stops once the validation metric has failed to improve by more
            # than the threshold for 2 consecutive evaluations.
            trainer = AdapterTrainer(
                model           = models[task_name],
                args            = training_args,
                train_dataset   = Train,
                eval_dataset    = Val,
                compute_metrics = compute_accuracy,
                callbacks       = [EarlyStoppingCallback(early_stopping_patience = 2, early_stopping_threshold = 0.0001)]
            )
            # Overrides the trainer's GPU count with the configured value
            # (this writes a private attribute of TrainingArguments).
            trainer.args._n_gpu = _NO_GPUS_

            trainer.train()

            # TEST MODEL ------------------------------------
            # Evaluated on the test set and labels of the current task.

            results = trainer.predict(test_sets[task_name])
            metrics = compute_test_metrics(baseTest, results.predictions, task_name)

            acc.append(metrics['accuracy'])
            f1s.append(metrics['f1-score'])

            # DELETE ADAPTER --------------------------------

            models[task_name].delete_adapter(task_name)
            models[task_name].delete_head(task_name)

        # Summarise the folds as [mean, standard deviation] per metric.
        acc = np.array(acc)
        f1s = np.array(f1s)

        FewShot_Results[num] = {'accuracy': [acc.mean(), acc.std()], 'f1-score': [f1s.mean(), f1s.std()]}
        print(f"Results with {num} authors per label: ", FewShot_Results[num])

Working with 8 authors per label ... 


Some weights of the model checkpoint at pysentimiento/robertuito-base-cased were not used when initializing RobertaAdapterModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaAdapterModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaAdapterModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaAdapterModel were not initialized from the model checkpoint at pysentimiento/robertuito-base-cased and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able

Train,Val split number 1 of 5


***** Running training *****
  Num examples = 320
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 200


Epoch,Training Loss,Validation Loss,Acc
1,0.6406,0.665384,0.596071
2,0.5213,0.663332,0.603274
3,0.4157,0.689176,0.603333
4,0.336,0.711153,0.610952
5,0.2662,0.754629,0.611726
6,0.2435,0.817822,0.613512
7,0.1917,0.832711,0.61381
8,0.1656,0.882409,0.61494
9,0.1338,0.955413,0.611667
10,0.1079,0.99152,0.613988


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-10
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-20] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-20
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/ad

Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-90
Configuration saved in adapter_checkPoints/gender/checkpoint-90/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-90/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-90/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-90/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-90/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-90/gender/pytorch_model_head.bin
Deleting older checkpoint [ada

acc: 0.6503571428571429: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 148.38it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 2 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 320
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 200


Epoch,Training Loss,Validation Loss,Acc
1,0.6339,0.674462,0.572619
2,0.5162,0.674062,0.580238
3,0.4215,0.686199,0.589167
4,0.3557,0.693935,0.592083
5,0.2926,0.760352,0.594583
6,0.2747,0.74743,0.601905
7,0.2292,0.790065,0.59994
8,0.1913,0.806497,0.603274
9,0.1728,0.869313,0.605774
10,0.141,0.881684,0.597917


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-10
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-20
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/ad

Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-30] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-90
Configuration saved in adapter_checkPoints/gender/checkpoint-90/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-90/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-90/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-90/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-90/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-90/gender/pytorch_model_head.bin
Deleting older checkpoint [ada

acc: 0.6489285714285714: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:19<00:00, 146.52it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 3 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 320
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 200


Epoch,Training Loss,Validation Loss,Acc
1,0.6658,0.663832,0.607619
2,0.5572,0.653829,0.608571
3,0.4539,0.660064,0.619107
4,0.396,0.660195,0.616607
5,0.317,0.712125,0.616131


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-10
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-70] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-20
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/ad

acc: 0.6178571428571429: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:19<00:00, 145.61it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 4 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 320
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 200


Epoch,Training Loss,Validation Loss,Acc
1,0.6633,0.685366,0.55375
2,0.5635,0.678359,0.568274
3,0.4836,0.685287,0.580179
4,0.4308,0.690075,0.57756
5,0.3542,0.721792,0.588274
6,0.3064,0.724355,0.593155
7,0.2656,0.748889,0.596429
8,0.2314,0.760139,0.600774
9,0.2154,0.795623,0.60119
10,0.1848,0.825023,0.597321


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-10
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-20
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_

***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-90
Configuration saved in adapter_checkPoints/gender/checkpoint-90/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-90/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-90/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-90/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-90/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-90/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-100
Configuration saved in adapter_checkPoints/gender/checkpoint-100/gender/

acc: 0.6235714285714286: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:19<00:00, 142.14it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 5 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 320
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 200


Epoch,Training Loss,Validation Loss,Acc
1,0.6658,0.68206,0.567024
2,0.5477,0.677813,0.583036
3,0.4566,0.690869,0.585714
4,0.3853,0.693872,0.59119
5,0.3125,0.755381,0.586607
6,0.2906,0.739075,0.599345
7,0.2495,0.786023,0.591726
8,0.2058,0.811718,0.591845


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-10
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-10/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-10/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-90] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-20
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/ad

Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-30] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from adapter_checkPoints/gender/checkpoint-60 (score: 0.5993452380952381).
Could not locate the best model at adapter_checkPoints/gender/checkpoint-60/pytorch_model.bin, if you are running a distributed training on multiple nodes, you should activate `--save_on_each_node`.
Loading best adapter(s) from adapter_checkPoints/gender/checkpoint-60 (score: 0.5993452380952381).
Loading module configuration from adapter_checkPoints/gender/checkpoint-60/gender/adapter_config.json
Overwriting existing adapter 'gender'.
Loading module weights from adapter_checkPoints/gender/checkpoint-60/gender/pytorch_adapter.bin
Load

acc: 0.6457142857142857: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:19<00:00, 141.22it/s]


Results with 8 authors per label:  {'accuracy': [0.6372857142857142, 0.013733245822322057], 'f1-score': [0.6003591459285674, 0.0337803528007935]}
Working with 16 authors per label ... 


loading configuration file https://huggingface.co/pysentimiento/robertuito-base-cased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/3f85c0ee804baf604258892a88dd52cdf051d2418a511dcab7cab99a85a3a1b3.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-cased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.21

Train,Val split number 1 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 640
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 400


Epoch,Training Loss,Validation Loss,Acc
1,0.655,0.668453,0.5975
2,0.5241,0.654886,0.61506
3,0.4315,0.680819,0.618155
4,0.3857,0.70767,0.62381
5,0.3258,0.738487,0.620893
6,0.2968,0.772763,0.631071
7,0.2664,0.814285,0.628155
8,0.2104,0.826493,0.630179


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-20
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-40
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/ad

Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-20] due to args.save_total_limit


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from adapter_checkPoints/gender/checkpoint-120 (score: 0.6310714285714286).
Could not locate the best model at adapter_checkPoints/gender/checkpoint-120/pytorch_model.bin, if you are running a distributed training on multiple nodes, you should activate `--save_on_each_node`.
Loading best adapter(s) from adapter_checkPoints/gender/checkpoint-120 (score: 0.6310714285714286).
Loading module configuration from adapter_checkPoints/gender/checkpoint-120/gender/adapter_config.json
Overwriting existing adapter 'gender'.
Loading module weights from adapter_checkPoints/gender/checkpoint-120/gender/pytorch_adapter.bin
Loading module configuration from adapter_checkPoints/gender/checkpoint-120/gender/head_config.json
Overwriting existing head 'gender'
Adding head 'gender' with config {'head_type': 

acc: 0.6803571428571429: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:19<00:00, 143.60it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 2 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 640
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 400


Epoch,Training Loss,Validation Loss,Acc
1,0.6867,0.71429,0.51744
2,0.527,0.768489,0.528869
3,0.4387,0.889431,0.527738
4,0.4233,0.887227,0.542143
5,0.3312,0.88979,0.569107
6,0.3014,0.905229,0.574405
7,0.2571,0.952,0.573452
8,0.2116,1.004137,0.583036
9,0.172,1.157622,0.57125
10,0.14,1.21059,0.57506


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-20
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-40] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-40
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/ad

Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-180
Configuration saved in adapter_checkPoints/gender/checkpoint-180/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-180/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-180/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-180/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-180/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-180/gender/pytorch_model_head.bin
Deleting older checkp

acc: 0.6003571428571428: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:19<00:00, 146.72it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 3 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 640
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 400


Epoch,Training Loss,Validation Loss,Acc
1,0.6399,0.686729,0.562619
2,0.4876,0.731708,0.563155
3,0.3743,0.869235,0.55744
4,0.3836,0.844113,0.571488
5,0.2921,0.865107,0.593988
6,0.2626,0.914313,0.593214
7,0.2151,0.95429,0.592679


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-20
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-40
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/a

Overwriting existing head 'gender'
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
Loading module weights from adapter_checkPoints/gender/checkpoint-100/gender/pytorch_model_head.bin
***** Running Prediction *****
  Num examples = 56000
  Batch size = 32


acc: 0.6142857142857143: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:19<00:00, 145.86it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 4 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 640
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 400


Epoch,Training Loss,Validation Loss,Acc
1,0.6641,0.710385,0.525893
2,0.5242,0.759546,0.539643
3,0.4171,0.881733,0.539762
4,0.4011,0.854416,0.562024
5,0.3059,0.922628,0.568393
6,0.2678,0.969613,0.575536
7,0.2378,0.99623,0.583333
8,0.1977,1.057096,0.588988
9,0.1641,1.157745,0.581845
10,0.1338,1.262715,0.578214


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-20
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-40
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/ad

Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-180
Configuration saved in adapter_checkPoints/gender/checkpoint-180/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-180/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-180/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-180/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-180/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-180/gender/pytorch_model_head.bin
Deleting older checkp

acc: 0.6096428571428572: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:19<00:00, 146.21it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 5 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 640
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 400


Epoch,Training Loss,Validation Loss,Acc
1,0.6399,0.705844,0.547738
2,0.4876,0.767924,0.550952
3,0.3743,0.923179,0.549821
4,0.3836,0.905528,0.560833
5,0.2921,0.920252,0.576905
6,0.2626,0.975646,0.575179
7,0.2151,1.031716,0.58006
8,0.185,1.120932,0.582738
9,0.1676,1.242012,0.575179
10,0.1386,1.351899,0.567679


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-20
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-20/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-20/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-40
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/a

Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-60] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-180
Configuration saved in adapter_checkPoints/gender/checkpoint-180/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-180/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-180/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-180/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-180/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-180/gender/pytorch_model_head.bin
Deleting older checkp

acc: 0.6146428571428572: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 147.56it/s]


Results with 16 authors per label:  {'accuracy': [0.6238571428571429, 0.028716062489449346], 'f1-score': [0.6037027685368634, 0.01927943549239881]}
Working with 32 authors per label ... 


loading configuration file https://huggingface.co/pysentimiento/robertuito-base-cased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/3f85c0ee804baf604258892a88dd52cdf051d2418a511dcab7cab99a85a3a1b3.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-cased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.21

Train,Val split number 1 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 1280
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 800


Epoch,Training Loss,Validation Loss,Acc
1,0.6436,0.656015,0.62256
2,0.5108,0.688485,0.622679
3,0.4524,0.809604,0.598274
4,0.4098,0.753407,0.628631
5,0.3713,0.795309,0.622619
6,0.3199,0.850235,0.626786


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-40
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-120] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-80
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/a

acc: 0.6807142857142857: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 151.87it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 2 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 1280
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 800


Epoch,Training Loss,Validation Loss,Acc
1,0.6364,0.648498,0.61881
2,0.4875,0.65976,0.630714
3,0.4088,0.710929,0.631786
4,0.3681,0.757144,0.637262
5,0.3321,0.882933,0.622024
6,0.2868,0.922438,0.616845


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-40
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-80
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_

acc: 0.6953571428571429: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 152.22it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 3 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 1280
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 800


Epoch,Training Loss,Validation Loss,Acc
1,0.6364,0.631183,0.64
2,0.4875,0.63866,0.650179
3,0.4088,0.685631,0.651488
4,0.3681,0.724028,0.655893
5,0.3321,0.843491,0.638452
6,0.2868,0.878713,0.632262


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-40
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-80
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/ad

acc: 0.6953571428571429: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 151.13it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 4 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 1280
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 800


Epoch,Training Loss,Validation Loss,Acc
1,0.6364,0.635521,0.629167
2,0.4875,0.6412,0.638095
3,0.4088,0.699066,0.633393
4,0.3681,0.740632,0.640298
5,0.3321,0.868288,0.62619
6,0.2868,0.904113,0.620893


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-40
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-80
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_

acc: 0.6953571428571429: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 148.69it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 5 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 1280
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 800


Epoch,Training Loss,Validation Loss,Acc
1,0.6364,0.650391,0.616131
2,0.4875,0.659485,0.635357
3,0.4088,0.713329,0.626369
4,0.3681,0.745567,0.636012
5,0.3321,0.875656,0.61631
6,0.2868,0.920343,0.602738


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-40
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-40/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-40/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-80] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-80
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/ad

acc: 0.6953571428571429: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 152.94it/s]


Results with 32 authors per label:  {'accuracy': [0.6924285714285715, 0.0058571428571428715], 'f1-score': [0.6727261174083762, 0.0031541034803075347]}
Working with 64 authors per label ... 


loading configuration file https://huggingface.co/pysentimiento/robertuito-base-cased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/3f85c0ee804baf604258892a88dd52cdf051d2418a511dcab7cab99a85a3a1b3.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-cased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.21

Train,Val split number 1 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 2560
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1600


Epoch,Training Loss,Validation Loss,Acc
1,0.6177,0.633772,0.64375
2,0.5161,0.660513,0.64369
3,0.465,0.711895,0.645952
4,0.4141,0.753205,0.63869
5,0.383,0.80576,0.650298
6,0.3424,0.849004,0.645714
7,0.3228,0.970246,0.623393


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-80
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-160
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytor

***** Running Prediction *****
  Num examples = 56000
  Batch size = 32


acc: 0.7042857142857143: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 150.04it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 2 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 2560
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1600


Epoch,Training Loss,Validation Loss,Acc
1,0.6043,0.653884,0.625595
2,0.4906,0.690236,0.631131
3,0.4078,0.766593,0.634048
4,0.3785,0.782496,0.63994
5,0.3392,0.842459,0.639107
6,0.3209,0.911682,0.622738


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-80
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-240] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-160
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender

acc: 0.705: 100%|██████████████████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 150.47it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 3 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 2560
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1600


Epoch,Training Loss,Validation Loss,Acc
1,0.5997,0.643814,0.645179
2,0.4902,0.658794,0.656369
3,0.4091,0.716154,0.656548
4,0.3725,0.76787,0.659167
5,0.3395,0.785158,0.658274
6,0.3125,0.85326,0.649107


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-80
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-160
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender

acc: 0.7057142857142857: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 153.84it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 4 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 2560
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1600


Epoch,Training Loss,Validation Loss,Acc
1,0.5997,0.647272,0.638036
2,0.4902,0.670129,0.645298
3,0.4091,0.72902,0.64494
4,0.3725,0.784515,0.647619
5,0.3395,0.802038,0.649048
6,0.3125,0.869877,0.637024
7,0.2853,0.958899,0.638631


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-80
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-160] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-160
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender

Overwriting existing head 'gender'
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
Loading module weights from adapter_checkPoints/gender/checkpoint-400/gender/pytorch_model_head.bin
***** Running Prediction *****
  Num examples = 56000
  Batch size = 32


acc: 0.7189285714285715: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 161.62it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 5 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 2560
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 1600


Epoch,Training Loss,Validation Loss,Acc
1,0.6043,0.653136,0.628036
2,0.4906,0.676962,0.636369
3,0.4078,0.742496,0.63875
4,0.3785,0.763128,0.642619
5,0.3392,0.824391,0.639405
6,0.3209,0.87815,0.631964


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-80
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-80/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-80/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-240] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-160
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender

acc: 0.705: 100%|██████████████████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 162.09it/s]


Results with 64 authors per label:  {'accuracy': [0.7077857142857144, 0.005589713584718266], 'f1-score': [0.687684720440162, 0.019845425319650088]}
Working with 128 authors per label ... 


loading configuration file https://huggingface.co/pysentimiento/robertuito-base-cased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/3f85c0ee804baf604258892a88dd52cdf051d2418a511dcab7cab99a85a3a1b3.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-cased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.21

Train,Val split number 1 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 5120
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3200


Epoch,Training Loss,Validation Loss,Acc
1,0.6105,0.612351,0.66744
2,0.5336,0.625783,0.667679
3,0.4942,0.63976,0.672083
4,0.4521,0.702366,0.662143
5,0.4266,0.7163,0.672857
6,0.3858,0.754515,0.666488
7,0.3623,0.777387,0.672738


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-160
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-320
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gende

Loading module weights from adapter_checkPoints/gender/checkpoint-800/gender/pytorch_model_head.bin
***** Running Prediction *****
  Num examples = 56000
  Batch size = 32


acc: 0.7342857142857143: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:18<00:00, 155.30it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 2 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 5120
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3200


Epoch,Training Loss,Validation Loss,Acc
1,0.6068,0.627813,0.656607
2,0.5167,0.657811,0.65881
3,0.4683,0.687314,0.652917
4,0.4431,0.761017,0.651845


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-160
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-480] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-320
Configuration saved in adapter_checkPoints/gender/checkpoint-320

acc: 0.7278571428571429: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 159.37it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 3 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 5120
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3200


Epoch,Training Loss,Validation Loss,Acc
1,0.5926,0.613008,0.672262
2,0.5083,0.651596,0.669226
3,0.4623,0.660715,0.676548
4,0.4436,0.692017,0.680536
5,0.4011,0.713926,0.680357
6,0.3734,0.745108,0.667024


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-160
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-320
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gende

acc: 0.7367857142857143: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 157.79it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 4 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 5120
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3200


Epoch,Training Loss,Validation Loss,Acc
1,0.6008,0.623244,0.656548
2,0.5171,0.662946,0.657024
3,0.4676,0.685898,0.657262
4,0.4482,0.72994,0.660238
5,0.4081,0.737684,0.6625
6,0.3852,0.792788,0.653869
7,0.3448,0.819218,0.655952


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-160
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-320] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-320
Configuration saved in adapter_checkPoints/gender/checkpoint-320

Overwriting existing head 'gender'
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
Loading module weights from adapter_checkPoints/gender/checkpoint-800/gender/pytorch_model_head.bin
***** Running Prediction *****
  Num examples = 56000
  Batch size = 32


acc: 0.7457142857142857: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 163.80it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 5 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 5120
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 3200


Epoch,Training Loss,Validation Loss,Acc
1,0.6068,0.631938,0.653274
2,0.5167,0.663051,0.652321
3,0.4683,0.683847,0.65619
4,0.4431,0.725859,0.666607
5,0.4089,0.73027,0.65756
6,0.3751,0.798485,0.645119


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-160
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-160/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-160/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-480] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-320
Configuration saved in adapter_checkPoints/gender/checkpoint-320

acc: 0.7246428571428571: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 163.06it/s]


Results with 128 authors per label:  {'accuracy': [0.7338571428571429, 0.00735193990781773], 'f1-score': [0.7279624959710755, 0.013607124990922394]}
Working with 256 authors per label ... 


loading configuration file https://huggingface.co/pysentimiento/robertuito-base-cased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/3f85c0ee804baf604258892a88dd52cdf051d2418a511dcab7cab99a85a3a1b3.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-cased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.21

Train,Val split number 1 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 10240
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 6400


Epoch,Training Loss,Validation Loss,Acc
1,0.5894,0.619614,0.669226
2,0.5358,0.588963,0.690119
3,0.4909,0.594969,0.693452
4,0.4493,0.615044,0.700476
5,0.4195,0.651101,0.695119
6,0.3874,0.669984,0.694821


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-320
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-640
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gende

acc: 0.7775: 100%|█████████████████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 157.19it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 2 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 10240
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 6400


Epoch,Training Loss,Validation Loss,Acc
1,0.5842,0.641373,0.654405
2,0.5143,0.641472,0.673095
3,0.4699,0.638489,0.680655
4,0.4321,0.671631,0.684107
5,0.3985,0.701336,0.685238
6,0.358,0.736581,0.680952
7,0.334,0.806297,0.684643


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-320
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-800] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-640
Configuration saved in adapter_checkPoints/gender/checkpoint-640

Loading module configuration from adapter_checkPoints/gender/checkpoint-1600/gender/head_config.json
Overwriting existing head 'gender'
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.
Loading module weights from adapter_checkPoints/gender/checkpoint-1600/gender/pytorch_model_head.bin
***** Running Prediction *****
  Num examples = 56000
  Batch size = 32


acc: 0.7657142857142857: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 159.96it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 3 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 10240
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 6400


Epoch,Training Loss,Validation Loss,Acc
1,0.583,0.628593,0.671012
2,0.5158,0.61254,0.693988
3,0.4672,0.621833,0.697679
4,0.4291,0.643274,0.706071
5,0.3949,0.691732,0.700536
6,0.355,0.69736,0.699345


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-320
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-960] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-640
Configuration saved in adapter_checkPoints/gender/checkpoint-640

acc: 0.7814285714285715: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 159.13it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 4 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 10240
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 6400


Epoch,Training Loss,Validation Loss,Acc
1,0.5842,0.616551,0.677202
2,0.5143,0.601875,0.700774
3,0.4699,0.590751,0.707798
4,0.4321,0.613631,0.712321
5,0.3985,0.659262,0.699702
6,0.358,0.681239,0.697143


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-320
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-640] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-640
Configuration saved in adapter_checkPoints/gender/checkpoint-640

acc: 0.7778571428571428: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 164.33it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 5 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 10240
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 6400


Epoch,Training Loss,Validation Loss,Acc
1,0.5842,0.643271,0.646607
2,0.5143,0.63665,0.677976
3,0.4699,0.629276,0.681845
4,0.4321,0.653643,0.693988
5,0.3985,0.685845,0.683036
6,0.358,0.724062,0.681548


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-320
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-320/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-320/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-640] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-640
Configuration saved in adapter_checkPoints/gender/checkpoint-640

acc: 0.7778571428571428: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 161.12it/s]


Results with 256 authors per label:  {'accuracy': [0.7760714285714286, 0.0053737836782297346], 'f1-score': [0.7812431706146038, 0.02142299275496716]}
Working with 512 authors per label ... 


loading configuration file https://huggingface.co/pysentimiento/robertuito-base-cased/resolve/main/config.json from cache at /001/usuarios/isaac.bribiesca/.cache/huggingface/transformers/3f85c0ee804baf604258892a88dd52cdf051d2418a511dcab7cab99a85a3a1b3.4cce50d5a926bf18fe43f2ea8d4596b505e97a64e6e700e993def66b06f1c83b
Model config RobertaConfig {
  "_name_or_path": "pysentimiento/robertuito-base-cased",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 130,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.21

Train,Val split number 1 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 20480
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 12800


Epoch,Training Loss,Validation Loss,Acc
1,0.5928,0.574615,0.695357
2,0.5367,0.576995,0.709167
3,0.4941,0.578194,0.715238
4,0.4582,0.620873,0.70494
5,0.426,0.602264,0.709405


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-640
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-1280
Configuration saved in adapter_checkPoints/gender/checkpoint-1280/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-1280/ge

acc: 0.81: 100%|███████████████████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 160.18it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 2 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 20480
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 12800


Epoch,Training Loss,Validation Loss,Acc
1,0.5718,0.589779,0.696845
2,0.512,0.604269,0.693214
3,0.4713,0.627988,0.693155


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-640
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_model_head.bin
Deleting older checkpoint [adapter_checkPoints/gender/checkpoint-1280] due to args.save_total_limit
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-1280
Configuration saved in adapter_checkPoints/gender/checkpoint-1

acc: 0.7792857142857142: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 161.36it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 3 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 20480
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 12800


Epoch,Training Loss,Validation Loss,Acc
1,0.5734,0.603916,0.697143
2,0.5232,0.573314,0.712262
3,0.4864,0.59413,0.704821
4,0.4548,0.633203,0.7


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-640
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-1280
Configuration saved in adapter_checkPoints/gender/checkpoint-1280/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-1280/ge

acc: 0.7867857142857143: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 157.80it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 4 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 20480
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 12800


Epoch,Training Loss,Validation Loss,Acc
1,0.5757,0.58878,0.70256
2,0.5214,0.564126,0.7125
3,0.4868,0.58555,0.705298
4,0.4568,0.615798,0.705238


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-640
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-1280
Configuration saved in adapter_checkPoints/gender/checkpoint-1280/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-1280/ge

acc: 0.7885714285714286: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 159.75it/s]
Adding adapter 'gender'.
Adding head 'gender' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True}.


Train,Val split number 5 of 5


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running training *****
  Num examples = 20480
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 12800


Epoch,Training Loss,Validation Loss,Acc
1,0.5757,0.607603,0.690833
2,0.5214,0.577315,0.6975
3,0.4868,0.619093,0.691369
4,0.4568,0.641859,0.686429


***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-640
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_adapter.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_model_head.bin
Configuration saved in adapter_checkPoints/gender/checkpoint-640/gender/head_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-640/gender/pytorch_model_head.bin
***** Running Evaluation *****
  Num examples = 16800
  Batch size = 32
Saving model checkpoint to adapter_checkPoints/gender/checkpoint-1280
Configuration saved in adapter_checkPoints/gender/checkpoint-1280/gender/adapter_config.json
Module weights saved in adapter_checkPoints/gender/checkpoint-1280/ge

acc: 0.7885714285714286: 100%|█████████████████████████████████████████████████████| 2800/2800 [00:17<00:00, 158.85it/s]


Results with 512 authors per label:  {'accuracy': [0.7906428571428572, 0.010267841614895116], 'f1-score': [0.7873580677573417, 0.016908853577989186]}
